662 lines
1.1 MiB
662 lines
1.1 MiB
{"epoch": 0.0, "step": 1, "batch_size": 64, "mean": -0.0013527870178222656, "std": 0.2564818859100342, "min": -0.736083984375, "p10": -0.3432229995727539, "median": 0.038166046142578125, "p90": 0.29227676391601565, "max": 0.645111083984375, "pos_frac": 0.578125, "sample": [0.1120758056640625, 0.12518310546875, 0.31621551513671875, 0.13765716552734375, -0.12592506408691406, 0.23141098022460938, -0.21887779235839844, 0.21950721740722656, 0.04480743408203125, 0.020877838134765625, 0.0570220947265625, 0.058269500732421875, -0.4338226318359375, -0.030628204345703125, 0.645111083984375, -0.395477294921875, 0.09050941467285156, 0.0007190704345703125, -0.34615325927734375, 0.016077041625976562, -0.33638572692871094, 0.293853759765625, 0.17610931396484375, 0.22386932373046875, 0.21470260620117188, -0.08536529541015625, 0.0907745361328125, -0.03816986083984375, 0.39190101623535156, 0.16336441040039062, 0.08024787902832031, -0.031158447265625, 0.08477020263671875, 0.002460479736328125, -0.242034912109375, 0.07232666015625, -0.60186767578125, 0.20531463623046875, 0.155731201171875, -0.14299774169921875, -0.25698089599609375, 0.12331962585449219, -0.26497650146484375, 0.15140533447265625, -0.0920257568359375, -0.18599319458007812, 0.19028091430664062, 0.2496490478515625, 0.42162322998046875, 0.17873382568359375, -0.1525421142578125, -0.4972076416015625, 0.32010650634765625, -0.10365867614746094, -0.233795166015625, -0.19828224182128906, -0.4018898010253906, -0.13407135009765625, -0.09596633911132812, 0.031524658203125, 0.28859710693359375, -0.192962646484375, -0.736083984375, 0.3026123046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000001.npy"}
|
|
{"epoch": 0.0015117157974300832, "step": 2, "batch_size": 64, "mean": 0.03744968771934509, "std": 0.2875921130180359, "min": -0.7604827880859375, "p10": -0.2812448501586914, "median": 0.03963661193847656, "p90": 0.3654294967651367, "max": 0.8134727478027344, "pos_frac": 0.5625, "sample": [0.30594635009765625, -0.24289894104003906, -0.11509323120117188, -0.13417816162109375, 0.06942558288574219, 0.36568641662597656, -0.14640045166015625, 0.1497650146484375, 0.30261993408203125, 0.10124588012695312, 0.13028717041015625, -0.0031890869140625, 0.0361480712890625, 0.5662612915039062, 0.09694290161132812, -0.01091766357421875, 0.1128997802734375, 0.0411834716796875, -0.21860504150390625, -0.1236419677734375, -0.08812713623046875, 0.10360527038574219, 0.1790008544921875, -0.5114288330078125, 0.3056755065917969, -0.14553451538085938, 0.28168487548828125, 0.26990509033203125, 0.1686878204345703, 0.038089752197265625, 0.19541168212890625, -0.10783576965332031, -0.2644004821777344, -0.19707489013671875, -0.140472412109375, 0.1349811553955078, 0.19672012329101562, -0.0714111328125, 0.53369140625, 0.1271820068359375, 0.8134727478027344, 0.2990264892578125, -0.7604827880859375, -0.08274078369140625, 0.05890846252441406, 0.029361724853515625, 0.4510040283203125, -0.1599273681640625, -0.29346656799316406, 0.10005569458007812, -0.27509117126464844, -0.1937713623046875, 0.19167327880859375, 0.28173065185546875, -0.09406471252441406, -0.3380699157714844, -0.29186248779296875, 0.36483001708984375, 0.009979248046875, 0.44391632080078125, -0.126708984375, -0.6550216674804688, 0.6160736083984375, -0.28388214111328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000002.npy"}
|
|
{"epoch": 0.0030234315948601664, "step": 3, "batch_size": 64, "mean": 0.025411784648895264, "std": 0.2763363718986511, "min": -0.61077880859375, "p10": -0.3318611145019531, "median": 0.021366119384765625, "p90": 0.4037971496582032, "max": 0.9320526123046875, "pos_frac": 0.5625, "sample": [-0.35106658935546875, -0.09208297729492188, -0.184661865234375, -0.2628631591796875, 0.3055267333984375, 0.4192047119140625, -0.11469459533691406, 0.05049896240234375, 0.791534423828125, -0.33426666259765625, -0.038097381591796875, 0.02752685546875, 0.105804443359375, 0.1876678466796875, 0.096466064453125, 0.17395782470703125, -0.076995849609375, 0.0059661865234375, 0.0014629364013671875, 0.0438995361328125, -0.3704681396484375, -0.10743904113769531, -0.074676513671875, -0.15820693969726562, -0.4754486083984375, 0.45896339416503906, 0.9320526123046875, 0.19293212890625, -0.06399726867675781, 0.19295501708984375, 0.08741569519042969, -0.24651718139648438, 0.0074005126953125, 0.018341064453125, 0.05953788757324219, -0.10698890686035156, -0.3746185302734375, -0.61077880859375, 0.12128639221191406, 0.217559814453125, 0.4097137451171875, -0.00971221923828125, -0.0189666748046875, 0.42071533203125, 0.467620849609375, 0.0637664794921875, 0.10398101806640625, 0.2468128204345703, 0.11502838134765625, -0.3262481689453125, -0.14240264892578125, -0.08635711669921875, 0.10441970825195312, -0.10102081298828125, 0.06356048583984375, 0.38999176025390625, 0.16813278198242188, 0.19588470458984375, 0.058391571044921875, -0.47832489013671875, -0.1471405029296875, -0.2253570556640625, 0.02439117431640625, -0.1246185302734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000003.npy"}
|
|
{"epoch": 0.0045351473922902496, "step": 4, "batch_size": 64, "mean": 0.022151529788970947, "std": 0.2944813668727875, "min": -0.7115478515625, "p10": -0.3688451766967773, "median": -0.0099945068359375, "p90": 0.38884086608886725, "max": 0.777557373046875, "pos_frac": 0.484375, "sample": [-0.149810791015625, 0.32270050048828125, -0.1146240234375, 0.00415802001953125, 0.0742950439453125, -0.5396270751953125, 0.15482330322265625, 0.6247749328613281, 0.2374725341796875, 0.12656402587890625, -0.1976165771484375, 0.15022659301757812, -0.113861083984375, -0.320709228515625, -0.060039520263671875, -0.062835693359375, 0.11020660400390625, -0.3219642639160156, 0.398773193359375, -0.7115478515625, -0.45195770263671875, 0.025623321533203125, 0.2487640380859375, -0.226287841796875, -0.0609893798828125, 0.12198638916015625, 0.506744384765625, 0.3286285400390625, 0.17702102661132812, 0.2358245849609375, 0.589202880859375, 0.08875274658203125, 0.050853729248046875, 0.05109405517578125, -0.4077911376953125, 0.30615234375, 0.777557373046875, -0.08166885375976562, -0.0711669921875, -0.10479736328125, 0.05462646484375, -0.11073684692382812, 0.3656654357910156, -0.2290058135986328, -0.01165771484375, 0.6235198974609375, -0.5302143096923828, 0.1341533660888672, -0.23168182373046875, 0.15243911743164062, -0.09546661376953125, -0.013660430908203125, -0.008331298828125, -0.14898681640625, -0.0390472412109375, 0.41290283203125, -0.2186431884765625, 0.17987442016601562, -0.032772064208984375, -0.38893699645996094, -0.39435577392578125, -0.01995849609375, 0.3079833984375, -0.0549163818359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000004.npy"}
|
|
{"epoch": 0.006046863189720333, "step": 5, "batch_size": 64, "mean": 0.053210049867630005, "std": 0.3282647430896759, "min": -0.7918472290039062, "p10": -0.27035980224609374, "median": 0.010957717895507812, "p90": 0.5004379272460938, "max": 1.044464111328125, "pos_frac": 0.53125, "sample": [-0.2978248596191406, -0.14028167724609375, -0.09636116027832031, 1.044464111328125, 0.0491180419921875, -0.243133544921875, 0.01123809814453125, 0.04460906982421875, -0.079254150390625, 0.18628692626953125, 0.558807373046875, -0.3459625244140625, -0.1459808349609375, 0.08854103088378906, -0.7918472290039062, -0.274566650390625, 0.41809844970703125, 0.023107528686523438, -0.225830078125, -0.08869171142578125, -0.070648193359375, -0.18346214294433594, 0.161834716796875, 0.15970230102539062, -0.634613037109375, 0.4951934814453125, -0.2605438232421875, -0.31748390197753906, 0.50775146484375, 0.4427604675292969, 0.2227783203125, 0.0667572021484375, 0.00170135498046875, 0.2610321044921875, 0.2281646728515625, -0.5180015563964844, -0.1352691650390625, -0.17895126342773438, 0.18361854553222656, -0.023359298706054688, -0.12620162963867188, -0.2102508544921875, 0.21576690673828125, 0.23849105834960938, 0.6761932373046875, -0.11387443542480469, -0.2464752197265625, -0.09336090087890625, 0.4810333251953125, -0.0049285888671875, 0.2000446319580078, 0.6461257934570312, 0.1839141845703125, 0.75048828125, 0.502685546875, 0.0499725341796875, -0.2154083251953125, -0.13645172119140625, 0.0801849365234375, -0.015827178955078125, -0.11160659790039062, 0.22626495361328125, 0.010677337646484375, 0.3144874572753906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000005.npy"}
|
|
{"epoch": 0.007558578987150416, "step": 6, "batch_size": 64, "mean": -0.07339566946029663, "std": 0.31926488876342773, "min": -1.273040771484375, "p10": -0.3979475021362304, "median": -0.031589508056640625, "p90": 0.2577075958251954, "max": 0.596466064453125, "pos_frac": 0.46875, "sample": [-0.12425994873046875, 0.0064544677734375, 0.008235931396484375, 0.19072723388671875, -0.03208160400390625, -0.3275012969970703, -0.07293128967285156, -0.1134490966796875, -0.147125244140625, -0.20249366760253906, 0.04265785217285156, -0.23040771484375, -0.4997406005859375, 0.1461639404296875, -0.2742919921875, -0.674896240234375, -0.11769866943359375, 0.596466064453125, -0.08997726440429688, -0.4061317443847656, -0.05490875244140625, 0.07715606689453125, 0.08512687683105469, 0.2692432403564453, 0.03243255615234375, -0.06914520263671875, -0.126708984375, 0.1949462890625, -0.10863685607910156, 0.21364593505859375, 0.2307910919189453, -0.1516132354736328, -0.0218658447265625, 0.00128173828125, -0.5836868286132812, 0.32936859130859375, 0.033077239990234375, 0.090179443359375, -0.2356109619140625, -0.031097412109375, 0.15778350830078125, 0.021131515502929688, 0.2271575927734375, 0.09472274780273438, -0.5210037231445312, -0.16115570068359375, 0.513214111328125, 0.16845321655273438, -0.37885093688964844, 0.3722496032714844, 0.15981292724609375, 0.33339691162109375, -0.3576164245605469, -0.26189613342285156, -0.2011566162109375, 0.04522895812988281, -0.23972320556640625, -0.31256103515625, -1.273040771484375, -1.091827392578125, -0.1954193115234375, 0.016736984252929688, 0.29944419860839844, 0.03590202331542969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000006.npy"}
|
|
{"epoch": 0.009070294784580499, "step": 7, "batch_size": 64, "mean": -0.03246006369590759, "std": 0.23877570033073425, "min": -0.6004714965820312, "p10": -0.31418704986572266, "median": -0.06448745727539062, "p90": 0.28140964508056643, "max": 0.52911376953125, "pos_frac": 0.40625, "sample": [0.3328704833984375, -0.0295867919921875, -0.06412506103515625, -0.15154075622558594, 0.16388511657714844, 0.16291046142578125, 0.21402740478515625, 0.0649871826171875, -0.36406707763671875, 0.3295173645019531, 0.013301849365234375, -0.000232696533203125, 0.16832542419433594, -0.10628509521484375, -0.036838531494140625, 0.0016307830810546875, -0.08717727661132812, -0.25827789306640625, -0.4247169494628906, 0.15744781494140625, 0.08633804321289062, 0.08826446533203125, -0.07147216796875, 0.1381683349609375, 0.52911376953125, -0.009588241577148438, 0.2703399658203125, -0.12133026123046875, -0.13653564453125, -0.15302658081054688, -0.34464263916015625, -0.3163299560546875, -0.21381378173828125, 0.3738136291503906, -0.06717300415039062, 0.182403564453125, -0.064849853515625, 0.3753204345703125, -0.10051345825195312, 0.13020896911621094, 0.0791778564453125, -0.3091869354248047, -0.12766265869140625, 0.048980712890625, -0.6004714965820312, -0.210296630859375, -0.4515037536621094, 0.21162796020507812, -0.30791473388671875, -0.09710693359375, -0.1839923858642578, -0.273834228515625, 0.28615379333496094, 0.11798095703125, -0.07634735107421875, -0.18951416015625, -0.19797515869140625, -0.304901123046875, -0.2850494384765625, -0.4443817138671875, 0.5271072387695312, 0.1630706787109375, -0.020313262939453125, -0.0918426513671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000007.npy"}
|
|
{"epoch": 0.010582010582010581, "step": 8, "batch_size": 64, "mean": 0.015507668256759644, "std": 0.3783254623413086, "min": -0.8278350830078125, "p10": -0.444491958618164, "median": -0.029024124145507812, "p90": 0.44416313171386723, "max": 1.025177001953125, "pos_frac": 0.46875, "sample": [0.14367103576660156, 0.7318115234375, 0.8189697265625, 0.05077362060546875, -0.7358627319335938, -0.15027618408203125, 0.121917724609375, 0.42032623291015625, -0.2272663116455078, -0.01422119140625, -0.0644378662109375, -0.5428924560546875, -0.1933765411376953, -0.371856689453125, 0.41963958740234375, 0.9876480102539062, 0.0957183837890625, 0.28194618225097656, -0.22954940795898438, 1.025177001953125, -0.06975555419921875, -0.4685096740722656, -0.8278350830078125, -0.534423828125, 0.236175537109375, -0.3742332458496094, 0.1355113983154297, -0.0301971435546875, 0.4490852355957031, -0.2009735107421875, 0.1647796630859375, -0.096282958984375, -0.027851104736328125, -0.318115234375, -0.039089202880859375, -0.158203125, 0.43267822265625, 0.5279827117919922, 0.7463226318359375, 0.2478179931640625, 0.27042388916015625, -0.04913139343261719, -0.07595062255859375, -0.16873931884765625, -0.16571807861328125, -0.49224090576171875, 0.169464111328125, -0.2245330810546875, -0.1731433868408203, -0.059539794921875, 0.10400199890136719, 0.07835197448730469, -0.38845062255859375, 0.070770263671875, 0.155029296875, 0.20950698852539062, -0.1092987060546875, -0.092559814453125, 0.10495758056640625, 0.11360549926757812, 0.009675979614257812, 0.25439453125, -0.7394866943359375, -0.17164230346679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000008.npy"}
|
|
{"epoch": 0.012093726379440665, "step": 9, "batch_size": 64, "mean": 0.051593929529190063, "std": 0.40762022137641907, "min": -0.759307861328125, "p10": -0.5045188903808594, "median": 0.052626609802246094, "p90": 0.6570007324218753, "max": 1.27337646484375, "pos_frac": 0.5625, "sample": [-0.01657867431640625, 1.0384368896484375, 0.048404693603515625, 0.6838531494140625, 0.05684852600097656, -0.32408905029296875, 0.33158111572265625, -0.3758087158203125, 0.0655364990234375, -0.759307861328125, -0.5166244506835938, -0.5543136596679688, 0.5943450927734375, 0.13422775268554688, 0.025177001953125, -0.3409233093261719, -0.1299591064453125, 0.1370391845703125, 0.23226547241210938, 0.09137916564941406, 0.333038330078125, -0.03821754455566406, 0.80389404296875, 0.26920318603515625, -0.06942558288574219, 0.815185546875, 0.3064556121826172, 0.27178192138671875, 0.016042709350585938, -0.5513267517089844, -0.585205078125, -0.3851146697998047, 0.15033721923828125, 0.8441619873046875, 0.12391853332519531, -0.09768486022949219, 0.17602920532226562, -0.5705070495605469, -0.2908935546875, 0.17919921875, -0.02678680419921875, -0.4762725830078125, -0.6339263916015625, 0.0269622802734375, -0.09636306762695312, 0.10277938842773438, 0.10602188110351562, 0.29575538635253906, -0.175628662109375, -0.3013153076171875, 0.1551971435546875, -0.0825347900390625, 0.08504486083984375, 0.6851654052734375, 0.22970008850097656, 1.27337646484375, -0.046916961669921875, 0.12679290771484375, -0.25533294677734375, 0.06006622314453125, -0.25048828125, 0.499237060546875, -0.062000274658203125, -0.0588836669921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000009.npy"}
|
|
{"epoch": 0.013605442176870748, "step": 10, "batch_size": 64, "mean": 0.00640559196472168, "std": 0.34175214171409607, "min": -0.97393798828125, "p10": -0.3608011245727539, "median": -0.048709869384765625, "p90": 0.3875701904296876, "max": 0.998077392578125, "pos_frac": 0.4375, "sample": [-0.97393798828125, -0.3667926788330078, -0.041973114013671875, 0.10315322875976562, 0.2366180419921875, -0.5195159912109375, -0.18425559997558594, 0.16436767578125, -0.0642242431640625, 0.48065185546875, -0.25647735595703125, 0.10258865356445312, 0.368194580078125, 0.15673828125, 0.196533203125, -0.11688995361328125, 0.17234230041503906, -0.086181640625, 0.2838897705078125, -0.2322540283203125, -0.020534515380859375, 0.3425712585449219, 0.5469207763671875, 0.5736541748046875, -0.2123851776123047, -0.05985260009765625, -0.32538604736328125, -0.4482421875, -0.203277587890625, -0.22108840942382812, -0.055446624755859375, 0.137725830078125, -0.07455062866210938, 0.08290863037109375, -0.10876655578613281, 0.02468109130859375, 0.1974945068359375, 0.2987537384033203, 0.312744140625, 0.12558746337890625, -0.000553131103515625, -0.20076751708984375, -0.177001953125, -0.379180908203125, -0.45629119873046875, -0.11111831665039062, -0.3468208312988281, -0.32021331787109375, 0.998077392578125, -0.2250518798828125, -0.307098388671875, -0.02024078369140625, 0.3958740234375, 0.24865341186523438, -0.10156631469726562, -0.12722396850585938, 0.9323577880859375, 0.15376853942871094, -0.155792236328125, 0.1803741455078125, 0.29323768615722656, -0.5279693603515625, 0.6144294738769531, -0.2860107421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000010.npy"}
|
|
{"epoch": 0.015117157974300832, "step": 11, "batch_size": 64, "mean": 0.08196571469306946, "std": 0.30230045318603516, "min": -0.6071243286132812, "p10": -0.2715120315551758, "median": 0.08228015899658203, "p90": 0.4141960144042969, "max": 1.0505752563476562, "pos_frac": 0.625, "sample": [0.15064239501953125, 0.04086112976074219, -0.4675788879394531, -0.051959991455078125, -0.2550163269042969, 0.25342559814453125, -0.06854057312011719, 0.021585464477539062, 0.592376708984375, -0.4064483642578125, 0.2705535888671875, 0.10959625244140625, 0.0277557373046875, 0.05558967590332031, -0.011487960815429688, -0.10432815551757812, 0.6037368774414062, 0.21240234375, -0.195709228515625, 0.24082183837890625, 0.10049819946289062, 0.24169921875, 0.7172317504882812, -0.18181991577148438, -0.2785816192626953, -0.39618682861328125, 0.11682510375976562, 0.10979652404785156, 0.1778717041015625, 0.3426551818847656, 0.10076332092285156, 0.009611129760742188, -0.09310531616210938, 0.8119049072265625, 0.019786834716796875, 0.27680206298828125, 0.2848854064941406, 1.0505752563476562, 0.264801025390625, -0.01740264892578125, -0.03314208984375, -0.0371246337890625, -0.530517578125, 0.25148773193359375, -0.01871490478515625, -0.6071243286132812, -0.2902412414550781, 0.06406211853027344, 0.15604400634765625, 0.5114364624023438, 0.026729583740234375, 0.11676025390625, -0.16455841064453125, 0.13050460815429688, 0.41535186767578125, -0.04944610595703125, -0.22350692749023438, 0.1502552032470703, 0.1581573486328125, -0.070068359375, 0.17322540283203125, 0.4114990234375, -0.2255706787109375, 0.25341796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000011.npy"}
|
|
{"epoch": 0.016628873771730914, "step": 12, "batch_size": 64, "mean": -0.005672812461853027, "std": 0.3061535954475403, "min": -0.5679473876953125, "p10": -0.43599777221679686, "median": 0.0228118896484375, "p90": 0.39168663024902345, "max": 0.7239799499511719, "pos_frac": 0.53125, "sample": [0.09092521667480469, -0.06689453125, 0.09367561340332031, 0.03886985778808594, -0.2717170715332031, 0.38995361328125, 0.23996734619140625, 0.125274658203125, 0.38629150390625, 0.0294952392578125, -0.33538055419921875, -0.42220306396484375, -0.3085899353027344, -0.515838623046875, 0.36738014221191406, -0.2826080322265625, 0.469482421875, -0.44495391845703125, -0.0959014892578125, 0.07813835144042969, -0.055084228515625, -0.11602783203125, -0.4419097900390625, 0.234222412109375, 0.09863471984863281, -0.36045074462890625, -0.2576770782470703, -0.0543212890625, -0.3939208984375, 0.23628997802734375, -0.1428070068359375, -0.1812744140625, -0.48123931884765625, -0.5679473876953125, -0.3724021911621094, 0.05624961853027344, -0.5602645874023438, 0.18479156494140625, 0.14847946166992188, -0.07251548767089844, -0.180572509765625, -0.24852561950683594, 0.0161285400390625, 0.18585586547851562, -0.03339385986328125, 0.7239799499511719, -0.0651702880859375, 0.43538665771484375, 0.10092926025390625, 0.35482025146484375, 0.28922271728515625, 0.002483367919921875, 0.3924293518066406, -0.02783203125, 0.4704437255859375, 0.4001312255859375, -0.5031185150146484, 0.7012863159179688, -0.1940135955810547, 0.11807441711425781, 0.0337982177734375, 0.085906982421875, 0.07968711853027344, 0.032810211181640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000012.npy"}
|
|
{"epoch": 0.018140589569160998, "step": 13, "batch_size": 64, "mean": -0.04802015423774719, "std": 0.33597585558891296, "min": -1.1180267333984375, "p10": -0.40967102050781246, "median": -0.02174091339111328, "p90": 0.3084941864013673, "max": 0.608795166015625, "pos_frac": 0.46875, "sample": [-0.9552001953125, -0.07225799560546875, -1.1180267333984375, 0.4176483154296875, 0.222900390625, -0.5385780334472656, 0.23931503295898438, 0.2710132598876953, 0.015472412109375, -0.22051239013671875, -0.14062118530273438, 0.030284881591796875, 0.15309906005859375, -0.17584228515625, 0.2833976745605469, 0.1996936798095703, 0.608795166015625, -0.056407928466796875, 0.1638031005859375, 0.31775665283203125, -0.0724945068359375, -0.43817138671875, -0.1805419921875, -0.2770233154296875, -0.1895599365234375, -0.045246124267578125, -0.343170166015625, -0.03689765930175781, -0.22296905517578125, -0.33447265625, 0.029428482055664062, -0.03726959228515625, 0.3803558349609375, -0.7672271728515625, -0.05401611328125, 0.3156318664550781, 0.010784149169921875, -0.11644363403320312, 0.1670246124267578, 0.0264892578125, 0.16022872924804688, 0.38214874267578125, -0.6359024047851562, -0.12276649475097656, -0.26152801513671875, -0.16670608520507812, -0.0491943359375, -0.14144134521484375, -0.3089561462402344, 0.09046173095703125, -0.00658416748046875, -0.00473785400390625, -0.243255615234375, -0.07065200805664062, 0.2708587646484375, -0.9801712036132812, 0.291839599609375, 0.23007965087890625, 0.0992279052734375, 0.36232757568359375, 0.01800537109375, 0.2401123046875, 0.08618927001953125, 0.22718238830566406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000013.npy"}
|
|
{"epoch": 0.019652305366591082, "step": 14, "batch_size": 64, "mean": -0.042172253131866455, "std": 0.32476672530174255, "min": -0.83819580078125, "p10": -0.3944648742675781, "median": -0.04343223571777344, "p90": 0.21335010528564477, "max": 1.0364990234375, "pos_frac": 0.390625, "sample": [-0.34432220458984375, -0.2263031005859375, -0.0448150634765625, 0.103271484375, 0.13356971740722656, -0.17438888549804688, -0.04499053955078125, 0.6796875, -0.1484966278076172, 0.10314369201660156, -0.5598373413085938, 0.021678924560546875, 0.13227462768554688, 0.016143798828125, -0.0621795654296875, -0.09034156799316406, -0.10608482360839844, -0.0257720947265625, 0.0867919921875, -0.0115966796875, -0.2914085388183594, -0.6641693115234375, -0.10833168029785156, -0.12340545654296875, 0.10977935791015625, -0.13399505615234375, -0.83819580078125, 0.154327392578125, 0.14461708068847656, 0.26139068603515625, -0.02068328857421875, 1.0171051025390625, 0.0489959716796875, -0.0322265625, 0.07226181030273438, -0.5520172119140625, 0.5213470458984375, -0.11486053466796875, 0.2386455535888672, -0.2749481201171875, -0.790985107421875, 0.33837127685546875, -0.12376022338867188, 1.0364990234375, 0.07749176025390625, 0.12778091430664062, -0.1057891845703125, -0.19211578369140625, -0.17673683166503906, 0.10726356506347656, -0.41595458984375, -0.49507904052734375, 0.11505889892578125, -0.24322509765625, 0.03315544128417969, -0.016826629638671875, -0.13713836669921875, -0.010759353637695312, 0.024921417236328125, -0.27456092834472656, -0.07820320129394531, -0.2632293701171875, -0.044208526611328125, -0.04265594482421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000014.npy"}
|
|
{"epoch": 0.021164021164021163, "step": 15, "batch_size": 64, "mean": 0.004087239503860474, "std": 0.30993908643722534, "min": -0.75933837890625, "p10": -0.3201860427856445, "median": 0.007308006286621094, "p90": 0.3801340103149415, "max": 0.788604736328125, "pos_frac": 0.515625, "sample": [0.008527755737304688, -0.171875, 0.14332962036132812, -0.18325424194335938, -0.2666339874267578, -0.0281219482421875, -0.361175537109375, -0.2644195556640625, 0.788604736328125, -0.75933837890625, 0.07782363891601562, -0.08234786987304688, -0.555908203125, -0.22231101989746094, 0.075164794921875, 0.07944297790527344, 0.6642837524414062, 0.21518325805664062, 0.35333251953125, -0.32828330993652344, -0.2438507080078125, -0.30129241943359375, -0.246124267578125, -0.2614784240722656, -0.1611804962158203, 0.1824798583984375, 0.32879638671875, -0.12530517578125, 0.1567840576171875, -0.12877655029296875, 0.5474395751953125, -0.26653289794921875, -0.2358551025390625, -0.3992156982421875, -0.12418365478515625, 0.39974212646484375, 0.27477264404296875, -0.01043701171875, -0.08365631103515625, 0.08457183837890625, -0.543243408203125, 0.6303787231445312, -0.27191162109375, 0.361053466796875, 0.31970977783203125, -0.5392303466796875, 0.2181549072265625, -0.13249969482421875, 0.38831138610839844, 0.0928802490234375, 0.200653076171875, 0.12860488891601562, -0.266693115234375, 0.1385040283203125, -0.2349567413330078, 0.21302413940429688, -0.100067138671875, 0.0070953369140625, 0.03341484069824219, 0.0075206756591796875, 0.475006103515625, 0.18372344970703125, 0.31586456298828125, 0.06756401062011719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000015.npy"}
|
|
{"epoch": 0.022675736961451247, "step": 16, "batch_size": 64, "mean": 0.002114519476890564, "std": 0.2737308740615845, "min": -0.674591064453125, "p10": -0.29615325927734376, "median": -0.018108367919921875, "p90": 0.38298377990722665, "max": 0.735015869140625, "pos_frac": 0.46875, "sample": [-0.2582855224609375, 0.2683219909667969, -0.22829437255859375, 0.20528793334960938, 0.20222091674804688, -0.16890716552734375, 0.18952178955078125, 0.12693023681640625, 0.21794891357421875, -0.18761825561523438, -0.42087554931640625, 0.509033203125, -0.039703369140625, -0.4610786437988281, 0.3948020935058594, -0.3333148956298828, 0.04532814025878906, -0.055698394775390625, -0.046783447265625, -0.371978759765625, 0.18947601318359375, -0.19501495361328125, -0.59466552734375, -0.2597808837890625, 0.04815864562988281, -0.29106903076171875, 0.0861053466796875, -0.1010894775390625, -0.0894775390625, 0.735015869140625, 0.6246185302734375, 0.207000732421875, -0.01772308349609375, -0.01849365234375, -0.0001220703125, -0.14350128173828125, 0.4093780517578125, -0.1108245849609375, 0.027927398681640625, -0.20429420471191406, -0.09493064880371094, 0.17371749877929688, -0.2510833740234375, 0.4525794982910156, 0.44020843505859375, 0.025300979614257812, 0.09149932861328125, -0.29833221435546875, -0.05074882507324219, 0.11496543884277344, 0.07247352600097656, -0.10584831237792969, 0.13039112091064453, -0.2781829833984375, -0.09065628051757812, 0.011732101440429688, -0.18175506591796875, -0.674591064453125, 0.35540771484375, 0.021581649780273438, 0.14960670471191406, -0.046535491943359375, 0.31087493896484375, -0.030826568603515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000016.npy"}
|
|
{"epoch": 0.02418745275888133, "step": 17, "batch_size": 64, "mean": -0.026118546724319458, "std": 0.39888036251068115, "min": -1.4758148193359375, "p10": -0.39237174987792967, "median": -0.02620220184326172, "p90": 0.3973491668701172, "max": 1.14581298828125, "pos_frac": 0.46875, "sample": [-0.09135055541992188, 0.11273193359375, -0.1399993896484375, 0.07073783874511719, -0.1628875732421875, 0.08105659484863281, -1.4758148193359375, -0.12195014953613281, 0.08821487426757812, -0.32053375244140625, 0.208343505859375, -0.11413192749023438, 0.06890869140625, 0.3619194030761719, -0.08150863647460938, -0.2239227294921875, 0.002288818359375, 0.005802154541015625, -0.012273788452148438, -0.5337600708007812, 0.3075103759765625, 0.059780120849609375, -0.995574951171875, 0.5083389282226562, -0.040130615234375, -0.04641532897949219, 0.18698883056640625, -0.15921974182128906, -0.1266021728515625, -0.3723258972167969, -0.1471710205078125, -0.9542922973632812, 0.6614151000976562, -0.000980377197265625, 0.2924346923828125, 0.6224517822265625, -0.11557388305664062, 0.17113494873046875, -0.1108856201171875, -0.874542236328125, 0.39996337890625, -0.4876899719238281, 0.16172027587890625, 0.1811676025390625, 0.09949493408203125, -0.40096282958984375, 0.4216766357421875, 0.3943023681640625, 0.30744171142578125, 1.14581298828125, -0.1633758544921875, -0.3499755859375, -0.30694580078125, -0.10130500793457031, -0.068634033203125, -0.1744537353515625, 0.09900474548339844, 0.2607536315917969, -0.2080841064453125, 0.13010406494140625, 0.3986549377441406, 0.2852516174316406, -0.08849906921386719, -0.195220947265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000017.npy"}
|
|
{"epoch": 0.025699168556311415, "step": 18, "batch_size": 64, "mean": 0.06641131639480591, "std": 0.25935110449790955, "min": -1.008697509765625, "p10": -0.25290908813476565, "median": 0.10799598693847656, "p90": 0.3805524826049807, "max": 0.5103111267089844, "pos_frac": 0.625, "sample": [0.1634521484375, -0.10378646850585938, 0.14214706420898438, -0.046848297119140625, 0.16353988647460938, 0.26406097412109375, -0.028848648071289062, 0.29323577880859375, 0.5011520385742188, 0.4817771911621094, 0.15391159057617188, -0.2250518798828125, 0.09577560424804688, 0.11285781860351562, 0.2383575439453125, 0.142913818359375, 0.40303611755371094, -0.15763092041015625, 0.09012222290039062, 0.1031341552734375, -0.1456451416015625, 0.19287109375, 0.05641937255859375, 0.23883438110351562, -0.2802276611328125, 0.20265769958496094, 0.12596893310546875, 0.4205493927001953, 0.049060821533203125, 0.1562671661376953, 0.2250213623046875, 0.14902496337890625, 0.0436859130859375, -0.321014404296875, -0.2923393249511719, -0.08443450927734375, -0.09214973449707031, 0.3173408508300781, 0.2610321044921875, -0.100677490234375, -0.03429412841796875, 0.43956756591796875, -0.25746917724609375, -0.25457763671875, -0.24901580810546875, 0.3280906677246094, -0.1971111297607422, 0.00524139404296875, -0.038890838623046875, -1.008697509765625, 0.14190292358398438, 0.19042396545410156, -0.4325523376464844, -0.07081413269042969, 0.2737274169921875, -0.07342147827148438, -0.13380813598632812, 0.014326095581054688, 0.2683563232421875, -0.058696746826171875, 0.40610504150390625, 0.26268768310546875, 0.30937957763671875, 0.5103111267089844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000018.npy"}
|
|
{"epoch": 0.027210884353741496, "step": 19, "batch_size": 64, "mean": 0.08624613285064697, "std": 0.35539817810058594, "min": -1.096954345703125, "p10": -0.2709625244140625, "median": 0.07560443878173828, "p90": 0.41291370391845705, "max": 1.3602294921875, "pos_frac": 0.65625, "sample": [0.3596649169921875, 0.07801055908203125, -0.1278820037841797, 0.166656494140625, 0.160552978515625, 0.34881591796875, 0.16669464111328125, -0.033267974853515625, -0.2993316650390625, -0.10919570922851562, 0.018524169921875, 0.0727691650390625, 0.22723388671875, 0.12026596069335938, 0.35338592529296875, -0.08238410949707031, 0.07486152648925781, 0.0857696533203125, -0.1058807373046875, 0.41766929626464844, 0.17307281494140625, -0.27974700927734375, 0.050994873046875, -0.6024169921875, 1.3602294921875, 0.46227264404296875, 0.11255645751953125, 0.05028533935546875, -0.5432586669921875, 0.07218742370605469, 0.787994384765625, 0.34424591064453125, 0.07634735107421875, 0.10332489013671875, -1.096954345703125, 0.356475830078125, 0.1809234619140625, 0.2053661346435547, 0.3091773986816406, 0.028369903564453125, -0.011064529418945312, -0.07854461669921875, 0.00079345703125, -0.23796463012695312, -0.25046539306640625, 0.06538581848144531, 0.106201171875, 0.459747314453125, -0.06494140625, 0.8415756225585938, 0.7648773193359375, -0.008771896362304688, -0.1072235107421875, -0.0070629119873046875, -0.6726760864257812, 0.40181732177734375, 0.1592254638671875, -0.10697555541992188, -0.14473533630371094, -0.3169403076171875, 0.1405200958251953, 0.046016693115234375, 0.17591094970703125, 0.3206672668457031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000019.npy"}
|
|
{"epoch": 0.02872260015117158, "step": 20, "batch_size": 64, "mean": -0.020452216267585754, "std": 0.288522332906723, "min": -0.7599945068359375, "p10": -0.47275543212890625, "median": 0.005512237548828125, "p90": 0.2666378021240235, "max": 0.7068901062011719, "pos_frac": 0.546875, "sample": [0.40937042236328125, -0.20379257202148438, 0.001251220703125, 0.10237884521484375, -0.18454551696777344, -0.64678955078125, -0.06871414184570312, 0.22662353515625, 0.5727157592773438, -0.5438308715820312, 0.06576919555664062, 0.14870452880859375, -0.2645111083984375, 0.07941246032714844, -0.5381622314453125, -0.5526199340820312, -0.7599945068359375, -0.08015823364257812, -0.01690673828125, -0.47991943359375, 0.1915607452392578, 0.0022563934326171875, 0.008768081665039062, 0.05049896240234375, 0.013042449951171875, 0.01592254638671875, 0.1371440887451172, -0.1578655242919922, 0.475128173828125, -0.1881246566772461, -0.31292724609375, -0.08443450927734375, 0.1333160400390625, 0.2531394958496094, -0.3946685791015625, 0.15217971801757812, 0.4728355407714844, -0.1751556396484375, 0.108978271484375, 0.05643463134765625, -0.11094093322753906, 0.044353485107421875, 0.13895034790039062, 0.1390838623046875, -0.008026123046875, 0.2122650146484375, 0.175506591796875, -0.4560394287109375, 0.25260162353515625, -0.509918212890625, -0.13246917724609375, 0.00058746337890625, 0.1983489990234375, 0.27242279052734375, -0.1320953369140625, 0.029359817504882812, 0.7068901062011719, 0.1578369140625, 0.2765350341796875, -0.125732421875, -0.0642547607421875, -0.23328399658203125, -0.15804481506347656, -0.0071887969970703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000020.npy"}
|
|
{"epoch": 0.030234315948601664, "step": 21, "batch_size": 64, "mean": -0.040478646755218506, "std": 0.278280645608902, "min": -0.658111572265625, "p10": -0.43608760833740223, "median": -0.04176902770996094, "p90": 0.34604473114013673, "max": 0.66943359375, "pos_frac": 0.40625, "sample": [0.3189563751220703, -0.2287445068359375, -0.025806427001953125, -0.556427001953125, 0.373199462890625, -0.246856689453125, 0.354461669921875, -0.2003154754638672, -0.2057514190673828, 0.058269500732421875, 0.26634979248046875, 0.23656463623046875, 0.3875579833984375, -0.00299072265625, -0.050075531005859375, -0.1428375244140625, -0.02581787109375, 0.3759613037109375, -0.5673904418945312, 0.196746826171875, 0.1202545166015625, 0.15430450439453125, -0.14093017578125, 0.20754241943359375, -0.1612701416015625, -0.474456787109375, -0.13714218139648438, 0.36026763916015625, 0.00521087646484375, -0.3465595245361328, -0.13604736328125, -0.09206199645996094, -0.65020751953125, -0.12152481079101562, -0.4879608154296875, 0.34642982482910156, 0.66943359375, 0.12773895263671875, -0.5060882568359375, -0.16697311401367188, -0.08147430419921875, -0.0199737548828125, -0.658111572265625, 0.1833038330078125, -0.34024810791015625, 0.03433418273925781, -0.149200439453125, 0.34514617919921875, -0.3162384033203125, 0.04295921325683594, -0.09947967529296875, -0.044281005859375, 0.23055267333984375, -0.18249130249023438, -0.21923255920410156, 0.046539306640625, -0.03411102294921875, 0.0357208251953125, 0.12521934509277344, -0.2530517578125, -0.039257049560546875, -0.2414093017578125, 0.242401123046875, -0.08326339721679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000021.npy"}
|
|
{"epoch": 0.031746031746031744, "step": 22, "batch_size": 64, "mean": -0.02203691005706787, "std": 0.29658693075180054, "min": -0.8524932861328125, "p10": -0.3980262756347656, "median": -0.009622573852539062, "p90": 0.3129314422607422, "max": 0.8594207763671875, "pos_frac": 0.453125, "sample": [0.0703582763671875, -0.5122222900390625, -0.09492874145507812, -0.011371612548828125, -0.016954421997070312, -0.06840896606445312, 0.2830638885498047, -0.10308837890625, 0.22504615783691406, 0.14758872985839844, 0.3041114807128906, -0.00787353515625, -0.09973526000976562, 0.01667022705078125, 0.2548789978027344, 0.13799285888671875, -0.3515472412109375, -0.2791175842285156, 0.0179595947265625, -0.39281463623046875, 0.172149658203125, -0.0225982666015625, 0.32097625732421875, -0.06281089782714844, 0.08153533935546875, -0.3944549560546875, -0.10514259338378906, 0.06132316589355469, -0.0011272430419921875, -0.012760162353515625, -0.4669036865234375, -0.427978515625, 0.15791702270507812, -0.13308334350585938, 0.0281982421875, -0.20128631591796875, -0.055309295654296875, 0.02759552001953125, -0.709075927734375, -0.004058837890625, 0.3250255584716797, -0.8524932861328125, 0.0290985107421875, 0.208282470703125, 0.049564361572265625, -0.5549774169921875, 0.43964385986328125, 0.8594207763671875, 0.27414703369140625, -0.3976898193359375, -0.16987991333007812, 0.31671142578125, 0.07201385498046875, -0.39817047119140625, 0.6406326293945312, -0.01694488525390625, -0.04680442810058594, -0.11227226257324219, 0.361968994140625, 0.08075714111328125, -0.07269287109375, -0.135009765625, 0.2151775360107422, -0.298583984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000022.npy"}
|
|
{"epoch": 0.03325774754346183, "step": 23, "batch_size": 64, "mean": 0.0519389808177948, "std": 0.35646840929985046, "min": -0.69366455078125, "p10": -0.40419311523437496, "median": 0.021755218505859375, "p90": 0.5761817932128909, "max": 0.834808349609375, "pos_frac": 0.515625, "sample": [-0.6364288330078125, -0.371124267578125, 0.7550582885742188, -0.2265625, -0.07769393920898438, 0.28522491455078125, -0.233306884765625, -0.69366455078125, -0.31900787353515625, 0.5182342529296875, -0.0121612548828125, -0.1457843780517578, 0.5242233276367188, 0.4629688262939453, 0.31439781188964844, -0.60540771484375, 0.0402069091796875, 0.3107643127441406, -0.16963958740234375, 0.23133087158203125, -0.431427001953125, 0.165313720703125, -0.0281524658203125, -0.18365478515625, 0.18638229370117188, 0.834808349609375, 0.0494537353515625, 0.036529541015625, 0.59844970703125, 0.073760986328125, -0.2521820068359375, 0.06006622314453125, 0.7399520874023438, 0.1084442138671875, 0.7676773071289062, -0.07630157470703125, -0.422943115234375, 0.715362548828125, -0.018375396728515625, -0.5342998504638672, 0.02504730224609375, -0.418365478515625, 0.06476211547851562, 0.1583099365234375, 0.13185501098632812, -0.118011474609375, -0.1737060546875, -0.009639739990234375, -0.15012359619140625, -0.038482666015625, 0.018463134765625, -0.01320648193359375, -0.07387924194335938, 0.08911895751953125, -0.08147048950195312, -0.14204978942871094, 0.40607452392578125, -0.22551727294921875, 0.34711456298828125, 0.164794921875, 0.7978973388671875, 0.17611312866210938, 0.22751235961914062, -0.17900848388671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000023.npy"}
|
|
{"epoch": 0.03476946334089191, "step": 24, "batch_size": 64, "mean": -0.011387407779693604, "std": 0.27748751640319824, "min": -0.6597366333007812, "p10": -0.37102584838867186, "median": 0.03801727294921875, "p90": 0.3230674743652346, "max": 0.6963348388671875, "pos_frac": 0.546875, "sample": [-0.2460784912109375, 0.27114105224609375, -0.317413330078125, 0.231689453125, -0.131500244140625, -0.21110153198242188, 0.49238014221191406, -0.14789581298828125, 0.0323638916015625, -0.379302978515625, -0.3141441345214844, 0.14215469360351562, 0.13338279724121094, -0.3496551513671875, 0.6963348388671875, 0.1597747802734375, 0.11663055419921875, -0.15885543823242188, 0.0764312744140625, 0.08039665222167969, -0.37577056884765625, 0.3621940612792969, -0.10155487060546875, -0.2629661560058594, 0.092926025390625, -0.09516143798828125, -0.01456451416015625, -0.4789619445800781, -0.5747528076171875, 0.3840484619140625, 0.12951087951660156, -0.2894153594970703, -0.6597366333007812, -0.12611770629882812, 0.0570831298828125, 0.3453216552734375, -0.260467529296875, 0.10651969909667969, -0.06802749633789062, 0.15456390380859375, 0.16268157958984375, 0.211395263671875, 0.11011695861816406, 0.09918212890625, -0.2981414794921875, 0.1050262451171875, 0.21619415283203125, 0.2208709716796875, 0.07846832275390625, -0.2375640869140625, -0.3822345733642578, -0.08637619018554688, 0.13460540771484375, 0.043670654296875, -0.380218505859375, -0.074432373046875, 0.0072803497314453125, -0.359954833984375, -0.20307159423828125, 0.11980438232421875, 0.45325469970703125, 0.6252899169921875, 0.201934814453125, 0.0020198822021484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000024.npy"}
|
|
{"epoch": 0.036281179138321996, "step": 25, "batch_size": 64, "mean": -0.022065043449401855, "std": 0.283197820186615, "min": -1.0222320556640625, "p10": -0.3009450912475586, "median": 0.012280464172363281, "p90": 0.298956298828125, "max": 0.602447509765625, "pos_frac": 0.515625, "sample": [0.08855438232421875, -0.3971405029296875, -0.17287254333496094, 0.04107666015625, -0.09656715393066406, 0.1256847381591797, 0.5583038330078125, -0.3056354522705078, -0.1641693115234375, 0.2202606201171875, -0.3926849365234375, 0.025531768798828125, -0.15015411376953125, -0.23162841796875, 0.2963714599609375, 0.07579803466796875, -0.8121871948242188, 0.045200347900390625, -0.16855621337890625, 0.441864013671875, -0.12720108032226562, 0.11602210998535156, -0.4369964599609375, 0.602447509765625, 0.054767608642578125, -0.00710296630859375, 0.046138763427734375, 0.542999267578125, 0.027156829833984375, -0.059619903564453125, 0.11033058166503906, 0.09925270080566406, -0.268310546875, 0.2536773681640625, 0.3274383544921875, -0.29000091552734375, 0.04888916015625, -0.1515483856201172, -0.13556861877441406, -0.1241607666015625, -1.0222320556640625, -0.21068572998046875, -0.0327606201171875, 0.011270523071289062, 0.2470245361328125, -0.0881805419921875, 0.08771514892578125, 0.017818450927734375, 0.06793975830078125, -0.19695281982421875, -0.13246536254882812, -0.0807647705078125, 0.13007354736328125, -0.10796737670898438, 0.02965545654296875, -0.09932327270507812, 0.0132904052734375, -0.008632659912109375, 0.5592498779296875, -0.49977874755859375, -0.14600753784179688, 0.3000640869140625, 0.01690673828125, 0.0769195556640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000025.npy"}
|
|
{"epoch": 0.03779289493575208, "step": 26, "batch_size": 64, "mean": 0.05385178327560425, "std": 0.30056291818618774, "min": -0.6165313720703125, "p10": -0.30476417541503903, "median": 0.05456066131591797, "p90": 0.3983448028564453, "max": 1.15484619140625, "pos_frac": 0.578125, "sample": [0.18330001831054688, -0.6165313720703125, -0.46245574951171875, 0.23914337158203125, 0.19707107543945312, 0.34796714782714844, -0.1978302001953125, 0.105010986328125, -0.11205673217773438, 0.3600120544433594, 0.09536361694335938, 1.15484619140625, 0.2775421142578125, -0.3243370056152344, -0.206146240234375, 0.32660675048828125, -0.09102630615234375, 0.2916431427001953, 0.20642852783203125, 0.24341964721679688, 0.0484466552734375, -0.04653739929199219, -0.11504364013671875, -0.04293632507324219, -0.1657257080078125, -0.25909423828125, 0.43189239501953125, 0.040042877197265625, 0.09024810791015625, -0.10382461547851562, 0.08296394348144531, 0.4377918243408203, 0.18115997314453125, 0.5009422302246094, -0.1869964599609375, -0.4951171875, -0.25783538818359375, 0.036418914794921875, 0.13307952880859375, 0.048187255859375, 0.06067466735839844, -0.16400909423828125, -0.0050754547119140625, 0.1732025146484375, 0.3986320495605469, -0.1402740478515625, -0.343902587890625, 0.14494895935058594, 0.397674560546875, -0.13916969299316406, 0.2100982666015625, -0.40604400634765625, 0.17578887939453125, 0.5713043212890625, -0.3785266876220703, 0.176788330078125, -0.21397781372070312, 0.3073692321777344, 0.5910720825195312, 0.08533287048339844, -0.20576858520507812, 0.014886856079101562, -0.2301311492919922, -0.01041412353515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000026.npy"}
|
|
{"epoch": 0.039304610733182165, "step": 27, "batch_size": 64, "mean": 0.04458439350128174, "std": 0.30530720949172974, "min": -0.790771484375, "p10": -0.3312957763671875, "median": 0.07229423522949219, "p90": 0.3699697494506836, "max": 0.843597412109375, "pos_frac": 0.578125, "sample": [-0.1318206787109375, 0.21976852416992188, 0.1872406005859375, 0.10059928894042969, -0.012086868286132812, 0.33889007568359375, 0.01753997802734375, -0.02933502197265625, 0.13994598388671875, -0.36187744140625, -0.47231292724609375, 0.03604888916015625, 0.177215576171875, 0.06803512573242188, -0.16266441345214844, -0.30237579345703125, 0.3732490539550781, -0.08876800537109375, -0.4798126220703125, -0.48162841796875, 0.12052154541015625, 0.318328857421875, 0.48675537109375, 0.3234405517578125, 0.843597412109375, 0.6700897216796875, 0.153533935546875, -0.08868789672851562, 0.35324859619140625, -0.2947654724121094, 0.19246673583984375, 0.09899139404296875, -0.05529975891113281, 0.06089019775390625, 0.44115447998046875, -0.35041046142578125, 0.3623180389404297, 0.8219680786132812, -0.04018402099609375, -0.13610076904296875, 0.22840118408203125, -0.20960235595703125, -0.06365585327148438, 0.100616455078125, -0.2953147888183594, -0.174774169921875, -0.3120269775390625, 0.4352455139160156, -0.2012176513671875, 0.2106494903564453, 0.08617401123046875, -0.3395538330078125, 0.17787551879882812, 0.0765533447265625, 0.10596466064453125, -0.14854049682617188, 0.0115966796875, -0.22230148315429688, 0.2354278564453125, -0.09233474731445312, 0.17650604248046875, -0.790771484375, 0.2836341857910156, 0.15714263916015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000027.npy"}
|
|
{"epoch": 0.04081632653061224, "step": 28, "batch_size": 64, "mean": -0.018820255994796753, "std": 0.31523367762565613, "min": -0.68634033203125, "p10": -0.4337158203125, "median": -0.005318641662597656, "p90": 0.2882316589355469, "max": 0.9301300048828125, "pos_frac": 0.484375, "sample": [-0.3825836181640625, -0.68634033203125, -0.2213134765625, 0.28394317626953125, -0.3684234619140625, -0.004161834716796875, -0.445709228515625, -0.4738311767578125, 0.11770248413085938, 0.2834930419921875, -0.5352287292480469, 0.5434188842773438, -0.21935272216796875, 0.290069580078125, -0.2908172607421875, -0.31156158447265625, -0.14606094360351562, -0.47887611389160156, 0.17251205444335938, 0.00382232666015625, 0.005889892578125, 0.07220458984375, 0.013513565063476562, 0.06253814697265625, 0.1712665557861328, -0.0064754486083984375, -0.23684120178222656, 0.2576103210449219, -0.59930419921875, 0.11071014404296875, -0.1597442626953125, -0.2632713317871094, 0.2715644836425781, 0.7104339599609375, 0.49179840087890625, 0.359710693359375, 0.00897216796875, -0.2791595458984375, -0.11184310913085938, -0.1717681884765625, -0.08832550048828125, -0.19649505615234375, 0.0723876953125, 0.22129440307617188, 0.14898681640625, 0.05542755126953125, 0.015325546264648438, -0.4614410400390625, 0.17090988159179688, -0.009021759033203125, -0.1536712646484375, 0.9301300048828125, 0.26230430603027344, -0.405731201171875, 0.5968017578125, -0.103973388671875, -0.01908111572265625, -0.16539764404296875, -0.19432830810546875, -0.05836677551269531, 0.27013397216796875, 0.16167259216308594, 0.15277862548828125, -0.24532318115234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000028.npy"}
|
|
{"epoch": 0.042328042328042326, "step": 29, "batch_size": 64, "mean": 0.022249162197113037, "std": 0.3749988079071045, "min": -0.9905853271484375, "p10": -0.42625732421875, "median": 0.0022525787353515625, "p90": 0.45348567962646485, "max": 1.2249755859375, "pos_frac": 0.5, "sample": [0.44876861572265625, -0.4010162353515625, 0.4555072784423828, -0.8813629150390625, 0.5532989501953125, -0.19316864013671875, -0.4483795166015625, -0.17203140258789062, -0.5528106689453125, 0.7527923583984375, 0.4123802185058594, -0.015834808349609375, 0.19387054443359375, -0.16223526000976562, 0.4852485656738281, 0.33602142333984375, -0.9905853271484375, -0.24398422241210938, -0.4112396240234375, 0.5705490112304688, -0.00762176513671875, -0.11419677734375, -0.0047149658203125, 0.16124725341796875, 0.33416748046875, -0.097442626953125, -0.1957855224609375, -0.2746925354003906, -0.221343994140625, 0.24242782592773438, 0.4085044860839844, -0.23040771484375, -0.196380615234375, 0.25305747985839844, -0.586212158203125, 0.0076694488525390625, 0.18603897094726562, 0.1022186279296875, -0.13434219360351562, 0.313629150390625, -0.07731437683105469, -0.08915328979492188, -0.005889892578125, -0.20429611206054688, 0.15076255798339844, -0.39414215087890625, 0.22407913208007812, -0.1470184326171875, 0.2397918701171875, 0.15716171264648438, 0.0640869140625, -0.0031642913818359375, 0.3589935302734375, 0.027318954467773438, -0.4326934814453125, -0.10143280029296875, -0.45920562744140625, 0.16384124755859375, 0.05153656005859375, 1.2249755859375, 0.6358184814453125, 0.0749053955078125, 0.11796379089355469, 0.16541290283203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000029.npy"}
|
|
{"epoch": 0.04383975812547241, "step": 30, "batch_size": 64, "mean": -0.010949641466140747, "std": 0.320455402135849, "min": -0.995391845703125, "p10": -0.3738441467285156, "median": -0.02999114990234375, "p90": 0.33927001953125013, "max": 1.161376953125, "pos_frac": 0.453125, "sample": [-0.351226806640625, 0.024522781372070312, 0.01613616943359375, -0.47534942626953125, -0.08517837524414062, 0.28816986083984375, 0.1173858642578125, 0.1123199462890625, 0.02159881591796875, -0.0645751953125, -0.059040069580078125, -0.15595245361328125, -0.30025482177734375, 0.5760726928710938, -0.4734344482421875, 0.371795654296875, -0.3313484191894531, 0.53741455078125, -0.5678253173828125, 0.47043609619140625, 0.0428619384765625, -0.061756134033203125, -0.03710174560546875, 0.2918586730957031, -0.0987091064453125, 0.1724700927734375, 0.19381332397460938, -0.44054412841796875, 0.1635894775390625, 0.16104888916015625, -0.047607421875, 0.033565521240234375, 0.01660919189453125, 0.49560546875, -0.538818359375, 0.30170440673828125, -0.38353729248046875, -0.0218963623046875, -0.1536865234375, -0.2936592102050781, -0.0519561767578125, 0.27661895751953125, 1.161376953125, -0.1769256591796875, -0.22275924682617188, 0.35536956787109375, -0.1311492919921875, -0.039524078369140625, 0.07300376892089844, 0.071533203125, -0.10158920288085938, -0.021759033203125, 0.20355987548828125, -0.26648712158203125, -0.19216156005859375, -0.25772666931152344, -0.02288055419921875, 0.26050567626953125, -0.995391845703125, -0.07101631164550781, 0.21334075927734375, -0.12403106689453125, -0.1719207763671875, 0.06371498107910156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000030.npy"}
|
|
{"epoch": 0.045351473922902494, "step": 31, "batch_size": 64, "mean": 0.11337828636169434, "std": 0.29911476373672485, "min": -0.6525421142578125, "p10": -0.23199787139892578, "median": 0.11482524871826172, "p90": 0.4305105209350586, "max": 0.94635009765625, "pos_frac": 0.65625, "sample": [-0.34757232666015625, 0.1215972900390625, -0.6525421142578125, 0.2699317932128906, -0.1897258758544922, 0.10858154296875, 0.03397369384765625, 0.16318130493164062, 0.1136474609375, 0.00936126708984375, 0.1891040802001953, 0.24526214599609375, 0.42771339416503906, 0.20233154296875, 0.111968994140625, 0.415191650390625, 0.2636375427246094, 0.2987823486328125, 0.27022552490234375, -0.19803619384765625, 0.6954498291015625, 0.11600303649902344, -0.026092529296875, 0.1016387939453125, 0.27182960510253906, 0.2504425048828125, 0.6994705200195312, 0.2615203857421875, -0.2346649169921875, 0.191986083984375, 0.054107666015625, 0.1950531005859375, 0.0225372314453125, -0.2567424774169922, 0.056713104248046875, 0.08828353881835938, -0.1883087158203125, -0.12583541870117188, -0.22577476501464844, -0.23535728454589844, -0.1029052734375, -0.17388916015625, -0.15243911743164062, 0.7686920166015625, 0.94635009765625, 0.26734161376953125, -0.05193901062011719, -0.21258926391601562, 0.24993515014648438, 0.19988632202148438, -0.141815185546875, -0.3175048828125, 0.129852294921875, 0.22750091552734375, 0.90673828125, -0.03964996337890625, 0.2332477569580078, 0.1814422607421875, -0.3209991455078125, 0.43170928955078125, -0.058135986328125, 0.298736572265625, 0.471160888671875, -0.0533905029296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000031.npy"}
|
|
{"epoch": 0.04686318972033258, "step": 32, "batch_size": 64, "mean": 0.02274876832962036, "std": 0.3433202803134918, "min": -0.8424911499023438, "p10": -0.39422912597656246, "median": -0.04020118713378906, "p90": 0.4405700683593751, "max": 0.7986221313476562, "pos_frac": 0.46875, "sample": [0.063720703125, 0.21495819091796875, -0.8424911499023438, -0.04241943359375, -0.1690521240234375, -0.25955963134765625, 0.0029296875, -0.5317459106445312, 0.2548370361328125, 0.33145904541015625, 0.78387451171875, 0.4146575927734375, 0.12253570556640625, -0.22735977172851562, 0.3847846984863281, -0.091949462890625, -0.1227569580078125, -0.12530136108398438, -0.08241653442382812, -0.15423965454101562, -0.13534927368164062, -0.11629295349121094, -0.11984825134277344, 0.4516754150390625, 0.735992431640625, -0.08535575866699219, 0.4704780578613281, 0.35113525390625, 0.2219696044921875, 0.7986221313476562, 0.24671173095703125, 0.27880096435546875, 0.04282188415527344, -0.15738677978515625, 0.727508544921875, 0.550445556640625, -0.6335678100585938, 0.19961166381835938, -0.48895263671875, 0.30896759033203125, 0.11168670654296875, 0.1485137939453125, -0.04814910888671875, -0.4190826416015625, 0.0297088623046875, 0.3017578125, -0.22386550903320312, -0.23890304565429688, -0.680450439453125, -0.09242057800292969, 0.278228759765625, 0.3314170837402344, -0.22800064086914062, -0.038600921630859375, -0.037586212158203125, -0.2911491394042969, -0.143585205078125, 0.13594818115234375, -0.1595306396484375, 0.03828239440917969, -0.07029342651367188, -0.04180145263671875, -0.3770751953125, -0.401580810546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000032.npy"}
|
|
{"epoch": 0.04837490551776266, "step": 33, "batch_size": 64, "mean": 0.02684326469898224, "std": 0.3912205398082733, "min": -1.71429443359375, "p10": -0.39027633666992184, "median": 0.07732868194580078, "p90": 0.45199584960937506, "max": 0.8505630493164062, "pos_frac": 0.640625, "sample": [-0.8025741577148438, -0.487945556640625, 0.1381378173828125, -0.6424484252929688, 0.20319557189941406, 0.003875732421875, 0.46138763427734375, 0.2037944793701172, 0.14557647705078125, 0.11579132080078125, -0.2852783203125, 0.09726524353027344, -0.1541576385498047, -0.3535118103027344, 0.8505630493164062, -0.4060325622558594, -0.2098388671875, 0.4322357177734375, 0.18689346313476562, 0.8175048828125, 0.0552520751953125, 0.6451873779296875, 0.1519775390625, 0.12126541137695312, -0.5828704833984375, 0.28594970703125, 0.12677001953125, 0.05123138427734375, -0.2753143310546875, -0.1960906982421875, 0.09505081176757812, 0.30118370056152344, 0.42186737060546875, 0.05960655212402344, -0.08744621276855469, 0.12497901916503906, 0.03178596496582031, 0.2253875732421875, -0.1556529998779297, -0.0036373138427734375, -0.119384765625, -0.1950531005859375, -0.028913497924804688, 0.04302215576171875, -0.0230865478515625, 0.099853515625, 0.4604644775390625, 0.025686264038085938, 0.1345052719116211, 0.19572830200195312, -0.2434062957763672, 0.0370635986328125, 0.102783203125, -0.23541259765625, 0.1818695068359375, 0.20398712158203125, -0.04257965087890625, 0.2195281982421875, -1.71429443359375, 0.4613189697265625, 0.058971405029296875, 0.7639617919921875, 0.2263813018798828, -0.6059417724609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000033.npy"}
|
|
{"epoch": 0.049886621315192746, "step": 34, "batch_size": 64, "mean": 0.0601862370967865, "std": 0.22527766227722168, "min": -0.36962127685546875, "p10": -0.19989242553710937, "median": 0.021892547607421875, "p90": 0.34317436218261727, "max": 0.6914596557617188, "pos_frac": 0.578125, "sample": [-0.121124267578125, 0.6914596557617188, 0.3505706787109375, 0.3259162902832031, 0.00098419189453125, 0.2085418701171875, -0.00377655029296875, 0.3521575927734375, -0.06179237365722656, 0.3212890625, 0.3952789306640625, -0.021484375, -0.032012939453125, 0.160400390625, -0.20082855224609375, -0.25783538818359375, 0.115997314453125, 0.279388427734375, 0.2079334259033203, 0.39922332763671875, -0.2098236083984375, 0.021081924438476562, 0.06552505493164062, 0.018463134765625, -0.30615997314453125, 0.13513946533203125, 0.32056427001953125, 0.19837570190429688, 0.1602783203125, -0.36962127685546875, 0.2765007019042969, 0.43699073791503906, 0.022703170776367188, 0.013086318969726562, 0.1557769775390625, -0.1977081298828125, -0.1868438720703125, 0.24894332885742188, 0.0329132080078125, 0.21289825439453125, -0.17879486083984375, -0.2693901062011719, -0.0318756103515625, 0.11493110656738281, 0.0276641845703125, 0.301666259765625, -0.19136810302734375, 0.018268585205078125, 0.1781005859375, -0.13068771362304688, 0.24823951721191406, -0.04236602783203125, 0.5397872924804688, -0.0510711669921875, -0.10573959350585938, -0.12457656860351562, 0.12134552001953125, -0.05927276611328125, -0.1487903594970703, -0.02523040771484375, 0.20882415771484375, -0.1905670166015625, -0.3677215576171875, -0.14882659912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000034.npy"}
|
|
{"epoch": 0.05139833711262283, "step": 35, "batch_size": 64, "mean": 0.015972524881362915, "std": 0.33573222160339355, "min": -0.5987396240234375, "p10": -0.41553382873535155, "median": 0.005766868591308594, "p90": 0.40791473388671873, "max": 0.8925933837890625, "pos_frac": 0.5, "sample": [0.12313079833984375, -0.5263481140136719, 0.6629238128662109, 0.046253204345703125, -0.31673240661621094, 0.6556472778320312, -0.3680877685546875, -0.1524829864501953, -0.25702667236328125, -0.19205093383789062, -0.15118408203125, 0.04028511047363281, 0.3913898468017578, 0.18793678283691406, -0.2477874755859375, 0.2960243225097656, 0.8925933837890625, -0.41736602783203125, 0.1165008544921875, -0.1607666015625, 0.2175579071044922, 0.11869049072265625, 0.5049591064453125, 0.03737640380859375, -0.0071048736572265625, -0.4663543701171875, -0.33892822265625, 0.0572357177734375, -0.4814453125, -0.09493255615234375, -0.4680938720703125, 0.5355072021484375, 0.150115966796875, -0.0353851318359375, -0.23067092895507812, -0.2272186279296875, 0.026729583740234375, -0.248199462890625, 0.06857872009277344, -0.2756919860839844, 0.01863861083984375, 0.3996543884277344, 0.7645378112792969, -0.25881195068359375, -0.11225509643554688, 0.36238861083984375, 0.3511619567871094, -0.24568939208984375, -0.028354644775390625, 0.0315399169921875, -0.22064781188964844, -0.4112586975097656, 0.4076690673828125, -0.5987396240234375, -0.48724365234375, 0.238494873046875, 0.40802001953125, -0.0336456298828125, 0.12092971801757812, 0.3376312255859375, 0.2865924835205078, 0.316009521484375, -0.056446075439453125, -0.033512115478515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000035.npy"}
|
|
{"epoch": 0.05291005291005291, "step": 36, "batch_size": 64, "mean": 0.002035379409790039, "std": 0.3715396821498871, "min": -0.829254150390625, "p10": -0.4920408248901366, "median": -0.0016450881958007812, "p90": 0.48935890197753923, "max": 1.1581878662109375, "pos_frac": 0.5, "sample": [0.3836212158203125, 1.1581878662109375, -0.232940673828125, -0.07987022399902344, -0.32236480712890625, -0.570953369140625, 0.12587738037109375, 0.3644142150878906, 0.2376995086669922, 0.20983123779296875, -0.0901031494140625, -0.019430160522460938, 0.5045623779296875, -0.584686279296875, 0.06321525573730469, -0.2282257080078125, 0.2425537109375, -0.26177978515625, 0.06324577331542969, 0.3830986022949219, 0.028966903686523438, 0.51910400390625, 0.01511383056640625, -0.11590957641601562, -0.3338127136230469, -0.5247039794921875, -0.241607666015625, 0.19673919677734375, 0.28810882568359375, -0.699432373046875, -0.0548553466796875, 0.1589221954345703, -0.2917938232421875, 0.244720458984375, -0.829254150390625, -0.14697265625, -0.14873504638671875, 0.509765625, -0.41582679748535156, -0.07373428344726562, -0.14765548706054688, 0.4246673583984375, 0.6837005615234375, -0.40683937072753906, 0.003021240234375, -0.0063114166259765625, -0.1191558837890625, 0.4538841247558594, 0.6189498901367188, 0.5545768737792969, 0.051921844482421875, -0.30452728271484375, 0.10112571716308594, 0.0387115478515625, -0.16336822509765625, -0.5550765991210938, 0.246856689453125, 0.0126190185546875, -0.701812744140625, -0.05593109130859375, -0.25885963439941406, 0.3832244873046875, -0.16106414794921875, 0.0068511962890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000036.npy"}
|
|
{"epoch": 0.05442176870748299, "step": 37, "batch_size": 64, "mean": 0.12227392196655273, "std": 0.4483932852745056, "min": -0.810638427734375, "p10": -0.31193733215332026, "median": 0.04196643829345703, "p90": 0.6759025573730474, "max": 2.08447265625, "pos_frac": 0.640625, "sample": [0.28339385986328125, 0.93011474609375, 0.21585464477539062, -0.14267730712890625, 0.3560943603515625, 0.22783660888671875, 0.00228118896484375, 0.045085906982421875, 0.0158538818359375, -0.08887481689453125, -0.603302001953125, 0.3402824401855469, 0.7994270324707031, 0.959503173828125, -0.3303337097167969, -0.48888397216796875, 0.1142730712890625, 0.75537109375, 0.13594818115234375, 0.385650634765625, -0.26279640197753906, -0.19629478454589844, -0.810638427734375, 0.159393310546875, -0.15118408203125, 0.001155853271484375, -0.08542251586914062, -0.12630653381347656, -0.05008697509765625, -0.269012451171875, 0.4343719482421875, 0.03281402587890625, 0.37294769287109375, 0.5412826538085938, 0.2745361328125, 0.34114837646484375, -0.06815719604492188, -0.16184043884277344, 0.11760139465332031, 0.028255462646484375, 2.08447265625, 0.038482666015625, 0.17873764038085938, 0.1275482177734375, -0.6406021118164062, 0.15682220458984375, 0.18486785888671875, 0.413360595703125, -0.24957275390625, -0.4100189208984375, 0.20595741271972656, -0.2348785400390625, -0.006748199462890625, 0.03656005859375, -0.06671142578125, 0.4138679504394531, 0.0142364501953125, 0.2637920379638672, 0.058502197265625, -0.22048187255859375, 0.7335968017578125, -0.4675750732421875, 0.03884696960449219, 1.1378021240234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000037.npy"}
|
|
{"epoch": 0.055933484504913075, "step": 38, "batch_size": 64, "mean": 0.019394874572753906, "std": 0.3999164402484894, "min": -0.8193893432617188, "p10": -0.5030078887939452, "median": 0.015474319458007812, "p90": 0.5647941589355472, "max": 1.122039794921875, "pos_frac": 0.53125, "sample": [-0.02462005615234375, 0.7020721435546875, 0.24416351318359375, 1.0967216491699219, -0.3040771484375, 0.07612991333007812, 0.15575408935546875, 0.7798652648925781, -0.09345245361328125, 0.3541259765625, 0.2587623596191406, -0.1284332275390625, -0.02918243408203125, -0.14298439025878906, 0.10337066650390625, -0.36592864990234375, -0.32680511474609375, -0.06923675537109375, -0.32738494873046875, 0.12265777587890625, -0.209869384765625, 0.1639862060546875, -0.6693878173828125, -0.7206344604492188, 0.15155029296875, -0.3112640380859375, 0.3828277587890625, -0.8193893432617188, -0.7498321533203125, 0.1091766357421875, -0.3136749267578125, -0.28859710693359375, 0.08870124816894531, 0.04620361328125, 0.6876983642578125, 0.48484039306640625, 0.20276641845703125, -0.5506782531738281, 0.031787872314453125, -0.06853103637695312, -0.020076751708984375, -0.16776275634765625, 0.3399028778076172, 0.1011199951171875, 0.021373748779296875, -0.6438102722167969, -0.2625389099121094, -0.0832672119140625, 0.3036155700683594, 1.122039794921875, 0.59906005859375, 0.1396331787109375, 0.6561279296875, -0.039035797119140625, 0.08356475830078125, -0.0473480224609375, -0.039134979248046875, 0.00316619873046875, -0.39177703857421875, 0.10608673095703125, 0.018268585205078125, -0.55499267578125, 0.0126800537109375, 0.25517845153808594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000038.npy"}
|
|
{"epoch": 0.05744520030234316, "step": 39, "batch_size": 64, "mean": 0.07334855198860168, "std": 0.393395334482193, "min": -1.472015380859375, "p10": -0.39873504638671864, "median": 0.10403823852539062, "p90": 0.5521318435668946, "max": 0.74200439453125, "pos_frac": 0.640625, "sample": [0.5382156372070312, 0.031864166259765625, 0.2874298095703125, 0.35993385314941406, -0.11566162109375, 0.4521942138671875, 0.119903564453125, 0.5603446960449219, 0.2706260681152344, 0.06724929809570312, -0.28157806396484375, -0.1877460479736328, 0.5580959320068359, -0.18182373046875, 0.0215606689453125, 0.0828094482421875, -0.08650398254394531, 0.6302127838134766, 0.4895477294921875, -0.0107879638671875, -0.6459579467773438, -0.1298065185546875, -0.14982223510742188, 0.1806793212890625, 0.414306640625, -0.22562789916992188, -1.472015380859375, 0.2203826904296875, -0.4412841796875, -0.6331558227539062, -0.1414508819580078, 0.6136322021484375, -0.120697021484375, 0.24958038330078125, 0.29225921630859375, 0.3632354736328125, -0.23244094848632812, 0.35408782958984375, 0.08817291259765625, -0.5115890502929688, 0.12109375, -0.2994537353515625, -0.04917144775390625, 0.2176513671875, 0.74200439453125, -0.49578094482421875, 0.0064239501953125, 0.3165283203125, 0.6392250061035156, 0.1312122344970703, 0.5679397583007812, -0.8251800537109375, 0.23446273803710938, -0.16652297973632812, 0.40784263610839844, 0.395233154296875, 0.3578662872314453, 0.4771881103515625, 0.13990020751953125, 0.02359771728515625, 0.18115234375, 0.03711700439453125, 0.07451629638671875, -0.21891403198242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000039.npy"}
|
|
{"epoch": 0.05895691609977324, "step": 40, "batch_size": 64, "mean": 0.03887134790420532, "std": 0.3204992413520813, "min": -0.7983856201171875, "p10": -0.36328124999999994, "median": 0.03778266906738281, "p90": 0.40562934875488305, "max": 0.9315032958984375, "pos_frac": 0.5625, "sample": [-0.00522613525390625, 0.3388214111328125, -0.08457565307617188, 0.18791580200195312, -0.10767745971679688, 0.4916419982910156, 0.16593551635742188, 0.1678314208984375, -0.22113037109375, 0.5567626953125, 0.2871971130371094, 0.08592033386230469, -0.44472503662109375, -0.22603225708007812, 0.4563407897949219, 0.07907867431640625, -0.54534912109375, 0.2257537841796875, 0.3496265411376953, -0.1712646484375, 0.2904624938964844, 0.9315032958984375, 0.047100067138671875, 0.2990760803222656, 0.3544158935546875, -0.2928924560546875, 0.04854583740234375, 0.4275779724121094, 0.6979904174804688, -0.1168060302734375, 0.21915054321289062, -0.3934478759765625, 0.03753662109375, -0.2465667724609375, 0.19281768798828125, 0.03322792053222656, -0.1335430145263672, 0.27642250061035156, -0.1663665771484375, 0.25152587890625, -0.116973876953125, 0.476104736328125, 0.19484519958496094, 0.2728118896484375, -0.6164321899414062, 0.038028717041015625, 0.10511589050292969, -0.14845657348632812, -0.07099533081054688, 0.18804931640625, 0.26132965087890625, -0.7983856201171875, -0.21542739868164062, -0.07407379150390625, -0.005859375, -0.7164344787597656, 0.18841552734375, -0.18255615234375, -0.42414093017578125, -0.004425048828125, 0.03075408935546875, 0.017885208129882812, -0.154388427734375, -0.10160064697265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000040.npy"}
|
|
{"epoch": 0.06046863189720333, "step": 41, "batch_size": 64, "mean": 0.1058172881603241, "std": 0.4105142652988434, "min": -0.8071327209472656, "p10": -0.30513248443603513, "median": 0.0160369873046875, "p90": 0.6763614654541016, "max": 1.1037750244140625, "pos_frac": 0.5, "sample": [0.10868072509765625, -0.17158126831054688, -0.03426170349121094, -0.007171630859375, -0.3541717529296875, -0.39849853515625, -0.13288497924804688, 0.9473800659179688, 0.14242935180664062, -0.8071327209472656, -0.03061676025390625, 0.14451980590820312, -0.12992095947265625, -0.3295440673828125, -0.0875244140625, -0.10296630859375, -0.26935577392578125, 0.5157928466796875, -0.09675979614257812, -0.05140495300292969, 0.6214523315429688, -0.49981689453125, -0.3083019256591797, 0.1476287841796875, 0.157623291015625, -0.0255584716796875, -0.15509033203125, 0.26464080810546875, 0.21437454223632812, 0.0825653076171875, -0.1656017303466797, 0.8720703125, 0.9425811767578125, 0.6704788208007812, -0.12125015258789062, -0.29773712158203125, 0.49895477294921875, -0.1341094970703125, 0.640838623046875, -0.14089584350585938, 0.0833740234375, -0.5420913696289062, -0.2929420471191406, 0.06546401977539062, 1.1037750244140625, 0.10407066345214844, 0.3713340759277344, -0.057575225830078125, 1.0682525634765625, -0.1011962890625, -0.235198974609375, 0.03924560546875, 0.07580184936523438, 0.1385498046875, -0.026760101318359375, -0.1613922119140625, 0.5430755615234375, 0.06891632080078125, 0.6788825988769531, 0.1920623779296875, 0.3343658447265625, 1.0643844604492188, -0.20236968994140625, 0.340423583984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000041.npy"}
|
|
{"epoch": 0.06198034769463341, "step": 42, "batch_size": 64, "mean": 0.030096828937530518, "std": 0.38614359498023987, "min": -1.218017578125, "p10": -0.39677352905273433, "median": 0.031248092651367188, "p90": 0.43268737792968764, "max": 0.9844818115234375, "pos_frac": 0.53125, "sample": [0.100677490234375, 0.971405029296875, -0.1920166015625, -0.0309295654296875, -0.255462646484375, -0.6304473876953125, 0.09892082214355469, -0.08926200866699219, -0.660247802734375, 0.3099822998046875, 0.038722991943359375, 0.05556488037109375, -0.27700042724609375, 0.19028854370117188, -0.12032318115234375, 0.32479095458984375, 0.9844818115234375, 0.25567626953125, -0.21178627014160156, -0.031948089599609375, 0.01557159423828125, 0.8340072631835938, 0.3443603515625, -0.1710968017578125, -0.23046112060546875, -0.4048919677734375, 0.3475761413574219, 0.1661376953125, 0.09247970581054688, -0.3240222930908203, 0.19852828979492188, 0.31423187255859375, -0.05693244934082031, -0.37783050537109375, 0.7014274597167969, 0.11484527587890625, 0.44940948486328125, 0.2086181640625, -0.33449554443359375, -0.498931884765625, -0.22204971313476562, -0.224578857421875, -1.218017578125, -0.5203018188476562, -0.2438201904296875, 0.1831207275390625, 0.5248260498046875, -0.008087158203125, 0.3451499938964844, -0.1660919189453125, -0.05120658874511719, 0.326568603515625, 0.112030029296875, 0.023773193359375, 0.2695159912109375, 0.12621307373046875, -0.3649749755859375, -0.10886955261230469, 0.6418304443359375, 0.39366912841796875, 0.21673583984375, 0.18491554260253906, -0.09082794189453125, -0.422943115234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000042.npy"}
|
|
{"epoch": 0.06349206349206349, "step": 43, "batch_size": 64, "mean": 0.03938618302345276, "std": 0.4055257737636566, "min": -0.9562835693359375, "p10": -0.4831523895263672, "median": 0.05589485168457031, "p90": 0.5659393310546876, "max": 0.899993896484375, "pos_frac": 0.546875, "sample": [-0.26023101806640625, 0.27277374267578125, -0.4796142578125, 0.21434974670410156, 0.06715774536132812, -0.4614238739013672, -0.06372451782226562, 0.2572784423828125, 0.5804595947265625, -0.33400726318359375, 0.30914878845214844, -0.00760650634765625, -0.098541259765625, -0.4974212646484375, -0.06195068359375, 0.5320587158203125, 0.08147621154785156, -0.5382537841796875, 0.1441783905029297, 0.42607879638671875, 0.19903945922851562, 0.17108154296875, -0.7982254028320312, -0.03058624267578125, 0.2650489807128906, -0.2661266326904297, 0.09725189208984375, 0.498260498046875, -0.08008575439453125, 0.899993896484375, 0.584930419921875, -0.4481048583984375, 0.023624420166015625, -0.8150177001953125, 0.0446319580078125, 0.158111572265625, -0.090240478515625, -0.23437881469726562, 0.5191497802734375, 0.68505859375, 0.19571685791015625, -0.208526611328125, 0.221282958984375, -0.9562835693359375, 0.075927734375, 0.79168701171875, -0.1550750732421875, 0.35400390625, -0.16156005859375, 0.31554603576660156, -0.09708404541015625, 0.8751449584960938, 0.31797027587890625, -0.17124176025390625, 0.36286163330078125, 0.39963531494140625, -0.4846687316894531, 0.15017318725585938, 0.608062744140625, 0.009735107421875, -0.4472503662109375, -0.39609527587890625, -0.060153961181640625, -0.4846954345703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000043.npy"}
|
|
{"epoch": 0.06500377928949358, "step": 44, "batch_size": 64, "mean": 0.05792495608329773, "std": 0.4097907841205597, "min": -0.8659133911132812, "p10": -0.4037178039550781, "median": 0.04228973388671875, "p90": 0.5517187118530275, "max": 1.4503555297851562, "pos_frac": 0.546875, "sample": [-0.42670249938964844, -0.14589691162109375, 0.32686614990234375, 0.2026500701904297, 0.45513153076171875, -0.0478515625, 0.026153564453125, -0.2823944091796875, -0.6052474975585938, -0.396209716796875, 0.10424423217773438, 0.7933807373046875, -0.2752838134765625, -0.0206756591796875, 0.18967437744140625, 0.04132080078125, 0.0432586669921875, 0.046062469482421875, 0.10809898376464844, 0.1987781524658203, 0.053375244140625, 0.4928398132324219, 0.5212574005126953, -0.4193878173828125, -0.3244781494140625, -0.114288330078125, -0.1162872314453125, -0.8659133911132812, -0.4041595458984375, 0.21758270263671875, 0.6774444580078125, -0.5331497192382812, 0.07176971435546875, 0.15354156494140625, -0.051464080810546875, 0.14557647705078125, 1.4503555297851562, -0.0873870849609375, -0.02754974365234375, -0.22119140625, -0.00318145751953125, -0.32334136962890625, 0.01556396484375, 0.098358154296875, -0.13433074951171875, 0.47135162353515625, -0.21498870849609375, -0.1673431396484375, 0.4233856201171875, -0.09839630126953125, -0.39167022705078125, 0.22625732421875, 0.5647735595703125, 0.0770721435546875, -0.2770500183105469, 0.08264923095703125, 0.64263916015625, 1.2021636962890625, 0.47698211669921875, 0.6359786987304688, 0.126220703125, 0.254180908203125, -0.40268707275390625, -0.5312347412109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000044.npy"}
|
|
{"epoch": 0.06651549508692366, "step": 45, "batch_size": 64, "mean": 0.1341111958026886, "std": 0.42045143246650696, "min": -0.6685256958007812, "p10": -0.5069358825683593, "median": 0.1435546875, "p90": 0.53458251953125, "max": 1.3296966552734375, "pos_frac": 0.625, "sample": [0.049961090087890625, -0.16915130615234375, 0.3201446533203125, 0.2598724365234375, -0.00807952880859375, -0.03342437744140625, -0.3576831817626953, -0.0899658203125, -0.54583740234375, -0.549835205078125, 0.197052001953125, 0.54718017578125, 0.4690666198730469, 0.48751068115234375, 0.002063751220703125, 0.113616943359375, 0.3519248962402344, -0.6685256958007812, -0.513763427734375, 0.3666496276855469, 0.19390869140625, -0.49100494384765625, 0.37854766845703125, -0.09617805480957031, 0.47736358642578125, 1.211700439453125, -0.084075927734375, 0.03333282470703125, 0.34174346923828125, 0.43843841552734375, 0.46268463134765625, -0.135009765625, -0.5440139770507812, 0.2750816345214844, 0.4762077331542969, 1.2494964599609375, 0.008014678955078125, 0.28553009033203125, -0.2888336181640625, 0.16086578369140625, -0.527923583984375, 0.5424652099609375, -0.019327163696289062, 0.181121826171875, 0.29685211181640625, -0.07392692565917969, -0.5299530029296875, 0.3437461853027344, 0.79278564453125, 0.33759307861328125, -0.00862884521484375, 0.17729759216308594, 0.5161895751953125, 0.12624359130859375, 0.0723114013671875, 0.7287979125976562, 1.3296966552734375, 0.168975830078125, 0.07467269897460938, -0.15086746215820312, -0.32245635986328125, -0.24224853515625, 0.20391082763671875, -0.016788482666015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000045.npy"}
|
|
{"epoch": 0.06802721088435375, "step": 46, "batch_size": 64, "mean": 0.11740574240684509, "std": 0.4100187122821808, "min": -0.9643707275390625, "p10": -0.34451828002929685, "median": 0.07937049865722656, "p90": 0.6931795120239258, "max": 0.9967117309570312, "pos_frac": 0.5625, "sample": [0.42438507080078125, -0.21411895751953125, -0.6819686889648438, 0.47383880615234375, 0.2566375732421875, -0.073516845703125, 0.22173309326171875, -0.28481292724609375, -0.04244232177734375, -0.32977294921875, 0.6984443664550781, 0.3388175964355469, -0.3163604736328125, 0.78668212890625, -0.2557830810546875, 0.11883926391601562, -0.9643707275390625, 0.0790252685546875, 0.41147422790527344, 0.25559043884277344, 0.17195892333984375, 0.5740966796875, -0.04363822937011719, 0.26932525634765625, 0.05316162109375, 0.120697021484375, 0.6642074584960938, 0.9389190673828125, -0.35083770751953125, -0.01690673828125, -0.620758056640625, 0.7361373901367188, -0.1352081298828125, -0.0105438232421875, -0.3550910949707031, 0.75482177734375, 0.09144973754882812, -0.026918411254882812, -0.4239215850830078, 0.6776618957519531, 0.4284210205078125, -0.204620361328125, 0.28255462646484375, -0.10602188110351562, 0.47369384765625, 0.07971572875976562, 0.06500244140625, -0.04364776611328125, -0.015773773193359375, 0.20342063903808594, 0.6808948516845703, 0.06043243408203125, -0.09893035888671875, 0.21326065063476562, 0.3503570556640625, 0.9967117309570312, -0.6496124267578125, -0.03383636474609375, -0.25266456604003906, 0.76690673828125, -0.1171722412109375, 0.19947242736816406, -0.22690200805664062, 0.49137115478515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000046.npy"}
|
|
{"epoch": 0.06953892668178382, "step": 47, "batch_size": 64, "mean": 0.17234358191490173, "std": 0.37087303400039673, "min": -0.6967544555664062, "p10": -0.31357574462890625, "median": 0.1680746078491211, "p90": 0.6661277770996095, "max": 0.8878517150878906, "pos_frac": 0.703125, "sample": [0.40810394287109375, 0.22699737548828125, -0.11136627197265625, 0.4317169189453125, 0.6703414916992188, 0.28800010681152344, 0.017629623413085938, 0.397796630859375, 0.04506683349609375, 0.126953125, 0.2237834930419922, -0.06772994995117188, 0.3852081298828125, -0.6619415283203125, -0.00925445556640625, 0.25185394287109375, 0.6026878356933594, -0.3210601806640625, -0.139984130859375, -0.05306243896484375, 0.24452972412109375, -0.1389923095703125, -0.034198760986328125, 0.4234580993652344, 0.16339111328125, 0.0774993896484375, 0.21537399291992188, 0.26688194274902344, -0.18696975708007812, 0.1226959228515625, 0.5667076110839844, 0.6829643249511719, -0.0986480712890625, 0.8384170532226562, -0.098907470703125, 0.4473114013671875, 0.855010986328125, 0.04532623291015625, 0.06899261474609375, 0.7127609252929688, 0.2501220703125, 0.14307403564453125, -0.35117340087890625, 0.17494964599609375, 0.5261039733886719, 0.09421539306640625, 0.6562957763671875, 0.0147705078125, 0.52984619140625, 0.44268226623535156, 0.2613372802734375, 0.16473388671875, -0.6775283813476562, -0.6967544555664062, -0.6229667663574219, 0.7319107055664062, 0.1714153289794922, 0.8878517150878906, 0.5227775573730469, 0.031124114990234375, -0.031764984130859375, 0.5391845703125, -0.296112060546875, -0.32145118713378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000047.npy"}
|
|
{"epoch": 0.0710506424792139, "step": 48, "batch_size": 64, "mean": 0.015427738428115845, "std": 0.4353944957256317, "min": -1.0181694030761719, "p10": -0.49358234405517576, "median": 0.018703460693359375, "p90": 0.44925994873046887, "max": 1.3669052124023438, "pos_frac": 0.53125, "sample": [0.13341903686523438, 0.193511962890625, -0.05254554748535156, 0.01612091064453125, -0.140960693359375, 0.8860969543457031, -0.3806610107421875, 0.257659912109375, -0.08413124084472656, -0.1389007568359375, 0.32170867919921875, -0.04288291931152344, -0.4034996032714844, -0.530426025390625, 0.32317352294921875, 0.767059326171875, 0.13053131103515625, -0.13584518432617188, -0.6103668212890625, 0.40758514404296875, 0.14667129516601562, -0.385528564453125, -0.4366607666015625, 0.4633636474609375, 0.380401611328125, 0.416351318359375, 0.203125, 0.08553695678710938, -0.4435577392578125, 0.8123397827148438, 0.0212860107421875, -0.1537933349609375, 0.1702423095703125, 0.08892822265625, 0.3952484130859375, 0.00670623779296875, 0.02684783935546875, 0.03461456298828125, 0.9123096466064453, -0.3334465026855469, -0.332611083984375, 0.32900238037109375, -0.00101470947265625, 0.09593009948730469, -0.555419921875, -0.07174491882324219, 0.14684486389160156, -0.16338729858398438, 0.0267791748046875, -0.4656391143798828, -0.1998748779296875, -0.1930084228515625, -0.7462558746337891, 0.32501220703125, -0.42881011962890625, 0.22640228271484375, 1.3669052124023438, -0.06720542907714844, -0.6653289794921875, -0.5055580139160156, 0.07860565185546875, 0.8547248840332031, -0.3764362335205078, -1.0181694030761719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000048.npy"}
|
|
{"epoch": 0.07256235827664399, "step": 49, "batch_size": 64, "mean": 0.07775917649269104, "std": 0.4694283604621887, "min": -1.2866363525390625, "p10": -0.3792236328125, "median": 0.02634716033935547, "p90": 0.5925148010253908, "max": 1.5475616455078125, "pos_frac": 0.578125, "sample": [0.37068939208984375, -0.16391754150390625, 0.027482986450195312, 0.3485107421875, 0.39888763427734375, -0.013647079467773438, -0.03316307067871094, 0.025211334228515625, 0.2745819091796875, -0.232025146484375, 0.06471633911132812, 0.17461395263671875, -0.11551284790039062, 0.3814697265625, -0.17274856567382812, -0.07990646362304688, -0.6153736114501953, -0.05530548095703125, 0.2349071502685547, -1.1817474365234375, 0.1312236785888672, 0.20806884765625, 0.05161476135253906, 0.10778045654296875, 0.1702423095703125, 0.8189849853515625, 0.38730621337890625, 0.11430168151855469, 0.14266014099121094, -0.4408111572265625, 0.19460487365722656, 0.5197639465332031, -0.048290252685546875, 0.7145004272460938, 0.4503154754638672, 0.70111083984375, -0.28018951416015625, 0.00858306884765625, 1.0666351318359375, 1.1360321044921875, 0.00140380859375, -0.027400970458984375, -0.17597389221191406, -0.2658233642578125, -0.4534759521484375, -0.14620208740234375, -0.19576263427734375, 0.5645675659179688, 0.3509101867675781, 0.03966522216796875, 0.01979827880859375, 0.5004196166992188, -1.2866363525390625, -0.28118133544921875, -0.23935317993164062, -0.8121261596679688, 0.6044921875, 1.5475616455078125, 0.3237648010253906, 0.016435623168945312, -0.12961578369140625, -0.38739013671875, -0.0234832763671875, -0.36016845703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000049.npy"}
|
|
{"epoch": 0.07407407407407407, "step": 50, "batch_size": 64, "mean": 0.11088606715202332, "std": 0.40238696336746216, "min": -1.1205520629882812, "p10": -0.44513626098632797, "median": 0.11893844604492188, "p90": 0.6064926147460937, "max": 0.9968643188476562, "pos_frac": 0.59375, "sample": [0.5083847045898438, 0.576385498046875, 0.1614227294921875, 0.3538818359375, 0.2999000549316406, 0.6202392578125, 0.3045234680175781, -0.5275802612304688, 0.9968643188476562, 0.47414398193359375, 0.27291107177734375, 0.3674468994140625, -1.1205520629882812, -0.08214950561523438, -0.06380844116210938, -0.29193115234375, 0.749542236328125, 0.32550811767578125, 0.31273460388183594, -0.2234344482421875, 0.5750045776367188, -0.019336700439453125, -0.10479164123535156, 0.00154876708984375, 0.30960845947265625, 0.727935791015625, -0.18329238891601562, 0.2800750732421875, -0.15703964233398438, -0.1002960205078125, 0.8274669647216797, 0.3883552551269531, 0.09664154052734375, 0.6095466613769531, -0.799652099609375, -0.18560791015625, 0.418548583984375, 0.6089096069335938, -0.09270095825195312, 0.00939178466796875, 0.416290283203125, 0.4957733154296875, -0.5276947021484375, -0.5136566162109375, 0.05103302001953125, 0.10063743591308594, 0.2256317138671875, -0.05294036865234375, -0.6100921630859375, 0.3724365234375, 0.1372394561767578, -0.10015106201171875, -0.015316009521484375, 0.008790969848632812, 0.6008529663085938, -0.060024261474609375, -0.5107955932617188, -0.22437667846679688, 0.17380523681640625, -0.19847869873046875, 0.16704940795898438, -0.142333984375, 0.18494606018066406, -0.10666656494140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000050.npy"}
|
|
{"epoch": 0.07558578987150416, "step": 51, "batch_size": 64, "mean": 0.20416662096977234, "std": 0.5492684841156006, "min": -0.8719406127929688, "p10": -0.4381217956542968, "median": 0.11561107635498047, "p90": 0.8708938598632813, "max": 2.0704345703125, "pos_frac": 0.609375, "sample": [0.6550445556640625, 0.2670269012451172, -0.01039886474609375, -0.22180938720703125, 0.43268585205078125, 0.48110198974609375, 0.11190605163574219, 0.32535362243652344, -0.22989845275878906, -0.45484161376953125, -0.044300079345703125, 0.4841728210449219, -0.4932975769042969, 0.8593902587890625, -0.1318511962890625, 0.049041748046875, 1.0116729736328125, 0.620147705078125, 0.46820068359375, 1.326629638671875, 0.01583099365234375, 0.23052406311035156, 0.6433277130126953, 0.875823974609375, 0.16835784912109375, -0.3526611328125, -0.27429962158203125, -0.7672214508056641, -0.0011196136474609375, 0.5072002410888672, 0.5059738159179688, -0.1464080810546875, 0.1562938690185547, 1.3737945556640625, -0.6370620727539062, -0.03656768798828125, -0.017398834228515625, -0.8719406127929688, -0.477630615234375, -0.03379058837890625, 0.615814208984375, -0.3337554931640625, 0.6046905517578125, 0.7798309326171875, 0.10903167724609375, -0.3098335266113281, -0.7583160400390625, 2.0704345703125, 0.3221435546875, 1.1558303833007812, 0.4425621032714844, 1.0022659301757812, 0.07569122314453125, -0.39910888671875, 0.08080291748046875, 0.029623031616210938, 0.11931610107421875, 0.77850341796875, 0.2590961456298828, -0.06081390380859375, -0.2732086181640625, 0.22430419921875, 0.21257972717285156, -0.047824859619140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000051.npy"}
|
|
{"epoch": 0.07709750566893424, "step": 52, "batch_size": 64, "mean": 0.1978059560060501, "std": 0.572745680809021, "min": -0.6476211547851562, "p10": -0.32109718322753905, "median": 0.031444549560546875, "p90": 0.9106018066406254, "max": 2.627532958984375, "pos_frac": 0.5625, "sample": [1.56072998046875, -0.2447052001953125, 0.435546875, -0.0512542724609375, 0.7833786010742188, -0.2201995849609375, 0.16310691833496094, -0.1210479736328125, -0.12267303466796875, 1.3512496948242188, 1.48541259765625, 0.7455978393554688, -0.23616790771484375, 1.1215133666992188, 0.43145751953125, -0.30107879638671875, 0.30278778076171875, -0.428375244140625, -0.4596214294433594, -0.28324127197265625, -0.27507781982421875, -0.2600975036621094, 0.148956298828125, -0.05365562438964844, -0.304779052734375, 0.8173980712890625, 0.0251922607421875, 0.014385223388671875, 0.613494873046875, -0.011371612548828125, 0.29700660705566406, -0.4206390380859375, 0.16890716552734375, -0.34258460998535156, -0.1517162322998047, 0.43314361572265625, 0.15446090698242188, 0.3431854248046875, -0.282684326171875, -0.143096923828125, -0.02054595947265625, 0.3926353454589844, 0.13024139404296875, 0.23150062561035156, -0.2677726745605469, 0.667327880859375, -0.16214847564697266, 0.03769683837890625, 2.627532958984375, -0.07451629638671875, 0.9505462646484375, -0.13507080078125, -0.38922882080078125, 0.2624244689941406, 0.459930419921875, -0.6476211547851562, 1.03607177734375, -0.3280906677246094, 0.6383056640625, 0.3316516876220703, 0.1464996337890625, 0.011600494384765625, 0.0207977294921875, 0.05696868896484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000052.npy"}
|
|
{"epoch": 0.07860922146636433, "step": 53, "batch_size": 64, "mean": 0.15570643544197083, "std": 0.4291461408138275, "min": -1.5431976318359375, "p10": -0.33143405914306634, "median": 0.19373130798339844, "p90": 0.646989440917969, "max": 1.1423187255859375, "pos_frac": 0.703125, "sample": [0.20693588256835938, 0.285491943359375, 0.9136734008789062, 0.27606201171875, 0.27399635314941406, 0.23482322692871094, 0.15041732788085938, -0.0456695556640625, 0.2832660675048828, -0.35501861572265625, 0.25769805908203125, 0.0799102783203125, -0.1884613037109375, 0.4112567901611328, -0.03563690185546875, 0.4646644592285156, 0.04032135009765625, 1.1423187255859375, -0.052059173583984375, -0.5917587280273438, 0.254180908203125, 0.19013214111328125, 0.595367431640625, 0.10019302368164062, -1.5431976318359375, 0.2602691650390625, -0.3804969787597656, 0.31247711181640625, 0.7499351501464844, 0.6691131591796875, 0.4061393737792969, -0.7770538330078125, -0.17882156372070312, 0.06092071533203125, 0.19097900390625, 0.4776153564453125, 0.45098304748535156, 0.9795303344726562, -0.25768089294433594, -0.36068153381347656, -0.15538597106933594, 0.45082855224609375, 0.19648361206054688, -0.2592010498046875, 0.2073822021484375, 0.86785888671875, -0.01509857177734375, 0.29117584228515625, 0.3830909729003906, 0.59442138671875, 0.26630401611328125, 0.594757080078125, -0.27640342712402344, 0.7851448059082031, 0.10469818115234375, -0.4175148010253906, 0.29486083984375, -0.16822052001953125, 0.01311492919921875, 0.09203529357910156, 0.0058135986328125, 0.1583251953125, -0.11490631103515625, 0.11351203918457031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000053.npy"}
|
|
{"epoch": 0.0801209372637944, "step": 54, "batch_size": 64, "mean": 0.12304195761680603, "std": 0.4524027705192566, "min": -0.7234039306640625, "p10": -0.4646415710449219, "median": 0.12043952941894531, "p90": 0.6890958786010744, "max": 1.62994384765625, "pos_frac": 0.59375, "sample": [0.45882415771484375, 0.9540557861328125, 0.2113037109375, 0.29878807067871094, -0.32647705078125, -0.5305328369140625, 0.6975765228271484, 0.43912506103515625, 0.4538421630859375, -0.46445465087890625, -0.1599102020263672, -0.04640960693359375, 0.14586639404296875, -0.6939544677734375, 0.21568679809570312, 0.026123046875, 0.13616943359375, 0.13922500610351562, 0.20831298828125, -0.358001708984375, -0.04680633544921875, 1.62994384765625, -0.03209686279296875, 0.2999610900878906, 0.0461273193359375, 0.8117256164550781, 0.2324657440185547, -0.197296142578125, -0.08831787109375, 0.17235565185546875, -0.5665817260742188, -0.10079574584960938, -0.6365203857421875, -0.4469013214111328, 0.3461265563964844, -0.0885772705078125, 0.22021484375, -0.4647216796875, 0.6693077087402344, 0.6610679626464844, 0.44103050231933594, 0.09741592407226562, 0.13151168823242188, 0.5346908569335938, -0.10312271118164062, 0.6042346954345703, 0.7390060424804688, 0.31048011779785156, -0.10335731506347656, 0.308990478515625, 0.6379165649414062, -0.4847259521484375, 0.0403594970703125, -0.170257568359375, 0.5200901031494141, -0.17731094360351562, -0.01067352294921875, -0.27071571350097656, 0.8850440979003906, -0.44632720947265625, 0.01403045654296875, 0.10936737060546875, 0.7645721435546875, -0.7234039306640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000054.npy"}
|
|
{"epoch": 0.08163265306122448, "step": 55, "batch_size": 64, "mean": 0.2467818409204483, "std": 0.64699786901474, "min": -1.480316162109375, "p10": -0.44507446289062497, "median": 0.20148468017578125, "p90": 1.1478366851806643, "max": 1.890533447265625, "pos_frac": 0.625, "sample": [1.7060089111328125, 0.10250091552734375, 0.4150276184082031, 0.817108154296875, -0.23924827575683594, -0.137237548828125, 0.42769622802734375, -0.238616943359375, -0.4910755157470703, -0.202911376953125, 0.09336090087890625, 0.4344635009765625, -0.6203765869140625, -0.091033935546875, -0.17201614379882812, 1.3485031127929688, 1.2658233642578125, 0.205718994140625, 0.2270050048828125, -0.11234283447265625, 0.3075675964355469, -1.480316162109375, 0.5899848937988281, 0.4079017639160156, 1.890533447265625, 0.2160472869873047, 0.38200950622558594, 0.6162261962890625, 0.14469146728515625, 0.462615966796875, 0.3249988555908203, 0.3416748046875, 0.8870391845703125, -0.2398223876953125, -0.03441619873046875, -0.45831298828125, 0.42630863189697266, -0.14665985107421875, 0.12641143798828125, -0.3983726501464844, 0.38721466064453125, 0.9735946655273438, -0.01300048828125, 0.9725341796875, 0.1972503662109375, 0.07583236694335938, 1.6063461303710938, -0.6093292236328125, -0.18381500244140625, -0.6368408203125, 0.43105316162109375, 0.5825252532958984, -0.2373809814453125, -0.08858871459960938, -0.4141845703125, 1.11126708984375, 0.35521697998046875, 0.07733154296875, 0.6430435180664062, 1.6536712646484375, 0.0375518798828125, -0.33697509765625, 1.1635093688964844, -1.058258056640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000055.npy"}
|
|
{"epoch": 0.08314436885865457, "step": 56, "batch_size": 64, "mean": 0.21107253432273865, "std": 0.5767248868942261, "min": -1.8634033203125, "p10": -0.40861816406249996, "median": 0.18448638916015625, "p90": 0.8605991363525394, "max": 1.7452545166015625, "pos_frac": 0.671875, "sample": [-0.20276641845703125, 0.42108917236328125, 0.06918907165527344, -0.5331325531005859, 0.4620361328125, -0.30939483642578125, 1.1647872924804688, -0.7020721435546875, -0.3865509033203125, 0.24098777770996094, 0.6877059936523438, -0.4893951416015625, -0.1730804443359375, 0.41272735595703125, 0.34033203125, 0.896331787109375, 0.5313186645507812, -0.6780376434326172, 0.7072830200195312, 0.9422836303710938, -0.5228672027587891, 1.7452545166015625, 0.44701576232910156, 0.1245880126953125, 0.09509658813476562, 0.8938980102539062, -0.30858612060546875, 0.412353515625, -0.09224128723144531, 0.6580829620361328, -0.06287384033203125, 0.7092437744140625, 1.314727783203125, 0.5827579498291016, 0.142364501953125, 0.10296630859375, -1.8634033203125, 0.5971450805664062, 0.1491413116455078, 0.5811729431152344, 0.32724571228027344, 0.2232666015625, 0.217315673828125, -0.4180755615234375, 0.05347251892089844, 0.3819999694824219, 0.22882843017578125, 0.12590789794921875, -0.1836833953857422, 0.12634658813476562, 0.6130332946777344, 0.11449813842773438, 0.7507476806640625, 0.4422416687011719, 0.248931884765625, -0.2732963562011719, -0.24477767944335938, -0.09969329833984375, 0.7829017639160156, 0.1516571044921875, -0.29058265686035156, 1.6114501953125, -0.14109230041503906, -0.34748077392578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000056.npy"}
|
|
{"epoch": 0.08465608465608465, "step": 57, "batch_size": 64, "mean": 0.32123592495918274, "std": 0.708128809928894, "min": -1.2262344360351562, "p10": -0.546136474609375, "median": 0.3314857482910156, "p90": 1.1246582031250003, "max": 2.446044921875, "pos_frac": 0.703125, "sample": [-0.3530235290527344, -0.4129962921142578, 0.38413047790527344, 2.446044921875, 0.07705879211425781, 0.992919921875, 0.3101959228515625, -0.6863327026367188, 0.7665748596191406, 0.431854248046875, 1.3409423828125, 0.8200855255126953, -0.7765655517578125, 0.29480934143066406, 0.6137008666992188, -0.7703170776367188, 1.2024688720703125, 0.9159622192382812, 1.7749481201171875, 0.17072105407714844, 0.35277557373046875, -0.27313995361328125, 0.7532386779785156, 0.72686767578125, -0.18693923950195312, -0.4609565734863281, 0.1357574462890625, 0.0606689453125, -0.2774162292480469, -1.2262344360351562, 0.501556396484375, -0.04669380187988281, 0.4013099670410156, 0.263214111328125, -0.2804222106933594, 1.0666275024414062, -0.896759033203125, 0.3649139404296875, -0.5548248291015625, 0.995269775390625, 0.16357421875, -0.40541839599609375, -0.5258636474609375, 0.6462726593017578, -0.47850799560546875, 0.36865234375, 0.574371337890625, 0.8669471740722656, 0.9104156494140625, 0.3696308135986328, 0.5882911682128906, 1.1519393920898438, 0.07903289794921875, 2.2313385009765625, 0.1515064239501953, 1.1495285034179688, 0.027252197265625, 0.6800155639648438, 0.21930694580078125, 0.655670166015625, 0.034999847412109375, -0.10792732238769531, 0.8221588134765625, -0.5760841369628906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000057.npy"}
|
|
{"epoch": 0.08616780045351474, "step": 58, "batch_size": 64, "mean": 0.25284650921821594, "std": 0.9350485801696777, "min": -1.2862892150878906, "p10": -0.5491840362548828, "median": 0.046596527099609375, "p90": 1.098810577392578, "max": 4.47332763671875, "pos_frac": 0.53125, "sample": [-1.2862892150878906, 0.5109634399414062, 0.9115486145019531, -0.12939453125, 1.0852279663085938, -0.5113449096679688, -0.32299041748046875, 0.814300537109375, -0.147552490234375, -0.53076171875, 0.4204368591308594, -0.21756362915039062, 1.7877197265625, -0.6240081787109375, -0.6141853332519531, 0.4492073059082031, 1.1007537841796875, -0.2032318115234375, 0.05401611328125, 1.004608154296875, 0.07802772521972656, -0.09122467041015625, 0.42424774169921875, 0.529083251953125, -0.158782958984375, -0.43482208251953125, -1.04327392578125, -0.00555419921875, -0.5291290283203125, 1.0942764282226562, -0.144195556640625, -0.8353862762451172, -0.09869766235351562, 0.3758392333984375, 4.47332763671875, -0.3103790283203125, -0.493011474609375, 2.72039794921875, 1.486785888671875, 0.3554229736328125, -0.484588623046875, -0.019145965576171875, 0.2838096618652344, -0.4677886962890625, -0.36651611328125, 2.7297210693359375, -0.3062400817871094, 0.03917694091796875, -0.5570793151855469, 0.8828926086425781, 0.22492218017578125, 1.0314254760742188, 0.2596435546875, 0.2313079833984375, 0.30226898193359375, 1.1367263793945312, 0.0024089813232421875, -0.35748863220214844, -0.8747940063476562, 0.885467529296875, 0.36138916015625, -0.14714813232421875, 0.05706214904785156, 0.3903312683105469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000058.npy"}
|
|
{"epoch": 0.08767951625094482, "step": 59, "batch_size": 64, "mean": 0.2893882989883423, "std": 0.7511101961135864, "min": -1.1438980102539062, "p10": -0.5605182647705077, "median": 0.20956993103027344, "p90": 1.306884765625, "max": 2.4943695068359375, "pos_frac": 0.59375, "sample": [-0.12524032592773438, 0.2013702392578125, 0.37143707275390625, -0.7413215637207031, -0.05281829833984375, -0.3357067108154297, 0.7216453552246094, -1.1438980102539062, 0.788055419921875, 0.7921600341796875, 1.706024169921875, 0.4143486022949219, 0.663299560546875, 1.318328857421875, 0.33112335205078125, -0.1605224609375, 1.607086181640625, -0.6862030029296875, -0.0452728271484375, 0.15258026123046875, 0.3481712341308594, -0.20916366577148438, -0.154876708984375, -0.21387481689453125, -0.00910186767578125, 0.8720169067382812, 0.0117340087890625, 1.3339080810546875, 2.48150634765625, 0.43096160888671875, -0.09131622314453125, 0.15058135986328125, 0.3423633575439453, 0.01390838623046875, -0.480743408203125, 0.098114013671875, -0.35408782958984375, 0.43205833435058594, -0.8276290893554688, -0.1346416473388672, 2.4943695068359375, 0.5135269165039062, 1.280181884765625, 0.4515972137451172, -0.6125030517578125, 0.8155937194824219, -0.3426856994628906, 1.154541015625, -0.10915184020996094, 1.8631134033203125, -0.5947074890136719, 0.3040046691894531, -0.18526077270507812, -0.478759765625, 0.8487472534179688, 0.21776962280273438, -1.1136474609375, -0.2024078369140625, 0.2646217346191406, 0.5428619384765625, 0.3762016296386719, 0.607177734375, 0.7578887939453125, -0.14858627319335938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000059.npy"}
|
|
{"epoch": 0.08919123204837491, "step": 60, "batch_size": 64, "mean": 0.08231207728385925, "std": 0.6883484721183777, "min": -1.6351470947265625, "p10": -0.9016542434692382, "median": 0.19085121154785156, "p90": 0.8668533325195313, "max": 1.48541259765625, "pos_frac": 0.578125, "sample": [0.19498062133789062, 0.11856842041015625, 0.6957893371582031, -0.022979736328125, -0.44010162353515625, 0.8841400146484375, -1.4163055419921875, -0.36407470703125, -0.23104476928710938, -1.6351470947265625, 0.3439216613769531, 0.5731048583984375, -0.36429786682128906, -0.04554176330566406, -1.2370071411132812, 0.6367874145507812, -0.37312889099121094, 1.0816783905029297, 1.2300662994384766, 0.248565673828125, 0.1867218017578125, -0.00368499755859375, 0.3661651611328125, -0.6364593505859375, 0.3131904602050781, 0.28260040283203125, 0.5492897033691406, 0.17255210876464844, 0.8716964721679688, -0.9509010314941406, 0.22454071044921875, -0.11703872680664062, 1.1332168579101562, 0.6214981079101562, -0.2601776123046875, 0.6819000244140625, 0.6279621124267578, -1.3009490966796875, -0.020137786865234375, -0.6944713592529297, -0.6967849731445312, 0.6246051788330078, 0.5768623352050781, -1.0417213439941406, 0.08957290649414062, 1.1382598876953125, -0.14373779296875, -0.464508056640625, 0.6524581909179688, -0.1480998992919922, 0.8555526733398438, 0.25714874267578125, 0.47029685974121094, 0.19559478759765625, 1.48541259765625, -0.0689849853515625, 0.34964752197265625, -0.009777069091796875, 0.30558013916015625, 0.3121967315673828, -1.4839324951171875, -0.7867450714111328, 0.8425045013427734, 0.031084060668945312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000060.npy"}
|
|
{"epoch": 0.09070294784580499, "step": 61, "batch_size": 64, "mean": 0.12104541063308716, "std": 0.8488274812698364, "min": -2.0301437377929688, "p10": -1.021011734008789, "median": 0.1339893341064453, "p90": 1.137556076049805, "max": 2.283294677734375, "pos_frac": 0.625, "sample": [0.5330963134765625, 0.5174102783203125, 1.7321090698242188, 1.9532470703125, 0.13235855102539062, 0.03680419921875, 0.597747802734375, 1.8340911865234375, 0.6800155639648438, 0.15412521362304688, 0.19915008544921875, -0.4283866882324219, -0.2770652770996094, 0.0315093994140625, 0.5653839111328125, -0.3004894256591797, 0.5990066528320312, -0.33123016357421875, -0.089202880859375, 0.6960582733154297, 0.1356201171875, 0.6370697021484375, 0.4486541748046875, 0.03240966796875, 0.03950309753417969, -0.5034751892089844, -1.0706939697265625, -1.2293548583984375, 0.5656204223632812, -0.3606300354003906, 0.6292991638183594, -0.4183635711669922, -0.9050865173339844, 0.665924072265625, -0.5555877685546875, 0.4118194580078125, 0.03153228759765625, -0.607940673828125, -0.7437667846679688, 1.1610107421875, 2.283294677734375, -1.3415908813476562, 0.33188629150390625, -1.4251289367675781, 0.49292755126953125, 1.2162551879882812, 0.48590087890625, 0.2743206024169922, -0.2543373107910156, -0.2694683074951172, 1.0828285217285156, -0.7775344848632812, 0.09500503540039062, 0.1093292236328125, 0.7226181030273438, -2.0301437377929688, 1.4078216552734375, 0.292236328125, -0.3463592529296875, -0.22154998779296875, 0.8787002563476562, 0.44165802001953125, -1.5792198181152344, -1.3218460083007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000061.npy"}
|
|
{"epoch": 0.09221466364323508, "step": 62, "batch_size": 64, "mean": 0.18723803758621216, "std": 0.8288217782974243, "min": -2.840850830078125, "p10": -0.7077938079833984, "median": 0.1724834442138672, "p90": 1.1335292816162117, "max": 2.2802886962890625, "pos_frac": 0.609375, "sample": [0.7406234741210938, -1.1582412719726562, -0.7675437927246094, 0.3257484436035156, 0.3652496337890625, 0.1165924072265625, 0.9719047546386719, -0.04837799072265625, -1.6135635375976562, -0.08984375, 1.7504119873046875, -0.21076011657714844, 0.5938034057617188, -1.3212432861328125, 1.2027969360351562, 0.687469482421875, 0.6648483276367188, 0.4496307373046875, -0.23034286499023438, 0.9543952941894531, -0.25705718994140625, -0.5171852111816406, -0.32916259765625, 0.048641204833984375, 0.32132530212402344, -0.498687744140625, -0.29392242431640625, 0.3611602783203125, 0.4107017517089844, 0.9022369384765625, -0.6562118530273438, 0.042499542236328125, -0.12633895874023438, 0.44268798828125, 1.5363502502441406, -2.840850830078125, -0.24333953857421875, 1.5119094848632812, -0.23742103576660156, 0.150604248046875, 0.19436264038085938, 0.6860885620117188, 0.7412338256835938, 0.19988632202148438, 0.8618984222412109, 0.12410354614257812, 0.039459228515625, -0.7299003601074219, 0.5380935668945312, 1.5157012939453125, -0.16133880615234375, 0.7786102294921875, 0.5044002532958984, 0.09585380554199219, -0.4725494384765625, -0.2988433837890625, 1.736358642578125, 2.2802886962890625, 0.20526885986328125, -0.01195526123046875, 0.43381500244140625, -0.009052276611328125, -0.89178466796875, 0.5117378234863281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000062.npy"}
|
|
{"epoch": 0.09372637944066516, "step": 63, "batch_size": 64, "mean": 0.5547426342964172, "std": 0.7468184232711792, "min": -2.01348876953125, "p10": -0.10814094543457031, "median": 0.48380184173583984, "p90": 1.5482742309570323, "max": 2.42388916015625, "pos_frac": 0.84375, "sample": [0.3996124267578125, 0.24924468994140625, -0.1044158935546875, 0.15662002563476562, 2.42388916015625, -0.1700572967529297, -0.13902664184570312, 1.948760986328125, 1.2162399291992188, 0.15010833740234375, -2.01348876953125, -0.10973739624023438, 0.28028106689453125, -1.6140594482421875, 0.8044548034667969, 0.8078765869140625, 1.6724700927734375, 0.48571014404296875, 0.7259292602539062, 0.992156982421875, -0.505157470703125, 0.4512443542480469, 1.189971923828125, 1.7362060546875, 0.9510498046875, 0.8777256011962891, 0.4906463623046875, 0.1857738494873047, 0.3678569793701172, 0.36721038818359375, 0.4946136474609375, 0.8619766235351562, 1.2459526062011719, 0.7954063415527344, 0.1875457763671875, 0.2398529052734375, 0.3464508056640625, 0.03730010986328125, 0.2646923065185547, 0.5504188537597656, 0.48189353942871094, 2.1519775390625, 0.3033323287963867, 1.25848388671875, 0.5838584899902344, 0.8455486297607422, 0.988494873046875, 1.6984291076660156, 0.9297332763671875, 0.087860107421875, 0.10058021545410156, 0.8605804443359375, 0.35926055908203125, -0.301971435546875, 0.56573486328125, 0.302490234375, 0.8771247863769531, 0.33048057556152344, -0.014699935913085938, 0.07799530029296875, 0.7000656127929688, 2.0682220458984375, 1.03692626953125, -0.0881805419921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000063.npy"}
|
|
{"epoch": 0.09523809523809523, "step": 64, "batch_size": 64, "mean": 0.11583459377288818, "std": 0.7384891510009766, "min": -2.6441650390625, "p10": -0.5935295104980468, "median": 0.10938358306884766, "p90": 1.011444091796875, "max": 2.439178466796875, "pos_frac": 0.59375, "sample": [1.2797775268554688, 2.439178466796875, -0.7707672119140625, -0.5744476318359375, -0.04615020751953125, 0.344268798828125, 0.7191848754882812, -0.07264137268066406, 0.33599090576171875, 1.2030410766601562, 0.2674541473388672, 0.30034637451171875, 0.71307373046875, 0.00365447998046875, 1.0295257568359375, -0.1609344482421875, 0.134521484375, 0.04496002197265625, 0.3036842346191406, 0.5485763549804688, 1.0214080810546875, 0.21316146850585938, -0.13483428955078125, 0.07128334045410156, -0.33415985107421875, -0.22870254516601562, 0.015949249267578125, -2.6441650390625, -0.6711082458496094, 0.25820159912109375, 0.109222412109375, -0.20404052734375, 0.45388031005859375, -0.6017074584960938, 0.08226585388183594, 0.5323944091796875, 0.5455856323242188, -2.146484375, -0.7343807220458984, 0.6169662475585938, 0.9972381591796875, 0.3315887451171875, 0.6354598999023438, 0.10954475402832031, 0.5290603637695312, -0.454498291015625, -0.05161094665527344, -0.339752197265625, -0.3725242614746094, 1.0175323486328125, -0.20547866821289062, -0.2148456573486328, 1.2276611328125, 0.2725067138671875, 0.18512344360351562, 0.2528266906738281, 0.666778564453125, -0.3030815124511719, -0.41375732421875, -0.2522430419921875, -0.19222259521484375, -0.8886795043945312, -0.19462966918945312, 0.8083839416503906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000064.npy"}
|
|
{"epoch": 0.09674981103552532, "step": 65, "batch_size": 64, "mean": 0.49063026905059814, "std": 1.0196012258529663, "min": -2.3158340454101562, "p10": -0.7122734069824218, "median": 0.4277639389038086, "p90": 1.5058305740356446, "max": 3.976959228515625, "pos_frac": 0.75, "sample": [0.26989269256591797, 0.516571044921875, 0.9807929992675781, 0.7498893737792969, 0.0645294189453125, 1.0581550598144531, -0.1756591796875, 0.282562255859375, -0.117584228515625, 1.4835357666015625, -0.7528839111328125, 0.6412010192871094, 0.8325080871582031, 1.281158447265625, 0.4443359375, 0.4904899597167969, 0.7230014801025391, 0.4391803741455078, -0.9727935791015625, 2.465484619140625, -0.0572662353515625, 0.29380035400390625, -1.16778564453125, 1.7042999267578125, 1.2750892639160156, 0.4163475036621094, -2.3158340454101562, 0.1909027099609375, -0.19495391845703125, 0.5032958984375, 0.1663227081298828, 0.130126953125, -0.6175155639648438, -1.5181236267089844, 0.7269363403320312, 2.595062255859375, 0.23431396484375, 0.9750823974609375, 1.4964218139648438, 1.060598373413086, 1.5098628997802734, 0.08017730712890625, 1.5412216186523438, 1.0024948120117188, -0.2717437744140625, -0.2890167236328125, 0.2951068878173828, 0.050251007080078125, -1.053253173828125, -0.4167289733886719, 0.10570526123046875, 0.7920207977294922, 0.00090789794921875, 1.0534553527832031, 0.5509414672851562, 1.0917625427246094, 2.838226318359375, -0.03903388977050781, 1.062652587890625, 1.4762802124023438, 0.2605743408203125, -1.08001708984375, 3.976959228515625, 0.2600421905517578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000065.npy"}
|
|
{"epoch": 0.0982615268329554, "step": 66, "batch_size": 64, "mean": 0.36180832982063293, "std": 0.9561251997947693, "min": -2.274810791015625, "p10": -0.6126235961914062, "median": 0.32207775115966797, "p90": 1.634418869018555, "max": 3.2096405029296875, "pos_frac": 0.671875, "sample": [0.2710838317871094, 3.035858154296875, 0.7199783325195312, 0.4852409362792969, 0.17609405517578125, 0.653533935546875, 0.42649269104003906, 0.0828399658203125, -0.21245193481445312, 0.3384513854980469, -0.08245849609375, 0.9410209655761719, 2.2404327392578125, 0.3156890869140625, -0.68194580078125, -0.5111312866210938, 0.07149505615234375, 0.31790924072265625, 2.4137115478515625, 1.6755294799804688, 0.2975921630859375, 1.5384941101074219, -1.306304931640625, -0.16703414916992188, 0.41115570068359375, 1.0704097747802734, 0.32090187072753906, 1.2677688598632812, 0.7417984008789062, 0.7157440185546875, 0.6350479125976562, 0.3232536315917969, 0.3395347595214844, -0.163299560546875, 0.1615161895751953, -0.17987442016601562, 0.4194183349609375, 0.5948410034179688, -0.5248031616210938, 0.25494384765625, -2.274810791015625, -1.8537750244140625, -0.17738723754882812, -0.1902923583984375, -0.22360992431640625, -1.025848388671875, 1.1259689331054688, -0.05587005615234375, 0.5586395263671875, 0.6422882080078125, -0.21286582946777344, 0.02175140380859375, 0.45845794677734375, 3.2096405029296875, 1.7005081176757812, -0.7974433898925781, 0.5595855712890625, -0.6502609252929688, -0.30377197265625, 1.6960296630859375, 0.7597885131835938, -0.20642471313476562, 0.32975006103515625, 0.63720703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000066.npy"}
|
|
{"epoch": 0.09977324263038549, "step": 67, "batch_size": 64, "mean": 0.26213228702545166, "std": 0.7651592493057251, "min": -1.55731201171875, "p10": -0.6241046905517578, "median": 0.16071510314941406, "p90": 1.0877916336059574, "max": 2.3321533203125, "pos_frac": 0.671875, "sample": [-0.034912109375, -0.7585601806640625, -0.24718856811523438, -0.4404277801513672, -1.2574691772460938, 0.3673839569091797, 1.5610923767089844, 0.9617061614990234, 0.035614013671875, -0.0097808837890625, 0.9776763916015625, 0.11370086669921875, -0.6478042602539062, -0.7679443359375, 1.1265487670898438, 0.9973583221435547, -0.2580127716064453, 0.7598533630371094, -0.40778350830078125, 0.7794876098632812, -0.4036102294921875, 0.8571701049804688, -1.55731201171875, 2.0767059326171875, 0.0666656494140625, 0.106842041015625, 0.32222747802734375, 0.32050323486328125, 1.3657760620117188, -0.5688056945800781, 0.16504287719726562, 0.49777793884277344, 0.04058074951171875, 0.9803695678710938, 0.6353683471679688, 0.12979698181152344, 0.6329307556152344, 0.5235385894775391, -0.5206375122070312, 0.8093833923339844, 2.3321533203125, -0.14560699462890625, 1.2366790771484375, 0.9783859252929688, 0.13690185546875, -0.021419525146484375, -0.0513458251953125, 0.3591156005859375, 0.13349151611328125, -0.9486503601074219, 0.3159217834472656, 0.865814208984375, 0.48810386657714844, 0.10079002380371094, 0.12978363037109375, -0.43387603759765625, 0.7715682983398438, 0.5838775634765625, -0.48461151123046875, -1.5523300170898438, 1.6001548767089844, 0.21622467041015625, 0.6781005859375, 0.1563873291015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000067.npy"}
|
|
{"epoch": 0.10128495842781557, "step": 68, "batch_size": 64, "mean": 0.32676631212234497, "std": 1.0771362781524658, "min": -3.384002685546875, "p10": -0.6255317687988281, "median": 0.3075904846191406, "p90": 1.0770217895507814, "max": 4.79022216796875, "pos_frac": 0.640625, "sample": [4.79022216796875, 0.97332763671875, 0.2940502166748047, 0.78662109375, -0.6205596923828125, 1.9392318725585938, 0.9384918212890625, -0.05182647705078125, 0.47629547119140625, -0.001739501953125, 1.0555648803710938, -3.384002685546875, 1.0841445922851562, 0.8682861328125, -0.15288543701171875, 0.9350547790527344, -0.31722259521484375, -0.0992431640625, 0.04654693603515625, 1.3157882690429688, 0.7225875854492188, 0.20630836486816406, 0.301910400390625, 1.0604019165039062, 0.6734123229980469, 0.2636852264404297, 0.6616973876953125, -0.27276039123535156, 1.6198348999023438, -0.2294921875, -0.07887840270996094, 0.7071132659912109, 0.48239707946777344, 0.0330810546875, -0.6276626586914062, 0.31327056884765625, 2.931304931640625, 0.260101318359375, -0.21015167236328125, 0.2612762451171875, -0.9466400146484375, -0.5706901550292969, 0.6915283203125, 0.572540283203125, -1.3595504760742188, -2.2198944091796875, 0.5530319213867188, -0.01853179931640625, -0.555511474609375, -0.31336212158203125, 0.983367919921875, 0.7337188720703125, -0.7547607421875, 1.1585845947265625, 0.90362548828125, 0.20983123779296875, 0.453155517578125, 0.33002471923828125, 0.9125518798828125, 0.9964351654052734, 0.9038963317871094, -0.185028076171875, -0.23274612426757812, -1.288116455078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000068.npy"}
|
|
{"epoch": 0.10279667422524566, "step": 69, "batch_size": 64, "mean": 0.6069809198379517, "std": 1.0346497297286987, "min": -1.5861129760742188, "p10": -0.43097534179687497, "median": 0.5538845062255859, "p90": 1.7893608093261721, "max": 4.370880126953125, "pos_frac": 0.703125, "sample": [0.037189483642578125, -0.3556175231933594, 2.118885040283203, 0.28862762451171875, 1.0915679931640625, -0.3714256286621094, -0.12621116638183594, -0.11926841735839844, 0.9974517822265625, 1.2863006591796875, 0.3606853485107422, 0.3491401672363281, 0.5436439514160156, 1.3055915832519531, 0.7430801391601562, 1.7510528564453125, 0.37596893310546875, 0.8480300903320312, 2.196044921875, -1.3092193603515625, 2.1886444091796875, -1.5861129760742188, 1.6748809814453125, 0.2050933837890625, 1.391693115234375, -0.157745361328125, 0.2016754150390625, 0.9520492553710938, 0.3062591552734375, -0.2301025390625, 0.7989921569824219, -0.17352294921875, 0.5898914337158203, 0.801910400390625, 0.5666618347167969, -0.4681854248046875, 1.6325111389160156, 0.6266803741455078, -0.23699951171875, 0.5641250610351562, 1.1880950927734375, 0.4218940734863281, 0.8862457275390625, 0.424560546875, 1.366119384765625, -0.16594696044921875, 1.6863441467285156, 0.760894775390625, -0.4219017028808594, -1.12713623046875, 0.18532180786132812, 0.8754348754882812, -1.3216476440429688, -0.4348640441894531, 1.0778694152832031, -0.18453216552734375, 2.10540771484375, 0.9905738830566406, 1.8057785034179688, -0.5446262359619141, 4.370880126953125, -0.014711380004882812, 3.1516571044921875, 0.10515213012695312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000069.npy"}
|
|
{"epoch": 0.10430839002267574, "step": 70, "batch_size": 64, "mean": 0.32000866532325745, "std": 1.0011906623840332, "min": -2.7131500244140625, "p10": -0.9322603225708007, "median": 0.5009078979492188, "p90": 1.2894958496093751, "max": 2.64495849609375, "pos_frac": 0.6875, "sample": [0.3368988037109375, -0.9004878997802734, 1.21221923828125, -1.23394775390625, 0.5065841674804688, -0.7033443450927734, -0.4426116943359375, 0.9137420654296875, 1.0967559814453125, -0.47097206115722656, 0.5929183959960938, 0.803070068359375, -2.7131500244140625, 1.1070404052734375, 0.0376129150390625, -0.8690528869628906, 1.2919998168945312, -1.7512588500976562, -0.797027587890625, 0.07385635375976562, 2.5313186645507812, 0.5343914031982422, 1.1062126159667969, 0.6790561676025391, -1.098541259765625, 1.1881866455078125, 0.4008522033691406, 2.02703857421875, -0.9458770751953125, 2.64495849609375, 0.11920928955078125, 0.5003166198730469, -0.6323776245117188, 0.01662445068359375, 0.16558074951171875, 1.2836532592773438, 1.7107620239257812, 0.8172607421875, 0.6356735229492188, 1.0691795349121094, 0.5016498565673828, -0.9477176666259766, -0.2561187744140625, 0.7425460815429688, 0.9828376770019531, 0.7379341125488281, -0.8869056701660156, 1.764007568359375, 0.3880901336669922, 1.2426490783691406, 0.5332107543945312, 1.06146240234375, -0.13065338134765625, -0.320892333984375, 0.5014991760253906, 0.35977935791015625, 0.0867919921875, -1.5299129486083984, 1.34637451171875, 0.6781406402587891, -0.2285919189453125, 0.3570518493652344, -0.5196399688720703, 1.172637939453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000070.npy"}
|
|
{"epoch": 0.10582010582010581, "step": 71, "batch_size": 64, "mean": 0.16859376430511475, "std": 1.1077276468276978, "min": -3.05523681640625, "p10": -1.1994014739990233, "median": 0.16757583618164062, "p90": 1.370297431945801, "max": 2.422698974609375, "pos_frac": 0.5625, "sample": [-0.34801673889160156, 1.0885696411132812, 0.8110618591308594, 0.6080360412597656, -0.08453750610351562, -0.2598457336425781, 0.9267807006835938, -1.2606582641601562, -0.1161651611328125, 2.422698974609375, 0.3685741424560547, -1.5320205688476562, 0.3896026611328125, 0.37735748291015625, -1.4436721801757812, -0.06260871887207031, 0.30096435546875, 1.7521743774414062, 0.3709259033203125, 1.256683349609375, -3.02044677734375, 2.40325927734375, -0.0816650390625, -0.6544837951660156, 1.2038288116455078, 1.8583526611328125, 0.157562255859375, -0.024793624877929688, 0.7222499847412109, -0.5789337158203125, -3.05523681640625, -0.550872802734375, -1.6280593872070312, -0.997467041015625, -0.07673454284667969, 0.1699371337890625, 0.5107383728027344, 0.18011474609375, 1.3371524810791016, -0.6523513793945312, -0.20075225830078125, -1.9859161376953125, 0.9583587646484375, 2.101409912109375, -0.23656654357910156, -0.156280517578125, -0.9625396728515625, 0.8882064819335938, -0.113525390625, -0.13948631286621094, 1.1123771667480469, 1.3845024108886719, -0.50091552734375, 1.135406494140625, 0.6409759521484375, 0.9370803833007812, 0.5351829528808594, 0.0551605224609375, 0.16521453857421875, 0.5422420501708984, 1.0833091735839844, 0.11768150329589844, 1.6972885131835938, -1.0564689636230469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000071.npy"}
|
|
{"epoch": 0.1073318216175359, "step": 72, "batch_size": 64, "mean": 0.6240211129188538, "std": 1.0387990474700928, "min": -2.1306915283203125, "p10": -0.6156593322753906, "median": 0.6079387664794922, "p90": 1.7903860092163089, "max": 3.5405044555664062, "pos_frac": 0.71875, "sample": [1.2634773254394531, 1.1055831909179688, 0.585205078125, 1.1702957153320312, -2.1306915283203125, 0.5422515869140625, 0.3694610595703125, 3.5405044555664062, -1.6075897216796875, -0.11726951599121094, 1.532888412475586, 1.5931396484375, 0.15470123291015625, 1.7009086608886719, 0.3929290771484375, -0.2930316925048828, -0.37807273864746094, 0.7664031982421875, 1.3457260131835938, 0.1682586669921875, 2.60986328125, 0.5077362060546875, 1.8287334442138672, 0.3785228729248047, 0.41381072998046875, 0.25832366943359375, 0.2768707275390625, -0.7316036224365234, -0.930572509765625, -0.09344482421875, 2.423583984375, 0.8628196716308594, 0.6527328491210938, 2.36090087890625, -0.6331253051757812, 0.3490009307861328, 0.7302036285400391, 1.1750335693359375, 0.6306724548339844, -0.14271163940429688, 1.29559326171875, 0.8523063659667969, 1.6136398315429688, -0.08841705322265625, 0.984619140625, -0.110382080078125, 0.9635162353515625, 1.1845703125, -0.9011611938476562, 0.6804046630859375, -0.4526214599609375, 1.50390625, 2.6280441284179688, -0.4370708465576172, 2.0544967651367188, -0.14586830139160156, 1.4968605041503906, 0.4813079833984375, -1.1167144775390625, 0.22485923767089844, -0.5749053955078125, 1.22686767578125, 1.1709060668945312, 0.7701644897460938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000072.npy"}
|
|
{"epoch": 0.10884353741496598, "step": 73, "batch_size": 64, "mean": 0.1251724660396576, "std": 1.1242974996566772, "min": -2.627532958984375, "p10": -1.0537982940673827, "median": -0.03379058837890625, "p90": 1.4850437164306642, "max": 4.3165283203125, "pos_frac": 0.484375, "sample": [0.8176383972167969, 0.23820877075195312, -0.1937713623046875, -0.6984519958496094, -1.16583251953125, -0.5384521484375, -0.7061386108398438, -0.8764839172363281, -2.627532958984375, 1.5112037658691406, -0.47429656982421875, 0.183685302734375, -0.20085906982421875, -0.8836593627929688, -0.9134063720703125, 0.14004898071289062, -1.0006675720214844, 0.12130355834960938, -0.1337738037109375, 0.0749664306640625, -0.9159088134765625, -0.2973518371582031, -0.6735877990722656, -0.35604095458984375, 4.3165283203125, 1.8580780029296875, -0.4070243835449219, -0.410858154296875, -0.1582965850830078, 0.4686927795410156, 0.44292259216308594, -0.14800071716308594, 0.6836166381835938, 0.2177276611328125, -1.076568603515625, 0.17862701416015625, 1.3409442901611328, 1.986724853515625, -1.5318374633789062, -1.1976242065429688, 0.1781158447265625, 1.5686798095703125, 0.201995849609375, -1.2939987182617188, -0.36350250244140625, 1.4240036010742188, 0.48459625244140625, -0.4042186737060547, 0.5600051879882812, -0.1824188232421875, -1.133758544921875, 0.6328964233398438, 0.9692268371582031, 0.5335769653320312, 1.1922950744628906, 0.7201995849609375, 0.7819709777832031, -0.034759521484375, -0.0328216552734375, -0.495452880859375, 0.21866416931152344, 2.4187278747558594, 3.340667724609375, -0.2681446075439453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000073.npy"}
|
|
{"epoch": 0.11035525321239607, "step": 74, "batch_size": 64, "mean": 0.5881245136260986, "std": 1.019234299659729, "min": -1.8679656982421875, "p10": -0.6656307220458983, "median": 0.6991291046142578, "p90": 1.8876121520996094, "max": 3.6298065185546875, "pos_frac": 0.71875, "sample": [-0.33132171630859375, 1.0045089721679688, -0.3251800537109375, 0.2205963134765625, 1.0916099548339844, -0.08831787109375, 1.6446151733398438, 3.6298065185546875, 1.2638702392578125, 2.344074249267578, 1.519643783569336, 0.8639106750488281, 0.714508056640625, 1.1082992553710938, -0.5116386413574219, 1.0964736938476562, 0.87750244140625, 0.40020751953125, 0.11664772033691406, -1.5669708251953125, 0.83013916015625, 1.1288909912109375, 0.6897964477539062, 0.5433502197265625, 0.6665153503417969, 0.05535316467285156, -0.5305194854736328, 0.7614173889160156, 1.9030914306640625, -0.926300048828125, 0.5764579772949219, 0.9417819976806641, 1.5243968963623047, -0.8829803466796875, -0.6810035705566406, 0.755706787109375, -0.6239604949951172, 0.8725128173828125, -0.7470550537109375, -0.3946723937988281, 0.7306537628173828, -0.7331218719482422, 0.7996673583984375, 0.7084617614746094, 0.4532432556152344, 2.4437103271484375, 2.2500152587890625, -0.48926544189453125, 1.8905715942382812, 1.3212432861328125, -1.8679656982421875, -0.2630939483642578, 0.7867927551269531, 0.27381134033203125, -0.6297607421875, 0.19319915771484375, -0.02346038818359375, 1.880706787109375, 2.1979751586914062, 1.6554641723632812, 1.5837669372558594, 0.5706748962402344, 0.36554527282714844, 0.00537109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000074.npy"}
|
|
{"epoch": 0.11186696900982615, "step": 75, "batch_size": 64, "mean": 0.6123708486557007, "std": 1.1242945194244385, "min": -2.489612579345703, "p10": -0.7263595581054687, "median": 0.4658365249633789, "p90": 2.120983123779298, "max": 4.020721435546875, "pos_frac": 0.765625, "sample": [-0.5669517517089844, 0.027008056640625, 0.3071269989013672, 0.6031570434570312, 2.2716217041015625, 0.0394744873046875, 0.33681488037109375, 0.9807891845703125, 1.6277999877929688, 2.3854598999023438, 0.17246246337890625, 0.17966461181640625, -0.762115478515625, 0.510528564453125, -0.17222976684570312, 1.2905101776123047, 0.69171142578125, -0.8024845123291016, 0.5331430435180664, 0.1390972137451172, 2.9151611328125, 1.0512123107910156, 1.6755905151367188, 1.7694931030273438, 0.7505416870117188, 0.3182868957519531, 1.6994438171386719, -0.804595947265625, 1.2487258911132812, 0.6818809509277344, 0.49715232849121094, 2.477550506591797, -0.0624542236328125, 1.6235637664794922, 0.5918045043945312, 0.09902381896972656, 4.020721435546875, 0.292083740234375, 1.514129638671875, 0.9131984710693359, -0.6289291381835938, 0.9327602386474609, -2.489612579345703, 0.4345207214355469, 0.9885330200195312, -0.13635635375976562, -0.042881011962890625, 0.09148025512695312, 0.17188644409179688, 0.12252616882324219, 0.6875476837158203, 1.7453231811523438, -0.9927768707275391, -0.6429290771484375, 0.15771865844726562, -1.0617218017578125, 2.986236572265625, -0.48583412170410156, -0.7962570190429688, 0.3796195983886719, 0.7812995910644531, 0.19667625427246094, 3.0223846435546875, 0.7054176330566406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000075.npy"}
|
|
{"epoch": 0.11337868480725624, "step": 76, "batch_size": 64, "mean": 0.24112600088119507, "std": 1.1088132858276367, "min": -2.4835739135742188, "p10": -0.920493698120117, "median": 0.0836944580078125, "p90": 1.7560230255126956, "max": 3.6191024780273438, "pos_frac": 0.578125, "sample": [-2.3574371337890625, 1.8000907897949219, -2.2610702514648438, 0.2177734375, 0.58697509765625, -0.7940673828125, -0.21257972717285156, 0.8187980651855469, 0.25921058654785156, 2.1628036499023438, 1.9770679473876953, 3.6191024780273438, -0.28264427185058594, -2.4835739135742188, 0.636077880859375, -0.9607162475585938, 0.10353851318359375, 0.9534835815429688, 0.2154998779296875, 0.23266220092773438, 0.50360107421875, 1.8075027465820312, -1.3821945190429688, 1.825225830078125, 0.0128936767578125, 0.847869873046875, 0.08026123046875, 1.6514358520507812, -0.5837955474853516, -0.2665996551513672, -0.0419769287109375, -0.5430164337158203, -1.3051910400390625, 0.087127685546875, -0.3950309753417969, 0.10201263427734375, 1.2545089721679688, -0.2504119873046875, -0.9682388305664062, 0.5231399536132812, 0.04737663269042969, 0.03912353515625, -0.14180374145507812, -0.3937721252441406, 0.5762939453125, 0.8969974517822266, 0.054798126220703125, -0.8266410827636719, -0.007617950439453125, 0.4147796630859375, -0.2784576416015625, 0.6251449584960938, -0.21903228759765625, 1.268707275390625, 2.5221099853515625, 1.0976734161376953, -0.3500404357910156, -0.34419822692871094, -0.7651443481445312, 1.6531982421875, 1.448028564453125, -0.035221099853515625, 1.1034698486328125, -0.1438274383544922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000076.npy"}
|
|
{"epoch": 0.11489040060468632, "step": 77, "batch_size": 64, "mean": 0.5327691435813904, "std": 1.0243560075759888, "min": -2.166595458984375, "p10": -0.821771240234375, "median": 0.3774986267089844, "p90": 1.9200965881347656, "max": 2.95037841796875, "pos_frac": 0.734375, "sample": [-0.01563262939453125, 0.8354873657226562, -0.7167778015136719, -1.2064056396484375, 1.2609424591064453, 1.5895156860351562, -0.7716445922851562, 0.27463340759277344, -2.166595458984375, 0.5612258911132812, 1.1445541381835938, 1.0758781433105469, 0.15517425537109375, -0.8432540893554688, 0.6054229736328125, 2.2702484130859375, 0.3668212890625, 1.0887451171875, 0.7823257446289062, 2.3676605224609375, -0.015356063842773438, 0.1491565704345703, 1.9222640991210938, 1.1850013732910156, 0.15686416625976562, 0.007907867431640625, -0.14553260803222656, 0.849578857421875, 0.2413330078125, 0.24288177490234375, -0.43544769287109375, -0.9260978698730469, -0.16866683959960938, 0.3554649353027344, 0.9270095825195312, 1.9150390625, 1.2833099365234375, 0.8512859344482422, 0.8841781616210938, 2.13165283203125, 1.1011199951171875, 0.2868080139160156, 0.8822784423828125, 0.5146541595458984, 0.23713302612304688, 2.95037841796875, 0.01834869384765625, -0.003265380859375, 2.454559326171875, 0.9207916259765625, 2.1453094482421875, -0.29709625244140625, 0.25902366638183594, 1.8671398162841797, 1.7627944946289062, 0.2996101379394531, -1.6324920654296875, -1.2970428466796875, 0.20699310302734375, -0.21280288696289062, 0.38817596435546875, 1.394073486328125, 0.6638813018798828, -0.88330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000077.npy"}
|
|
{"epoch": 0.1164021164021164, "step": 78, "batch_size": 64, "mean": 0.3835884630680084, "std": 1.0587424039840698, "min": -2.3094253540039062, "p10": -1.0999717712402342, "median": 0.308502197265625, "p90": 1.7789436340332032, "max": 2.4023265838623047, "pos_frac": 0.671875, "sample": [1.9388160705566406, -1.365234375, 0.7626495361328125, 1.8522872924804688, 0.15914154052734375, 2.4023265838623047, 0.9365158081054688, -0.252197265625, 0.5195884704589844, -0.7210159301757812, 1.7425689697265625, -1.2958526611328125, -0.07606697082519531, 1.434112548828125, -0.0333404541015625, -0.7627410888671875, -1.2247238159179688, 2.2514495849609375, -0.8088836669921875, 0.24503326416015625, -0.5626068115234375, -0.6457595825195312, 0.0810089111328125, -0.3037986755371094, 0.578704833984375, 0.28925323486328125, 0.3004302978515625, -2.3094253540039062, 1.4874801635742188, -1.3682708740234375, 1.3471260070800781, 1.6415481567382812, 0.39394378662109375, 2.1324691772460938, 0.432586669921875, -0.580780029296875, -1.6663742065429688, 0.163116455078125, 1.242868423461914, 1.6097049713134766, 1.3207664489746094, 0.47423553466796875, 1.011749267578125, 0.072174072265625, -0.6922225952148438, 1.3006973266601562, -1.3298263549804688, 0.09354400634765625, 0.3165740966796875, 1.0822067260742188, -0.4687061309814453, 1.7945327758789062, 0.0599212646484375, -0.35607147216796875, 0.1581573486328125, 0.2628021240234375, 0.3481426239013672, -0.1663970947265625, 1.2901039123535156, 1.0742950439453125, 1.0972442626953125, 0.5958786010742188, 1.9747886657714844, 1.2674121856689453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000078.npy"}
|
|
{"epoch": 0.11791383219954649, "step": 79, "batch_size": 64, "mean": 0.8774796724319458, "std": 1.3495033979415894, "min": -2.2635955810546875, "p10": -0.5821037292480469, "median": 0.7214241027832031, "p90": 2.464149093627931, "max": 5.856170654296875, "pos_frac": 0.765625, "sample": [0.1209716796875, 0.5469818115234375, -1.1363067626953125, 1.2056465148925781, -0.310546875, 0.8429756164550781, -2.2635955810546875, -0.3045063018798828, 0.9204559326171875, 0.16819000244140625, -0.5365619659423828, 0.71002197265625, 1.44464111328125, 5.856170654296875, -0.8137741088867188, 0.2702789306640625, 1.6817512512207031, 0.6777706146240234, -0.6233158111572266, 0.019443511962890625, 0.65106201171875, 3.8593597412109375, 0.97088623046875, 3.17681884765625, 0.7001800537109375, -0.7646579742431641, 2.969623565673828, 1.600921630859375, 1.6270637512207031, 1.3014373779296875, -0.6594696044921875, -0.6016216278076172, 1.3974609375, 1.3843765258789062, 1.4363784790039062, 0.5775051116943359, 1.4879341125488281, 1.269775390625, -0.3297004699707031, 1.418670654296875, -0.11280441284179688, 2.0569915771484375, 0.5271453857421875, -0.17414474487304688, 0.2431354522705078, 0.9160327911376953, 1.3475570678710938, 0.35862159729003906, 0.8011436462402344, 0.7632942199707031, 0.7621612548828125, -0.41208648681640625, 3.393035888671875, 0.2130889892578125, 0.4806976318359375, 4.692291259765625, 0.41831207275390625, 1.1044540405273438, -0.01418304443359375, 1.9499702453613281, 0.7328262329101562, 1.5011024475097656, 2.6386451721191406, 0.02071380615234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000079.npy"}
|
|
{"epoch": 0.11942554799697656, "step": 80, "batch_size": 64, "mean": 0.46766987442970276, "std": 1.3027317523956299, "min": -2.5001068115234375, "p10": -1.3178829193115233, "median": 0.2536640167236328, "p90": 2.4421905517578124, "max": 3.4301681518554688, "pos_frac": 0.640625, "sample": [-0.2436046600341797, 2.7348861694335938, 0.6912841796875, 1.6282806396484375, 1.1080474853515625, -1.8595733642578125, -0.24625396728515625, -0.13643264770507812, -1.3159255981445312, -1.8230133056640625, 1.7705326080322266, 1.7017669677734375, -0.1609344482421875, -0.9679031372070312, -0.0675811767578125, 3.4301681518554688, -0.43412017822265625, 0.36466217041015625, 0.9763965606689453, 0.08072853088378906, 0.2639350891113281, -0.3416748046875, -0.24350738525390625, 0.9020767211914062, 2.9390411376953125, 2.613016128540039, 1.7400665283203125, -0.000518798828125, 2.2685699462890625, 2.7863616943359375, -0.6599960327148438, 1.5225067138671875, 0.184967041015625, 0.508026123046875, 0.80902099609375, 1.7794876098632812, -0.7569522857666016, 0.2433929443359375, 2.440460205078125, -2.5001068115234375, 1.07403564453125, -0.7778244018554688, -1.757232666015625, 1.6554183959960938, 0.103485107421875, 0.6492538452148438, 0.7884063720703125, 0.10537528991699219, 0.27036285400390625, -1.3320674896240234, 2.44293212890625, 0.12516403198242188, 0.546875, -0.038578033447265625, 0.030300140380859375, 0.05425071716308594, 0.4990081787109375, -0.5693607330322266, 1.2560405731201172, -1.3187217712402344, 0.19824981689453125, -1.3375701904296875, 2.528961181640625, 1.0045242309570312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000080.npy"}
|
|
{"epoch": 0.12093726379440665, "step": 81, "batch_size": 64, "mean": 0.7495482563972473, "std": 1.1332889795303345, "min": -1.1980056762695312, "p10": -0.3146381378173828, "median": 0.4983644485473633, "p90": 1.9502904891967776, "max": 5.120025634765625, "pos_frac": 0.71875, "sample": [0.9310569763183594, 1.0547161102294922, 1.6177864074707031, -0.6592941284179688, 1.8517227172851562, -1.1980056762695312, 0.5013885498046875, 0.13393402099609375, -0.3065338134765625, -0.14140701293945312, 2.4976348876953125, 0.7280044555664062, -0.02919769287109375, 1.1239471435546875, 0.3821601867675781, 0.706512451171875, 0.36969757080078125, 0.033931732177734375, 1.8058624267578125, 1.8527870178222656, 0.7513389587402344, -0.17980575561523438, 1.181060791015625, 0.2398681640625, 1.9111480712890625, 1.0079879760742188, 0.12422943115234375, 0.29132843017578125, -0.07057571411132812, 0.13262939453125, 0.7721786499023438, -0.22425079345703125, 1.2756500244140625, 0.0858306884765625, -0.23394012451171875, 2.332763671875, 0.0448455810546875, 5.120025634765625, 1.336822509765625, 3.3223419189453125, -0.06549835205078125, 1.094808578491211, 1.0230484008789062, -0.3181114196777344, 1.6614913940429688, -0.14628219604492188, 1.9670658111572266, 0.5853652954101562, 1.5617523193359375, 1.5275421142578125, 3.470306396484375, 0.4238433837890625, 1.7635478973388672, 0.088043212890625, -0.3644256591796875, -0.1282196044921875, -0.9620838165283203, 0.49534034729003906, 0.38217926025390625, 2.095287322998047, -0.13806533813476562, 1.2872962951660156, -1.1004562377929688, -0.7108688354492188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000081.npy"}
|
|
{"epoch": 0.12244897959183673, "step": 82, "batch_size": 64, "mean": 0.8737020492553711, "std": 1.0189237594604492, "min": -1.3101043701171875, "p10": -0.268099021911621, "median": 0.8170242309570312, "p90": 2.2654045104980476, "max": 4.025093078613281, "pos_frac": 0.828125, "sample": [0.6137142181396484, 1.3815746307373047, 0.28356170654296875, -0.016725540161132812, 1.9663238525390625, 0.11180686950683594, 0.5800457000732422, 1.78369140625, 2.5983734130859375, 1.2327423095703125, 1.1596527099609375, 0.4500408172607422, 0.479766845703125, 2.5755386352539062, 0.6002540588378906, 0.8786888122558594, 0.6992340087890625, 0.22046279907226562, 1.1380805969238281, 0.81683349609375, 0.161468505859375, 0.6200027465820312, 1.0110721588134766, 0.12457656860351562, -0.3499603271484375, 2.3194503784179688, 1.0438690185546875, 0.91864013671875, 1.4175357818603516, 1.3418292999267578, 0.5301856994628906, -0.12836647033691406, 4.025093078613281, 0.48162078857421875, -0.149871826171875, 2.607574462890625, 1.899627685546875, 0.9066390991210938, 1.3046875, 3.3381080627441406, 0.55853271484375, -0.7204818725585938, 1.960784912109375, -1.0061607360839844, 1.494852066040039, 0.24477005004882812, 1.3305072784423828, 0.1099853515625, 2.5149459838867188, 0.8172149658203125, -0.5179290771484375, 0.06659317016601562, 1.3660125732421875, 2.1392974853515625, 0.045867919921875, -0.16507530212402344, 1.0783615112304688, 0.8270797729492188, 1.7102813720703125, 0.6079158782958984, -0.7372512817382812, -1.3101043701171875, -0.3122520446777344, 0.8357391357421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000082.npy"}
|
|
{"epoch": 0.12396069538926682, "step": 83, "batch_size": 64, "mean": 0.9089135527610779, "std": 1.3108036518096924, "min": -3.2047176361083984, "p10": -0.48127136230468737, "median": 0.9485492706298828, "p90": 2.1106605529785165, "max": 5.46954345703125, "pos_frac": 0.796875, "sample": [2.1806182861328125, 1.32928466796875, -0.0220184326171875, 0.5019721984863281, 0.9423637390136719, -0.22052001953125, 0.19476318359375, 0.5938377380371094, 1.6418838500976562, 3.26593017578125, -0.5403823852539062, 0.6514453887939453, -1.3026847839355469, 0.7736473083496094, 1.4078083038330078, 0.8060073852539062, 0.386932373046875, 1.7124786376953125, 1.591461181640625, 1.029754638671875, 1.83404541015625, -0.015869140625, 2.5804595947265625, 0.01752471923828125, -0.6475067138671875, 1.0102386474609375, 2.4006195068359375, 0.9547348022460938, 0.25866127014160156, -0.03297996520996094, 0.3508148193359375, 0.32741737365722656, 2.6183395385742188, -3.2047176361083984, -0.7255439758300781, 1.4490604400634766, 1.3462677001953125, -0.9484481811523438, 1.1545124053955078, -0.9116668701171875, 0.08492469787597656, 1.1507415771484375, 1.2009239196777344, 1.2884082794189453, -0.25016021728515625, 0.38369178771972656, 0.2534923553466797, 1.0422592163085938, 1.2854995727539062, 1.490478515625, 5.201995849609375, 1.4356327056884766, 1.6059494018554688, 0.45008087158203125, 0.4629974365234375, 1.5602836608886719, 1.1243515014648438, 1.8558464050292969, 1.9474258422851562, 5.46954345703125, 1.576324462890625, 0.7843475341796875, 0.3682270050048828, -0.34334564208984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000083.npy"}
|
|
{"epoch": 0.1254724111866969, "step": 84, "batch_size": 64, "mean": 0.6354624032974243, "std": 1.1710755825042725, "min": -2.82275390625, "p10": -0.762204360961914, "median": 0.5398960113525391, "p90": 2.159643936157227, "max": 3.0323486328125, "pos_frac": 0.71875, "sample": [0.7299041748046875, 1.7807960510253906, 0.8594703674316406, 1.7371482849121094, 1.663543701171875, 1.6149940490722656, 2.7930755615234375, -0.5506782531738281, 0.23727035522460938, -0.5320053100585938, -0.7792510986328125, 2.1720848083496094, 1.8101463317871094, 0.511199951171875, 0.7077178955078125, -0.12413406372070312, 0.04827117919921875, -0.7686691284179688, 2.3906326293945312, -0.9159202575683594, 0.69384765625, 0.2281951904296875, 1.0042896270751953, 1.7490768432617188, 0.294219970703125, 1.2138671875, -0.7471199035644531, 2.0285797119140625, 0.1983661651611328, 0.0138397216796875, 1.1639862060546875, 0.3509368896484375, -0.22789764404296875, 2.9501953125, -0.9942703247070312, 1.2019882202148438, 1.6428794860839844, 1.6861515045166016, 0.57232666015625, -0.6984176635742188, -2.82275390625, 3.0323486328125, 1.0230636596679688, 0.3331165313720703, 1.8800201416015625, 0.06214714050292969, 0.027841567993164062, 2.581684112548828, 0.4127960205078125, -0.6801834106445312, 0.16736602783203125, -1.0325546264648438, 2.264680862426758, 0.734222412109375, 0.5685920715332031, -0.6644668579101562, 1.5470428466796875, 0.41925048828125, -0.042266845703125, -0.6729736328125, 2.130615234375, -0.05789947509765625, -1.0280799865722656, 0.7753448486328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000084.npy"}
|
|
{"epoch": 0.12698412698412698, "step": 85, "batch_size": 64, "mean": 0.9045511484146118, "std": 1.2941683530807495, "min": -2.639850616455078, "p10": -0.9047950744628905, "median": 0.9805259704589844, "p90": 2.45031967163086, "max": 3.9407424926757812, "pos_frac": 0.78125, "sample": [2.2684974670410156, -0.8122329711914062, 0.6995830535888672, 1.4890499114990234, 0.24846649169921875, 1.2969436645507812, 1.0946578979492188, 0.5385017395019531, -0.37709999084472656, 2.1579017639160156, 3.04486083984375, -1.1835174560546875, 1.9463119506835938, 1.465911865234375, 1.0884838104248047, 1.7538604736328125, 1.9427413940429688, 2.2856216430664062, 1.4881362915039062, 2.103790283203125, -0.8631439208984375, 1.377950668334961, -1.9227294921875, 1.8927841186523438, 0.3637886047363281, 1.244232177734375, -0.15459060668945312, 1.0377120971679688, 2.8955421447753906, 0.024766921997070312, -1.132965087890625, 1.2981452941894531, 0.8896331787109375, -0.03741455078125, 0.5103168487548828, -0.04703521728515625, 2.6603012084960938, 2.0599937438964844, 0.42376708984375, -0.15584945678710938, -2.639850616455078, 1.5583267211914062, 1.0043563842773438, 1.4138832092285156, 3.9407424926757812, 2.8101634979248047, 2.2000885009765625, 1.3576679229736328, 0.956695556640625, 0.7954177856445312, 0.48946380615234375, -0.9226455688476562, 0.26153564453125, 0.7613906860351562, 0.6751174926757812, 1.0658416748046875, -1.0424423217773438, -1.1984939575195312, 2.520904541015625, 0.10506057739257812, 3.6028900146484375, 0.24040985107421875, 0.42406463623046875, 0.6050090789794922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000085.npy"}
|
|
{"epoch": 0.12849584278155707, "step": 86, "batch_size": 64, "mean": 0.7151964902877808, "std": 1.3528366088867188, "min": -2.4392623901367188, "p10": -0.9205026626586914, "median": 0.5449104309082031, "p90": 2.472543716430664, "max": 3.9616546630859375, "pos_frac": 0.703125, "sample": [2.7019500732421875, 0.5304660797119141, -0.7305068969726562, 0.25251007080078125, 2.1021289825439453, 0.5714645385742188, 0.793243408203125, 0.4077301025390625, -2.4392623901367188, 0.4383869171142578, -0.5148773193359375, -1.4167022705078125, 2.4806976318359375, 3.9616546630859375, 0.08394622802734375, 1.8804702758789062, 1.1685962677001953, -0.9226970672607422, -1.748138427734375, 0.0620880126953125, 2.54547119140625, -0.47693634033203125, -0.0654296875, 2.0421619415283203, 1.9294891357421875, 0.12648963928222656, 1.6869659423828125, 0.9551315307617188, 2.5858230590820312, 0.20906448364257812, 2.3256797790527344, 1.801483154296875, 1.402801513671875, -0.7232589721679688, 1.7867431640625, 1.504974365234375, -1.2951469421386719, 0.7105598449707031, 0.5511970520019531, 0.9587860107421875, -0.9153823852539062, 1.2255992889404297, 1.3783187866210938, -0.3370475769042969, 0.3522300720214844, 0.5386238098144531, 2.067098617553711, -0.33641624450683594, 1.6696052551269531, -0.6675357818603516, 3.168670654296875, 1.7454910278320312, -0.6459579467773438, -0.18943214416503906, 2.4535179138183594, 3.691162109375, -1.1220073699951172, 0.24124908447265625, 0.19329071044921875, -1.43365478515625, 0.3689155578613281, -0.20529747009277344, 1.4059524536132812, 0.9003829956054688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000086.npy"}
|
|
{"epoch": 0.13000755857898716, "step": 87, "batch_size": 64, "mean": 0.8272853493690491, "std": 1.6700708866119385, "min": -3.7592544555664062, "p10": -1.1792551040649413, "median": 0.8526763916015625, "p90": 2.9815994262695313, "max": 4.9195556640625, "pos_frac": 0.703125, "sample": [1.1045341491699219, 4.9195556640625, 3.9075851440429688, 0.48757171630859375, 2.1467437744140625, 1.3269119262695312, -0.250030517578125, -1.5203399658203125, 2.2080078125, 2.177328109741211, -0.4769744873046875, -0.9540786743164062, 1.0348472595214844, -1.238739013671875, -3.3013916015625, 3.3017349243164062, -0.647186279296875, 2.6270904541015625, 1.2361583709716797, 1.6474227905273438, -1.85748291015625, 2.3343124389648438, 1.5201358795166016, -0.9493598937988281, 3.0695114135742188, -3.7592544555664062, -0.5365486145019531, -0.2391185760498047, 2.0702590942382812, -1.2163677215576172, -0.6761360168457031, 0.16388702392578125, 3.012420654296875, -1.0219268798828125, 0.963134765625, -1.0926589965820312, 0.44138336181640625, 0.6456851959228516, 1.7514820098876953, 1.4426708221435547, 0.6276969909667969, 1.40472412109375, 1.0445098876953125, -1.6377792358398438, 0.399078369140625, -0.202728271484375, 1.2361621856689453, 2.6324691772460938, 0.2721824645996094, 1.77093505859375, 1.8900222778320312, 1.4932403564453125, 3.968017578125, 0.6668548583984375, -0.06653213500976562, 0.43454742431640625, 0.2681083679199219, 0.3153533935546875, 3.035858154296875, 2.9096832275390625, 0.557342529296875, 0.9753532409667969, 0.742218017578125, 2.4061660766601562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000087.npy"}
|
|
{"epoch": 0.13151927437641722, "step": 88, "batch_size": 64, "mean": 0.709831953048706, "std": 1.7511093616485596, "min": -3.7398605346679688, "p10": -1.3497306823730468, "median": 0.6646299362182617, "p90": 2.5987110137939453, "max": 5.40911865234375, "pos_frac": 0.6875, "sample": [2.7384033203125, 0.5653018951416016, -1.3526535034179688, 0.041961669921875, 4.6431884765625, 1.8454437255859375, 4.760841369628906, 0.3964080810546875, 0.8547592163085938, -0.03423309326171875, 2.5801124572753906, 2.0801010131835938, 1.8475608825683594, 1.2079753875732422, -0.9141159057617188, 0.961761474609375, 2.068328857421875, -0.6197700500488281, -2.308441162109375, 1.5146102905273438, 2.1681079864501953, 0.6282367706298828, 0.4045143127441406, -0.5516815185546875, -1.4783973693847656, 1.744476318359375, 1.0573348999023438, 5.40911865234375, -0.6476478576660156, -0.7304935455322266, -3.7398605346679688, -0.9335403442382812, 2.2155914306640625, 2.330291748046875, 1.1628646850585938, -0.09053421020507812, -0.25312042236328125, 1.4767704010009766, 2.2183189392089844, 0.36982154846191406, 0.04427337646484375, 1.3841094970703125, 1.8984546661376953, 0.7010231018066406, -1.3429107666015625, 0.12908935546875, 0.007350921630859375, 0.045383453369140625, 2.626354217529297, 0.38693809509277344, 1.6166629791259766, 1.0615081787109375, 3.95941162109375, 1.3217926025390625, 0.8902130126953125, -1.45916748046875, 1.0902481079101562, -0.4890785217285156, 0.2108917236328125, 2.6066818237304688, -0.4728221893310547, -3.557708740234375, -1.1057510375976562, -1.7614192962646484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000088.npy"}
|
|
{"epoch": 0.1330309901738473, "step": 89, "batch_size": 64, "mean": 0.8791057467460632, "std": 1.5689276456832886, "min": -2.4858016967773438, "p10": -0.762198448181152, "median": 0.6978702545166016, "p90": 2.693825149536133, "max": 6.78057861328125, "pos_frac": 0.765625, "sample": [0.6276931762695312, 0.9464302062988281, -0.06519889831542969, 1.2930793762207031, 3.1174468994140625, 3.0356006622314453, 0.1528167724609375, -0.05003166198730469, 0.6300048828125, 2.8509140014648438, 1.6487655639648438, 2.2653350830078125, 1.493408203125, 1.0302581787109375, 0.24452972412109375, 0.8174419403076172, 0.27384185791015625, 1.282257080078125, -0.052242279052734375, -2.159688949584961, 1.96820068359375, -0.3983783721923828, 0.2467041015625, 2.7479934692382812, 0.13599395751953125, 6.78057861328125, 0.7050247192382812, 0.8791122436523438, -0.34943389892578125, 1.52197265625, -0.29096221923828125, 1.3007659912109375, 0.6907157897949219, 0.236419677734375, 1.81341552734375, 0.04843711853027344, 2.3254013061523438, 0.7525749206542969, -1.360107421875, -0.05706787109375, 2.5514774322509766, 2.0447044372558594, 3.81280517578125, 4.202507019042969, 0.47779273986816406, 0.037067413330078125, 0.47187042236328125, 2.42144775390625, 2.567432403564453, -2.053375244140625, -0.918121337890625, 1.1161937713623047, 0.4354209899902344, 0.5782051086425781, 0.8839797973632812, -2.0485610961914062, 1.029012680053711, 0.30789947509765625, -1.6507434844970703, -2.4858016967773438, 2.1680755615234375, -0.1634979248046875, 0.9749889373779297, 0.4219646453857422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000089.npy"}
|
|
{"epoch": 0.1345427059712774, "step": 90, "batch_size": 64, "mean": 0.8717068433761597, "std": 1.4769046306610107, "min": -4.092315673828125, "p10": -0.997804832458496, "median": 0.8429050445556641, "p90": 2.9093528747558604, "max": 3.85015869140625, "pos_frac": 0.75, "sample": [-1.023172378540039, 2.297433853149414, 0.113555908203125, 1.4419898986816406, 2.122333526611328, 0.5570640563964844, 1.9594154357910156, 3.5031051635742188, -1.363616943359375, -4.092315673828125, 3.167388916015625, 2.4075965881347656, 1.9715213775634766, 0.9269275665283203, 2.239654541015625, -0.5828971862792969, 0.29698944091796875, 3.2651824951171875, -1.0584716796875, 0.5902061462402344, 2.6938552856445312, 0.04931640625, 3.5262908935546875, 3.6562576293945312, 3.001708984375, 0.8378067016601562, -0.06695556640625, 3.85015869140625, 0.9632072448730469, 1.4253768920898438, 1.2469711303710938, 0.4557952880859375, 0.5562839508056641, 2.4984130859375, 0.9642257690429688, 1.1169281005859375, -0.9386138916015625, -1.2676315307617188, 1.211395263671875, 1.243520736694336, 0.201934814453125, 0.6616668701171875, 1.557891845703125, -0.316131591796875, 2.4656295776367188, 0.4207019805908203, 0.7598991394042969, 1.0189208984375, -0.10701942443847656, -0.2024993896484375, -1.4972953796386719, 1.0556373596191406, 1.10968017578125, -0.18450927734375, 0.16411590576171875, 0.668243408203125, 0.40061187744140625, -2.096811294555664, -0.06170654296875, 1.6228713989257812, 1.2234039306640625, 0.37225341796875, -0.06045722961425781, 0.8480033874511719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000090.npy"}
|
|
{"epoch": 0.1360544217687075, "step": 91, "batch_size": 64, "mean": 0.9410272836685181, "std": 1.5511263608932495, "min": -3.399566650390625, "p10": -0.657350158691406, "median": 0.6823453903198242, "p90": 3.2002143859863286, "max": 4.822288513183594, "pos_frac": 0.703125, "sample": [1.92840576171875, 2.2220993041992188, 1.2163238525390625, -0.9732036590576172, 0.7430267333984375, 4.069831848144531, -0.12439155578613281, 0.67291259765625, 2.7558822631835938, 4.822288513183594, 3.6658935546875, -0.4329071044921875, 1.466318130493164, -0.1800861358642578, 0.9122848510742188, -0.7523899078369141, 0.3297576904296875, 0.35418701171875, 1.0570068359375, -1.5758285522460938, 2.1038570404052734, 4.037567138671875, 0.1733551025390625, 0.5707244873046875, 1.8502578735351562, 1.6305694580078125, 1.9092655181884766, -0.3533439636230469, 0.05353546142578125, 1.5786056518554688, 0.8378524780273438, -0.20479965209960938, -0.2614898681640625, 0.6917781829833984, 3.101320266723633, 0.6669807434082031, -0.092254638671875, 1.639739990234375, -0.4355907440185547, -3.399566650390625, 0.8581314086914062, -0.9412460327148438, 0.17101287841796875, -2.0160293579101562, 0.43264198303222656, 1.0322914123535156, -0.206695556640625, 3.663166046142578, 1.7231597900390625, 1.33587646484375, 0.40087127685546875, -0.9735984802246094, -0.16275787353515625, 0.08088874816894531, 2.876110076904297, 3.2425975799560547, 2.1224822998046875, -0.07161903381347656, 3.4625244140625, 2.4869003295898438, -0.0125885009765625, 0.1988964080810547, 1.6286125183105469, 0.6183395385742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000091.npy"}
|
|
{"epoch": 0.13756613756613756, "step": 92, "batch_size": 64, "mean": 0.9327565431594849, "std": 1.7385667562484741, "min": -3.481639862060547, "p10": -1.3191661834716795, "median": 0.7219009399414062, "p90": 3.2319797515869144, "max": 5.0035552978515625, "pos_frac": 0.703125, "sample": [1.4146347045898438, -0.3891887664794922, -2.3245010375976562, -0.2292346954345703, -0.45767974853515625, 0.2788848876953125, 0.28388023376464844, 0.9573688507080078, 0.49395751953125, -0.09954452514648438, -0.9302291870117188, 1.4209861755371094, 2.4329566955566406, -2.0615692138671875, 3.1925315856933594, 2.3586368560791016, 1.0480537414550781, -0.031108856201171875, 3.564237594604492, -0.049961090087890625, 1.5364990234375, -0.6216888427734375, 1.5811920166015625, 0.82208251953125, 2.983917236328125, 0.077972412109375, -0.15726089477539062, 1.2217941284179688, -1.5129928588867188, 0.6217193603515625, -1.3768043518066406, 2.5006179809570312, 2.8612747192382812, 0.014774322509765625, 1.2637252807617188, 0.2815399169921875, 1.0215682983398438, 2.57537841796875, -1.8984756469726562, -0.0497283935546875, 0.34833526611328125, 0.4277915954589844, 3.7678375244140625, 2.1620025634765625, 1.7287311553955078, -1.4981155395507812, 2.6934890747070312, 3.2488861083984375, 2.984210968017578, -1.1846771240234375, -0.8191795349121094, 4.790435791015625, 1.56707763671875, 0.039699554443359375, 1.73187255859375, -3.481639862060547, 5.0035552978515625, 2.1686019897460938, 0.5387058258056641, 3.44415283203125, 3.4646472930908203, 0.5395431518554688, 0.9710464477539062, 0.4391937255859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000092.npy"}
|
|
{"epoch": 0.13907785336356765, "step": 93, "batch_size": 64, "mean": 0.7055187225341797, "std": 1.6545441150665283, "min": -2.3877105712890625, "p10": -1.35965576171875, "median": 0.6301183700561523, "p90": 2.71775131225586, "max": 6.488258361816406, "pos_frac": 0.671875, "sample": [0.6908531188964844, 1.8762893676757812, 2.780517578125, 1.3678092956542969, -2.352741241455078, 0.7769126892089844, 0.351470947265625, -0.7554473876953125, -0.2144927978515625, 2.2574234008789062, 2.4963722229003906, 2.343252182006836, 6.488258361816406, -1.1867713928222656, 0.12229156494140625, 0.4919090270996094, 0.9442710876464844, 1.6812782287597656, -0.0879974365234375, -2.3877105712890625, 1.45831298828125, 2.320526123046875, -1.0983428955078125, 0.5044021606445312, 0.7943019866943359, -0.4257221221923828, 2.9069976806640625, -0.34038734436035156, -1.3395671844482422, 0.837921142578125, 0.649139404296875, -0.3383064270019531, 0.2634086608886719, 3.5493927001953125, -0.39099884033203125, 0.28719329833984375, -1.0797233581542969, 0.6254539489746094, -1.5603408813476562, 0.5002059936523438, 1.0512046813964844, 0.5750389099121094, 1.9095344543457031, 1.3768386840820312, 0.6161308288574219, -0.616302490234375, 0.6347827911376953, 0.20763015747070312, 3.4866485595703125, 0.7525691986083984, 3.6975975036621094, 1.229990005493164, 2.5712966918945312, 1.3631439208984375, -1.1600761413574219, -1.368265151977539, 2.9527816772460938, -1.8773193359375, -1.8802719116210938, 2.1265182495117188, 2.3313140869140625, -1.6834869384765625, -0.23183441162109375, 1.280120849609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000093.npy"}
|
|
{"epoch": 0.14058956916099774, "step": 94, "batch_size": 64, "mean": 1.1229921579360962, "std": 2.027146816253662, "min": -6.1747589111328125, "p10": -0.8367506027221678, "median": 1.3811664581298828, "p90": 3.1287429809570315, "max": 6.49542236328125, "pos_frac": 0.734375, "sample": [2.1272659301757812, 1.0818138122558594, 2.8933639526367188, 2.7657833099365234, 1.4600830078125, -3.8160324096679688, -0.4656505584716797, 0.6610107421875, 0.4676666259765625, 2.71258544921875, 0.3257465362548828, 4.173927307128906, 0.534942626953125, -6.1747589111328125, 1.3792648315429688, 2.4737472534179688, -1.3604164123535156, -0.07897377014160156, 2.968505859375, 1.179962158203125, 2.230602264404297, 1.8804550170898438, 1.6657180786132812, 1.5091514587402344, 1.8834075927734375, 0.6580753326416016, 0.4835681915283203, -0.5944137573242188, -1.9581565856933594, 2.3065414428710938, 2.183246612548828, -0.2207622528076172, 0.6033191680908203, -0.05877685546875, 2.054107666015625, 0.7712059020996094, 1.6972503662109375, 3.1419219970703125, 1.3830680847167969, 3.721343994140625, 2.9690818786621094, 3.9494476318359375, -0.19135284423828125, 3.097991943359375, 3.1876144409179688, 3.0155792236328125, -0.89306640625, 4.613494873046875, 2.7113189697265625, 1.6106185913085938, 6.49542236328125, 0.7308139801025391, 0.694183349609375, -0.7053470611572266, 1.4713897705078125, 0.5167179107666016, -3.8621139526367188, 1.4642314910888672, 1.0428314208984375, 1.591552734375, -0.0265960693359375, -0.579498291015625, -1.6621322631835938, -0.021394729614257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000094.npy"}
|
|
{"epoch": 0.1421012849584278, "step": 95, "batch_size": 64, "mean": 1.0689538717269897, "std": 1.6568758487701416, "min": -3.831024169921875, "p10": -0.8090164184570312, "median": 1.0625476837158203, "p90": 3.331169891357423, "max": 4.9079742431640625, "pos_frac": 0.75, "sample": [4.3511810302734375, -1.9743537902832031, 1.3834152221679688, 0.3364982604980469, -0.8113555908203125, 0.336639404296875, 4.4681549072265625, 0.6696968078613281, 1.5720367431640625, 1.8984718322753906, 0.19201278686523438, 1.42999267578125, 2.265106201171875, 0.6593055725097656, -0.47701263427734375, 0.4657783508300781, 0.73712158203125, -0.28777313232421875, 0.5373020172119141, 1.1804542541503906, 1.60455322265625, 3.4651870727539062, -1.1070404052734375, 1.7864646911621094, -1.7772064208984375, -0.6047515869140625, 4.0966796875, 2.5729827880859375, -0.17380523681640625, -3.831024169921875, 1.5926628112792969, 3.467742919921875, 0.3025779724121094, 1.9464950561523438, -0.795166015625, 0.7444000244140625, 2.9189910888671875, 0.94464111328125, -1.4223785400390625, 0.54791259765625, 0.66741943359375, 0.2695465087890625, 1.7739791870117188, 1.5763053894042969, -0.803558349609375, 1.9795303344726562, 3.018463134765625, -0.08703041076660156, 1.3573684692382812, 3.744049072265625, 1.381866455078125, 2.7677078247070312, -0.6258811950683594, 1.6213455200195312, 2.4227294921875, 4.9079742431640625, -0.00160980224609375, 0.22254180908203125, 1.5469188690185547, 2.014801025390625, 0.560333251953125, 1.4180374145507812, 2.489471435546875, -1.0218524932861328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000095.npy"}
|
|
{"epoch": 0.1436130007558579, "step": 96, "batch_size": 64, "mean": 1.1744756698608398, "std": 2.0061964988708496, "min": -3.6428451538085938, "p10": -1.0515573501586912, "median": 0.9847068786621094, "p90": 3.974174499511719, "max": 6.918701171875, "pos_frac": 0.75, "sample": [2.06121826171875, -2.3239974975585938, 4.3506011962890625, 0.9806594848632812, 4.00408935546875, -1.907012939453125, 1.71234130859375, -1.0949573516845703, 1.8050918579101562, 0.8467941284179688, 2.5030975341796875, -0.4691047668457031, 1.2808456420898438, 5.2069244384765625, 1.0585517883300781, 2.27642822265625, -1.5167160034179688, -0.5096778869628906, 1.5819854736328125, -0.40143585205078125, 1.9012298583984375, 2.0678253173828125, 2.2622756958007812, -3.6428451538085938, 0.5317840576171875, 0.5007228851318359, 2.3004379272460938, 0.9887542724609375, 0.009674072265625, 0.20672607421875, 2.3350753784179688, 3.9043731689453125, 6.918701171875, 2.24383544921875, -0.6695270538330078, 0.558563232421875, -2.7308578491210938, 0.2206439971923828, -0.26715850830078125, 1.5861701965332031, 1.9705810546875, -0.5822334289550781, 1.6710052490234375, 0.59869384765625, 5.1246185302734375, 0.6281585693359375, 2.704082489013672, 0.137847900390625, 2.4205245971679688, -0.8422203063964844, 2.3268814086914062, -0.9502906799316406, 0.8532905578613281, 4.199150085449219, 4.951118469238281, 1.7726898193359375, 0.30808448791503906, 0.37589073181152344, 0.2120494842529297, 3.7158660888671875, 2.6439361572265625, 0.2155303955078125, -0.3095245361328125, -1.6514244079589844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000096.npy"}
|
|
{"epoch": 0.14512471655328799, "step": 97, "batch_size": 64, "mean": 0.8221681118011475, "std": 2.204930305480957, "min": -4.580841064453125, "p10": -1.6679927825927734, "median": 0.8261804580688477, "p90": 2.835289764404297, "max": 9.100921630859375, "pos_frac": 0.671875, "sample": [-3.43560791015625, 0.46169281005859375, -0.7514457702636719, 5.415130615234375, 0.9310951232910156, 5.5856781005859375, -0.6243705749511719, -1.985382080078125, -0.48262786865234375, -4.580841064453125, -2.1741809844970703, 0.048397064208984375, 0.8686752319335938, 1.8810310363769531, -1.7657241821289062, -3.9231033325195312, -0.05356788635253906, 2.262399673461914, 2.120758056640625, 0.23769187927246094, 1.0268440246582031, 1.6849937438964844, 0.580413818359375, -1.0404205322265625, 0.9578170776367188, -0.49478912353515625, 0.01607513427734375, 2.6752166748046875, 0.16498947143554688, 2.7559127807617188, 1.3387985229492188, -1.6336822509765625, 2.3525924682617188, 0.6476211547851562, 0.7872562408447266, 1.68304443359375, 4.228799819946289, 2.0895614624023438, -0.4686775207519531, 3.316802978515625, 1.6522560119628906, -1.6566734313964844, 9.100921630859375, 1.6519622802734375, 1.7416000366210938, -0.7771072387695312, 0.8651046752929688, 1.3330154418945312, 4.12554931640625, -1.6728439331054688, -0.34926605224609375, 0.4796905517578125, 1.945098876953125, -0.46761322021484375, 2.2497940063476562, 2.8693084716796875, 2.5452442169189453, 0.023395538330078125, 1.3981304168701172, 1.9728469848632812, 1.2572669982910156, -0.28398895263671875, 0.4250602722167969, -0.5148601531982422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000097.npy"}
|
|
{"epoch": 0.14663643235071808, "step": 98, "batch_size": 64, "mean": 0.6566237211227417, "std": 1.4832130670547485, "min": -2.037067413330078, "p10": -1.427488136291504, "median": 0.80645751953125, "p90": 2.4794181823730472, "max": 4.3321685791015625, "pos_frac": 0.703125, "sample": [-0.35404205322265625, 0.8806743621826172, 1.7631301879882812, 1.4216995239257812, 1.1739730834960938, 0.9483108520507812, 0.9502182006835938, 0.48958587646484375, 0.5204315185546875, 3.2053070068359375, -1.486358642578125, -1.0446090698242188, 1.367340087890625, -0.5171699523925781, 0.8967018127441406, 0.688934326171875, 0.3877716064453125, 2.7535018920898438, 0.9939708709716797, -0.8379058837890625, 1.5073699951171875, 3.5934829711914062, -1.6823883056640625, 1.35595703125, 0.5238819122314453, -0.5220947265625, 0.8383102416992188, 0.16526412963867188, -0.06217765808105469, -0.4243316650390625, 1.0053787231445312, 1.2887344360351562, 2.194915771484375, 2.3942413330078125, -1.4816169738769531, 0.9346694946289062, 1.0738983154296875, 2.5159225463867188, 0.5555305480957031, 0.04721832275390625, 1.9059906005859375, 0.29624176025390625, -1.4044876098632812, 1.2248783111572266, -2.0338134765625, -1.4373455047607422, 1.4729175567626953, 0.4481697082519531, -1.90386962890625, 4.1324005126953125, -0.28905296325683594, 3.5484619140625, -1.0311126708984375, -1.3439922332763672, -1.22222900390625, 1.6904220581054688, 1.1757354736328125, 0.06508636474609375, 0.4237518310546875, 4.3321685791015625, 0.7746047973632812, 1.9582481384277344, 1.254180908203125, -2.037067413330078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000098.npy"}
|
|
{"epoch": 0.14814814814814814, "step": 99, "batch_size": 64, "mean": 0.8841932415962219, "std": 2.0832149982452393, "min": -4.0574951171875, "p10": -1.3007186889648437, "median": 0.49585914611816406, "p90": 3.7614227294921894, "max": 6.2571258544921875, "pos_frac": 0.6875, "sample": [5.0898590087890625, 4.719430923461914, 3.9663314819335938, 4.702491760253906, 0.4540290832519531, 0.1722869873046875, -0.20760726928710938, -1.033487319946289, 0.3307037353515625, 4.999477386474609, 0.3360595703125, 1.866851806640625, 1.8222618103027344, 3.068805694580078, 0.14554786682128906, 0.139984130859375, 0.537689208984375, 0.2080669403076172, 0.28079986572265625, 2.8975830078125, 2.3761672973632812, -0.3551750183105469, -0.6834449768066406, -1.0375480651855469, -1.147003173828125, -0.6021461486816406, 4.142425537109375, -2.900482177734375, -1.3824310302734375, 2.9356460571289062, 0.6453781127929688, -0.5003662109375, -0.3322334289550781, 0.2698211669921875, 1.0943031311035156, 1.5576171875, 1.6404266357421875, -1.8446083068847656, 1.142557144165039, 3.261821746826172, 0.83868408203125, -4.0574951171875, 3.231901168823242, 6.2571258544921875, 0.02878570556640625, 1.6934814453125, 3.2833023071289062, 0.044567108154296875, -4.0465087890625, 1.4354381561279297, 1.2191810607910156, 1.7800178527832031, 1.7459774017333984, 1.9781227111816406, -1.361083984375, 0.0823974609375, -0.2999000549316406, 2.0647811889648438, 0.7941246032714844, -1.12957763671875, 0.8317718505859375, -0.07131576538085938, -1.1598663330078125, -1.3734359741210938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000099.npy"}
|
|
{"epoch": 0.14965986394557823, "step": 100, "batch_size": 64, "mean": 0.9223723411560059, "std": 2.598998785018921, "min": -6.715728759765625, "p10": -1.60368537902832, "median": 0.8030233383178711, "p90": 4.122004699707031, "max": 9.95843505859375, "pos_frac": 0.671875, "sample": [1.603057861328125, -0.2637481689453125, -1.867767333984375, 2.4110031127929688, 1.949127197265625, -0.6622314453125, 0.7965068817138672, 2.7170352935791016, 4.051338195800781, 1.725006103515625, 0.201904296875, 1.4339447021484375, 2.2747879028320312, 4.152290344238281, 1.0997238159179688, 4.662635803222656, -1.4745101928710938, 0.5719451904296875, 0.5690345764160156, 0.8375091552734375, 0.5891399383544922, 0.3220062255859375, -2.1802940368652344, 0.4010486602783203, -1.0569076538085938, 2.4503326416015625, -0.1452789306640625, 2.7452850341796875, 1.02398681640625, -3.7532958984375, 0.31781005859375, -1.932891845703125, -1.3771514892578125, -1.1882286071777344, -0.057830810546875, -0.5039024353027344, 1.384063720703125, -0.6482200622558594, -5.9055633544921875, 4.368013381958008, 0.3278045654296875, 6.5526580810546875, -1.6590461730957031, 0.8502731323242188, -0.5892181396484375, 2.300983428955078, 2.5863418579101562, 0.2508392333984375, -0.4454326629638672, -1.1324996948242188, 4.9078521728515625, 2.8120574951171875, 1.5875396728515625, 1.61419677734375, 0.5228271484375, 1.6066932678222656, -0.86236572265625, 5.5192413330078125, 3.39703369140625, 1.2999191284179688, 9.95843505859375, 0.809539794921875, -6.715728759765625, 1.89117431640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000100.npy"}
|
|
{"epoch": 0.15117157974300832, "step": 101, "batch_size": 64, "mean": 1.3472540378570557, "std": 1.8091360330581665, "min": -2.12109375, "p10": -0.8981773376464842, "median": 1.2157840728759766, "p90": 3.711928749084473, "max": 5.622215270996094, "pos_frac": 0.765625, "sample": [-0.028675079345703125, 0.827972412109375, 1.2563934326171875, -0.6994476318359375, 4.126960754394531, 3.6316452026367188, 1.9043350219726562, 0.5718727111816406, -0.7588043212890625, 0.7023220062255859, -0.07030868530273438, 2.326141357421875, -2.12109375, 2.2072525024414062, 3.602081298828125, -0.9579086303710938, 3.127056121826172, 1.4429435729980469, -0.4549140930175781, -1.7694282531738281, 2.330036163330078, 0.793243408203125, 1.95263671875, 0.013668060302734375, 1.24957275390625, 1.6147994995117188, 3.2499542236328125, 3.4642333984375, 0.9207630157470703, 3.9590587615966797, 1.898834228515625, 4.57177734375, 3.5180530548095703, 0.5384483337402344, -0.7068710327148438, -2.015054702758789, 1.2086181640625, -1.7282028198242188, -0.7093410491943359, 0.9995651245117188, -0.2294464111328125, 3.2745437622070312, 0.8613739013671875, 0.6211738586425781, 5.0955352783203125, 1.45538330078125, 1.1985015869140625, -1.7857513427734375, 0.19609832763671875, 1.2148590087890625, 4.2006378173828125, 0.314361572265625, 3.746335983276367, 2.554758071899414, 1.03759765625, 1.224822998046875, 2.2221908569335938, 1.5334014892578125, -1.228424072265625, 1.2167091369628906, 2.7115936279296875, 2.4991817474365234, 5.622215270996094, 0.6764163970947266], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000101.npy"}
|
|
{"epoch": 0.15268329554043839, "step": 102, "batch_size": 64, "mean": 0.8391384482383728, "std": 2.3546595573425293, "min": -4.3010711669921875, "p10": -1.8823331832885741, "median": 0.5877056121826172, "p90": 3.593142700195313, "max": 7.5030517578125, "pos_frac": 0.625, "sample": [2.95831298828125, -1.0113906860351562, -0.540557861328125, 1.5126075744628906, -3.5791168212890625, -0.15662384033203125, -0.8557662963867188, 1.636810302734375, -3.2826004028320312, 0.1134796142578125, 2.253204345703125, 1.7470855712890625, 0.376800537109375, 0.6053199768066406, 3.8818511962890625, -0.24657440185546875, -1.9079227447509766, 7.317962646484375, -0.9190216064453125, 3.669708251953125, -1.5095634460449219, 0.5003147125244141, 1.341339111328125, 0.0877532958984375, 2.336435317993164, 1.6632728576660156, 1.4233551025390625, 2.5324058532714844, -3.8393783569335938, 0.7196884155273438, 3.2294769287109375, 2.5166854858398438, -1.8226242065429688, 2.1892738342285156, -0.10291290283203125, 0.5700912475585938, 2.0611534118652344, 4.996543884277344, 3.982269287109375, -0.8693618774414062, -0.10626983642578125, 4.54998779296875, -2.5039138793945312, 0.4427947998046875, -0.8175544738769531, 0.4949951171875, 0.9158172607421875, 1.6111640930175781, 0.9237918853759766, 0.13021469116210938, 2.479368209838867, 2.539581298828125, -0.3232383728027344, -2.228130340576172, -0.4897804260253906, -1.3718070983886719, -0.7015304565429688, 7.5030517578125, -0.8088302612304688, -4.3010711669921875, 3.41448974609375, 2.7310638427734375, 3.147167205810547, 0.8937129974365234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000102.npy"}
|
|
{"epoch": 0.15419501133786848, "step": 103, "batch_size": 64, "mean": 0.9789341688156128, "std": 1.9287223815917969, "min": -3.7064208984375, "p10": -1.1597152709960936, "median": 0.8534765243530273, "p90": 3.3403961181640627, "max": 6.608573913574219, "pos_frac": 0.6875, "sample": [-1.7971992492675781, 1.1413421630859375, 3.0643310546875, -2.773193359375, 1.5586700439453125, 1.0452041625976562, 0.8236160278320312, -0.3135528564453125, 1.0877227783203125, 2.3637008666992188, -0.2866706848144531, 1.3177490234375, -1.0338592529296875, 0.7407684326171875, -1.213653564453125, 2.3689041137695312, 2.2322311401367188, 2.0264511108398438, -3.7064208984375, 3.829296112060547, 0.05806732177734375, 1.0136375427246094, 0.21829986572265625, 1.9125022888183594, 1.447723388671875, -0.4952201843261719, -0.6709823608398438, 2.935577392578125, -1.4941024780273438, 1.2454910278320312, 0.8472976684570312, 4.548309326171875, 4.436279296875, -0.20893478393554688, -0.06699371337890625, 0.46103668212890625, -1.6898727416992188, 1.87103271484375, 1.3870620727539062, 0.19901084899902344, 3.3684959411621094, 0.4662609100341797, 0.8596553802490234, 1.3701133728027344, 2.031963348388672, -0.396759033203125, 3.274829864501953, -0.21023178100585938, 4.456031799316406, -0.2574443817138672, 5.819183349609375, 2.8306427001953125, -2.4562225341796875, 0.33008575439453125, -0.396453857421875, 0.06854248046875, 0.09976959228515625, -0.2644004821777344, -0.6120662689208984, 0.7079429626464844, 1.5461921691894531, 1.8976516723632812, 6.608573913574219, 1.0787734985351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000103.npy"}
|
|
{"epoch": 0.15570672713529857, "step": 104, "batch_size": 64, "mean": 1.1605031490325928, "std": 2.4262502193450928, "min": -3.9758987426757812, "p10": -1.0071998596191405, "median": 0.7022619247436523, "p90": 4.099092102050782, "max": 10.447479248046875, "pos_frac": 0.65625, "sample": [1.24725341796875, 2.3517913818359375, 1.3245773315429688, -0.01677703857421875, -2.0676803588867188, 0.060638427734375, 0.788055419921875, 1.1722831726074219, 2.7952804565429688, -1.02130126953125, 0.0016307830810546875, -3.2684249877929688, 10.447479248046875, 3.1239166259765625, -1.1058731079101562, -0.9448165893554688, 3.960845947265625, 0.5227031707763672, -0.2561759948730469, 1.4175186157226562, -0.0899505615234375, -3.6688232421875, 4.37811279296875, 3.8095703125, -0.22216796875, -0.18339157104492188, 1.1288566589355469, -1.9548263549804688, 3.205472946166992, -0.3673381805419922, 0.5613975524902344, 0.07376861572265625, -0.7825603485107422, 0.6932582855224609, 3.488780975341797, 1.6151866912841797, 0.7616119384765625, -0.9742965698242188, 3.660858154296875, 0.9095993041992188, 1.7620620727539062, -0.6002960205078125, 6.977142333984375, -0.23563385009765625, 2.227222442626953, 4.305244445800781, 2.4276504516601562, 0.23222923278808594, -3.9758987426757812, -0.8704071044921875, 1.977193832397461, 1.138986587524414, 3.381824493408203, 3.2018280029296875, 0.5537223815917969, 0.7112655639648438, -0.11119461059570312, 0.315216064453125, 0.5584697723388672, -0.12701416015625, -0.3497047424316406, 4.58050537109375, 4.1583404541015625, 5.457405090332031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000104.npy"}
|
|
{"epoch": 0.15721844293272866, "step": 105, "batch_size": 64, "mean": 0.8696127533912659, "std": 1.9885191917419434, "min": -3.803497314453125, "p10": -1.661147308349609, "median": 0.9072713851928711, "p90": 3.583062744140625, "max": 5.540914535522461, "pos_frac": 0.640625, "sample": [1.251150131225586, 0.9023818969726562, 1.066558837890625, 1.8611564636230469, 1.1581840515136719, 2.5890331268310547, -3.740203857421875, 4.888946533203125, -2.395294189453125, 1.141122817993164, -0.5850868225097656, 3.0167617797851562, 1.0920562744140625, 2.9809951782226562, 0.056568145751953125, 4.940238952636719, -0.4913959503173828, -3.803497314453125, -0.3022289276123047, 3.8536605834960938, 1.0302963256835938, -0.2616729736328125, 0.728851318359375, -1.4946975708007812, 0.38291168212890625, -0.363525390625, 1.4838104248046875, 2.0787353515625, 2.1363372802734375, -1.73248291015625, -0.48543548583984375, 1.1477909088134766, -1.8202362060546875, -0.33766937255859375, -1.338165283203125, 3.8359375, 0.9121608734130859, 1.1511383056640625, -0.31021881103515625, 0.8479118347167969, -0.19224166870117188, -0.5026969909667969, -0.18407821655273438, 2.162508010864258, 2.03570556640625, 2.9579010009765625, -2.52252197265625, 0.3540802001953125, 0.4074554443359375, -0.5488967895507812, -0.0751800537109375, 1.1210212707519531, 0.057666778564453125, 4.494293212890625, 3.565032958984375, 0.4960479736328125, 5.540914535522461, 2.35504150390625, -0.7686519622802734, 1.9950275421142578, -1.7728729248046875, 3.590789794921875, 2.7226409912109375, 1.2933425903320312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000105.npy"}
|
|
{"epoch": 0.15873015873015872, "step": 106, "batch_size": 64, "mean": 0.9669829607009888, "std": 2.1716175079345703, "min": -6.17681884765625, "p10": -1.5191864013671872, "median": 1.2117767333984375, "p90": 3.537300109863282, "max": 5.298698425292969, "pos_frac": 0.71875, "sample": [-0.11883544921875, 2.3682308197021484, 1.98095703125, 2.49517822265625, -2.1927566528320312, -0.6532325744628906, 5.298698425292969, 2.7593860626220703, 0.07333755493164062, 1.596405029296875, 1.211181640625, 2.6326332092285156, 5.189628601074219, 1.3016586303710938, -0.14563560485839844, -1.6807861328125, 0.867706298828125, 1.5389862060546875, 0.29488372802734375, 0.03167724609375, -0.3572273254394531, 0.04644012451171875, -1.142120361328125, 1.0803680419921875, -0.5809736251831055, -0.5293922424316406, 0.2557849884033203, -3.0165557861328125, 1.212371826171875, 5.140411376953125, -0.910552978515625, 0.3819561004638672, -0.5670623779296875, 0.7082405090332031, 2.7466812133789062, 3.8629226684570312, 2.046276092529297, -0.7244644165039062, 3.6082305908203125, 4.4998626708984375, 2.3537635803222656, 1.7441215515136719, 4.41032600402832, 0.8040523529052734, -3.0173492431640625, 0.4455718994140625, 0.8158035278320312, 1.3877220153808594, -6.17681884765625, 1.2866764068603516, -2.95111083984375, 1.3580970764160156, 2.2481842041015625, 3.1371536254882812, 2.2210464477539062, 2.1113510131835938, 0.4502067565917969, 2.149106979370117, -0.30566978454589844, 3.371795654296875, -3.925018310546875, 1.5016098022460938, 1.6059379577636719, 2.2498512268066406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000106.npy"}
|
|
{"epoch": 0.1602418745275888, "step": 107, "batch_size": 64, "mean": 0.5366103649139404, "std": 2.450985908508301, "min": -6.738067626953125, "p10": -2.2779943466186525, "median": 0.6010017395019531, "p90": 3.901789093017581, "max": 6.650634765625, "pos_frac": 0.65625, "sample": [0.4952812194824219, -3.6484222412109375, 0.6020050048828125, 4.349006652832031, 1.9459991455078125, 1.849029541015625, -3.037261962890625, 3.0769805908203125, -2.2762680053710938, 1.1466827392578125, -6.738067626953125, 1.0249862670898438, 1.4241085052490234, 0.59808349609375, 1.093221664428711, -2.2787342071533203, -0.00980377197265625, 1.203460693359375, 0.40386390686035156, -0.813568115234375, -0.85479736328125, 6.650634765625, 5.086090087890625, -0.1509246826171875, 2.433277130126953, -4.411537170410156, 2.6984500885009766, -1.7512283325195312, -1.0921478271484375, 0.735076904296875, 1.590087890625, -1.9693527221679688, 0.7806663513183594, 1.9150238037109375, 1.0264739990234375, 0.466888427734375, 4.502166748046875, 0.381927490234375, 0.9330902099609375, 5.40423583984375, 2.507568359375, 3.1225509643554688, -1.0264205932617188, -2.2180709838867188, 1.913726806640625, 1.5278396606445312, 0.3576850891113281, 1.5018157958984375, 0.8848819732666016, -2.7456207275390625, 4.235748291015625, 2.60888671875, 0.11895942687988281, -1.985647201538086, -0.5423316955566406, 0.4429779052734375, -0.17340087890625, 4.2598114013671875, 0.5999984741210938, -1.09722900390625, -1.5732955932617188, -4.3069000244140625, 0.37286376953125, 0.7719802856445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000107.npy"}
|
|
{"epoch": 0.1617535903250189, "step": 108, "batch_size": 64, "mean": 1.1162045001983643, "std": 2.1924288272857666, "min": -3.7652587890625, "p10": -1.4415374755859374, "median": 1.0558490753173828, "p90": 4.269588851928712, "max": 6.15362548828125, "pos_frac": 0.609375, "sample": [1.7064590454101562, 2.0680313110351562, 4.603118896484375, 1.8967208862304688, -0.017541885375976562, 3.9565963745117188, 1.80072021484375, 2.4771289825439453, 4.789886474609375, -1.7069931030273438, 0.22646331787109375, 5.106426239013672, 1.7644195556640625, -0.2923126220703125, 2.0316314697265625, 0.9745750427246094, -1.4535903930664062, -0.3596019744873047, 0.5465087890625, 1.6421699523925781, 2.927978515625, -0.29052734375, -0.5205135345458984, -0.153106689453125, 1.2533111572265625, -2.3110580444335938, 2.472034454345703, -3.6817169189453125, 6.15362548828125, 1.2047271728515625, -0.573822021484375, -0.6291351318359375, 2.852142333984375, -0.0695037841796875, 2.5168380737304688, 4.403728485107422, -0.24381446838378906, 0.009790420532226562, -3.7652587890625, 2.77130126953125, -1.4134140014648438, 5.004402160644531, 0.5130558013916016, -0.4570121765136719, -0.05211639404296875, 0.8574028015136719, -0.5654888153076172, 3.5000686645507812, 0.0296173095703125, -1.9247207641601562, -2.6288795471191406, -0.1631317138671875, -0.992889404296875, 2.1819915771484375, 2.7205123901367188, 1.1371231079101562, 1.9927825927734375, 5.893768310546875, 1.6044197082519531, 2.0293731689453125, -0.15001869201660156, 3.0618019104003906, -0.38666534423828125, 3.5572662353515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000108.npy"}
|
|
{"epoch": 0.16326530612244897, "step": 109, "batch_size": 64, "mean": 1.4941083192825317, "std": 2.0251553058624268, "min": -1.9766387939453125, "p10": -0.8178327560424804, "median": 1.1255664825439453, "p90": 3.861911010742188, "max": 8.733657836914062, "pos_frac": 0.8125, "sample": [1.00384521484375, 3.9305801391601562, 5.90576171875, -0.23511123657226562, 2.8469696044921875, 1.6254253387451172, 3.6852684020996094, 2.6561126708984375, 1.624847412109375, 2.563579559326172, 1.091054916381836, 0.0479888916015625, 3.0097732543945312, 3.7016830444335938, 0.4814453125, 1.09881591796875, 2.2946834564208984, 0.4768104553222656, 0.1669769287109375, 1.7896671295166016, 1.2815093994140625, 3.500490188598633, -1.478759765625, 0.0022516250610351562, 0.6357574462890625, 8.733657836914062, -0.8267745971679688, 0.08752632141113281, 0.8038902282714844, 7.29840087890625, 1.0274295806884766, 1.1746368408203125, 0.3032951354980469, -0.31215667724609375, 0.9195480346679688, 1.2616443634033203, -0.9108543395996094, 1.01348876953125, 0.7309646606445312, 2.6938323974609375, -1.0014381408691406, 1.2920112609863281, 0.7952384948730469, 1.338531494140625, -0.1571369171142578, -0.8902511596679688, 0.0589599609375, 1.1523170471191406, -1.5355072021484375, -1.9766387939453125, 1.0680885314941406, 1.28961181640625, 1.49517822265625, 4.23876953125, 3.1044464111328125, 3.9848251342773438, 5.917724609375, 2.1195907592773438, 1.8151626586914062, 0.9288101196289062, -0.7396469116210938, -0.7969684600830078, 1.6884765625, 2.7268218994140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000109.npy"}
|
|
{"epoch": 0.16477702191987906, "step": 110, "batch_size": 64, "mean": 0.6937993764877319, "std": 2.195509910583496, "min": -5.9715728759765625, "p10": -1.7986053466796874, "median": 0.7059326171875, "p90": 3.216325378417969, "max": 7.52081298828125, "pos_frac": 0.640625, "sample": [0.6608924865722656, 0.5560073852539062, -2.664520263671875, 1.1400070190429688, 0.19683837890625, 1.7305259704589844, 2.1476211547851562, 2.7479782104492188, -1.443450927734375, 3.6807937622070312, 3.1837692260742188, 1.8081283569335938, 0.5790557861328125, 3.39886474609375, -0.24132156372070312, -1.8419036865234375, -0.40148162841796875, 4.955108642578125, -0.7050418853759766, -3.7079734802246094, 0.4745025634765625, 0.8366165161132812, 1.1027069091796875, 2.5149765014648438, 0.17458724975585938, 1.02581787109375, 0.7227859497070312, -0.83917236328125, -0.1923065185546875, -1.8263092041015625, 1.1479721069335938, 2.6441192626953125, 1.6915817260742188, -1.1794471740722656, -1.3197593688964844, 1.0382614135742188, -1.7339630126953125, 3.6932220458984375, -0.14776229858398438, 1.6346855163574219, 3.2302780151367188, 1.4496078491210938, 0.715057373046875, -0.7665786743164062, -0.10999679565429688, 1.7939300537109375, 2.3927230834960938, -2.40869140625, -0.540130615234375, -5.9715728759765625, 0.696807861328125, 1.4509429931640625, 0.327880859375, -1.2856903076171875, 0.9658203125, 7.52081298828125, 2.905811309814453, 2.5253353118896484, 0.6017684936523438, 4.9437713623046875, -1.4805526733398438, 1.7758407592773438, -0.1722412109375, -3.400787353515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000110.npy"}
|
|
{"epoch": 0.16628873771730915, "step": 111, "batch_size": 64, "mean": 1.0197620391845703, "std": 2.1787171363830566, "min": -3.966827392578125, "p10": -1.7176061630249022, "median": 1.2777891159057617, "p90": 3.931864929199219, "max": 6.324729919433594, "pos_frac": 0.6875, "sample": [-0.40669822692871094, -2.795389175415039, -0.6363525390625, 5.391143798828125, 2.1225013732910156, 1.3402023315429688, -1.1801033020019531, 2.41204833984375, 2.21063232421875, 1.7514839172363281, 2.3963470458984375, 1.7184524536132812, -1.6173934936523438, 1.2239303588867188, -1.6380462646484375, 1.312326431274414, 0.042987823486328125, 1.1890888214111328, -1.5548782348632812, 3.5348548889160156, 0.7348709106445312, 1.6893463134765625, 2.6944732666015625, 4.194400787353516, 2.426849365234375, 2.042612075805664, 1.7242965698242188, 4.105937957763672, 1.2432518005371094, -0.161346435546875, -0.5997600555419922, -1.7517032623291016, -3.966827392578125, -2.36468505859375, 1.3183021545410156, 0.70855712890625, 1.368865966796875, 3.0234298706054688, -1.1236495971679688, 3.9747467041015625, 3.959747314453125, 3.6201934814453125, 3.8668060302734375, -0.638671875, 0.6299839019775391, 0.21688461303710938, 1.7734737396240234, 6.324729919433594, -2.4038925170898438, 1.9560089111328125, 4.123268127441406, 3.218952178955078, 2.295797348022461, -0.6711292266845703, -3.8963279724121094, 0.5107154846191406, -1.8824462890625, 0.18415069580078125, -0.4329833984375, 2.0412673950195312, 2.8865966796875, -0.9400253295898438, 0.24369430541992188, 0.17887115478515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000111.npy"}
|
|
{"epoch": 0.16780045351473924, "step": 112, "batch_size": 64, "mean": 1.3116695880889893, "std": 2.103163003921509, "min": -5.2965240478515625, "p10": -1.188433265686035, "median": 1.4011201858520508, "p90": 3.870156860351563, "max": 5.5740814208984375, "pos_frac": 0.765625, "sample": [3.928375244140625, 4.63916015625, 5.24200439453125, 1.8499946594238281, -0.24974822998046875, 1.6392440795898438, 5.5740814208984375, 1.9814529418945312, 3.4860916137695312, 3.635345458984375, -0.12531280517578125, 1.3214874267578125, -1.0807228088378906, 2.1219711303710938, 0.377410888671875, -3.6540069580078125, 0.464141845703125, 0.97760009765625, -3.2071304321289062, 0.2881202697753906, 1.8805770874023438, -1.560272216796875, 2.90411376953125, 3.1144866943359375, 0.38883209228515625, 2.396739959716797, 0.858367919921875, 0.6292228698730469, 2.934814453125, 1.3295707702636719, 3.931060791015625, 1.4039382934570312, -1.207326889038086, 0.752349853515625, 3.1505661010742188, 0.5441436767578125, 1.3983020782470703, -0.6092758178710938, 4.9559173583984375, 0.4541893005371094, -0.6415081024169922, 2.5477294921875, 2.8024253845214844, 1.4186420440673828, -1.14434814453125, 4.503326416015625, 3.73431396484375, 3.3220748901367188, -0.6824817657470703, 0.5323829650878906, -5.2965240478515625, 2.6160812377929688, -0.17557525634765625, 1.7059478759765625, 2.8280487060546875, 1.2729949951171875, 0.06546783447265625, -1.6028575897216797, 0.97674560546875, 1.48992919921875, 1.7183990478515625, -1.2495574951171875, 1.8267593383789062, 2.5185546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000112.npy"}
|
|
{"epoch": 0.1693121693121693, "step": 113, "batch_size": 64, "mean": 1.0235655307769775, "std": 2.1895675659179688, "min": -7.1575469970703125, "p10": -1.4108566284179687, "median": 0.9699125289916992, "p90": 3.826091194152833, "max": 4.8688507080078125, "pos_frac": 0.734375, "sample": [-0.8274078369140625, -2.6619720458984375, 0.10747528076171875, 0.422210693359375, -0.7144660949707031, 4.465496063232422, 1.5520172119140625, -1.41473388671875, 1.5943984985351562, 3.645925521850586, 0.7824935913085938, -0.0374755859375, -0.8934249877929688, 3.0780715942382812, -2.043975830078125, 1.0259857177734375, 0.9138393402099609, 4.698040008544922, -1.7248153686523438, -2.611623764038086, 0.45343780517578125, 1.1959304809570312, 3.2049102783203125, -2.555419921875, -1.1434288024902344, 1.5139617919921875, 3.0776596069335938, 1.6773052215576172, 2.1623458862304688, -0.0895538330078125, 3.2975406646728516, 4.049041748046875, 0.010141372680664062, -1.246429443359375, 2.089508056640625, 4.820402145385742, -7.1575469970703125, 1.7291316986083984, 3.158905029296875, 0.14572906494140625, 0.8564224243164062, 4.117399215698242, 0.50103759765625, 3.3090591430664062, -1.0950126647949219, 1.876068115234375, 1.455169677734375, 0.2747650146484375, -1.4018096923828125, 3.463207244873047, 1.176513671875, 0.79071044921875, 1.3168888092041016, 3.532318115234375, 3.9033050537109375, -0.3090381622314453, 2.3215103149414062, 0.2931022644042969, 1.1420135498046875, 0.5694503784179688, 2.0662765502929688, 0.5871009826660156, 0.14326095581054688, 4.8688507080078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000113.npy"}
|
|
{"epoch": 0.1708238851095994, "step": 114, "batch_size": 64, "mean": 1.692500352859497, "std": 2.203599214553833, "min": -2.390420913696289, "p10": -0.829126739501953, "median": 1.0442848205566406, "p90": 4.706035232543946, "max": 7.5013885498046875, "pos_frac": 0.75, "sample": [-0.8533401489257812, 0.6898422241210938, 1.14141845703125, 2.0688514709472656, 3.787372589111328, 0.5220108032226562, 2.3239898681640625, 0.9471511840820312, 3.532623291015625, -1.5629291534423828, -0.3755607604980469, 3.010162353515625, 2.3326263427734375, -0.3092803955078125, 0.3661155700683594, -1.1850814819335938, 2.5912628173828125, 0.8351821899414062, 0.4080333709716797, 0.4373741149902344, 2.5101356506347656, 0.7155380249023438, 7.5013885498046875, 0.25219154357910156, 0.4377269744873047, 3.8602752685546875, 2.0684890747070312, 3.4149646759033203, -0.7726287841796875, 4.067914962768555, -0.0543060302734375, 1.23193359375, 5.502368927001953, 2.7653656005859375, 3.3352127075195312, 0.5291862487792969, 3.3622360229492188, 2.4051589965820312, 4.273643493652344, -2.2533798217773438, 6.565948486328125, 4.638912200927734, 0.9142913818359375, -0.37394142150878906, 0.5001106262207031, 0.38457489013671875, 5.431732177734375, 0.7759628295898438, -1.1185359954833984, -0.44451904296875, -0.317657470703125, 4.27734375, 2.6636600494384766, 4.42132568359375, 4.896583557128906, 4.73480224609375, 1.1512832641601562, -0.012004852294921875, -1.1494827270507812, -2.390420913696289, 4.809059143066406, -0.540740966796875, 1.8537216186523438, 0.786773681640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000114.npy"}
|
|
{"epoch": 0.17233560090702948, "step": 115, "batch_size": 64, "mean": 1.2447755336761475, "std": 2.2418124675750732, "min": -3.1519927978515625, "p10": -1.1872596740722656, "median": 0.8868179321289062, "p90": 3.902999877929689, "max": 8.331527709960938, "pos_frac": 0.71875, "sample": [-0.6206817626953125, 1.2052841186523438, 2.2268447875976562, -3.1519927978515625, 5.806976318359375, 2.2530574798583984, 1.4979400634765625, 2.783283233642578, 0.9551162719726562, 8.331527709960938, 1.630666732788086, -1.084320068359375, 8.276763916015625, -1.2313766479492188, -0.30196380615234375, 0.8185195922851562, 0.021860122680664062, 1.54034423828125, 0.306182861328125, 0.4898681640625, 2.9425277709960938, 0.7875747680664062, -0.447509765625, -0.4164581298828125, -2.199848175048828, 1.930105209350586, -0.4756317138671875, 1.267669677734375, -1.7659225463867188, 4.416912078857422, -1.2417144775390625, 0.4673576354980469, 0.5451221466064453, 1.8286590576171875, 1.1936492919921875, 3.53887939453125, -0.225799560546875, 2.4914493560791016, 0.6517105102539062, 1.6531982421875, 3.136871337890625, -2.9828453063964844, 2.7246017456054688, 0.71270751953125, 4.059051513671875, 1.4769840240478516, 3.0321731567382812, 2.1270313262939453, 0.3052177429199219, 1.316925048828125, 0.7210960388183594, 0.6361179351806641, 5.582490921020508, -0.3495006561279297, 0.2455425262451172, -0.44751930236816406, -0.49919891357421875, -0.5128440856933594, -1.8005142211914062, 1.4389114379882812, 2.278430938720703, 0.5606136322021484, 1.9377574920654297, 5.2696990966796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000115.npy"}
|
|
{"epoch": 0.17384731670445955, "step": 116, "batch_size": 64, "mean": 1.5374069213867188, "std": 1.859175682067871, "min": -3.4962730407714844, "p10": -0.5148702621459961, "median": 1.4324455261230469, "p90": 3.8296775817871094, "max": 5.8302154541015625, "pos_frac": 0.8125, "sample": [2.85498046875, 0.0624237060546875, 1.4402732849121094, 5.506660461425781, 1.6960220336914062, 0.366851806640625, 0.293487548828125, 0.6729068756103516, 2.327016830444336, 3.2400970458984375, 1.8509521484375, -0.6238193511962891, -0.39162445068359375, 3.496124267578125, 1.1464366912841797, -0.5445442199707031, -0.0004634857177734375, 0.1036834716796875, 3.8028945922851562, 0.46222686767578125, -3.4962730407714844, 2.3332443237304688, 3.841156005859375, 1.4246177673339844, -0.0015735626220703125, 2.168659210205078, 1.86663818359375, 4.718822479248047, 2.95428466796875, 1.5267181396484375, -0.5669784545898438, 0.4592113494873047, -0.4456310272216797, 0.7679405212402344, 5.8302154541015625, 1.140411376953125, 3.462646484375, 3.594573974609375, 0.3479576110839844, 0.4378089904785156, 1.2762603759765625, 1.58154296875, 3.7864761352539062, 1.6609420776367188, 4.3949737548828125, 2.6907196044921875, 3.8994140625, -0.36861419677734375, 0.07689666748046875, 2.2291336059570312, 0.22236251831054688, 3.6545867919921875, 0.3698272705078125, -1.1342315673828125, 0.8600807189941406, 2.2077255249023438, 0.09495162963867188, 2.4495773315429688, 0.03143119812011719, 2.816650390625, 3.6729278564453125, -1.2406635284423828, 4.6845703125, -1.6505413055419922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000116.npy"}
|
|
{"epoch": 0.17535903250188964, "step": 117, "batch_size": 64, "mean": 1.2247787714004517, "std": 2.15067195892334, "min": -6.159233093261719, "p10": -0.853026008605957, "median": 1.2459297180175781, "p90": 4.143684387207032, "max": 6.7253570556640625, "pos_frac": 0.75, "sample": [2.258331298828125, 4.266210556030273, 1.302764892578125, 0.042987823486328125, 1.251556396484375, 2.078287124633789, 2.4748611450195312, 1.3086318969726562, 0.189788818359375, 1.8667545318603516, 0.10694122314453125, -0.738739013671875, 3.26947021484375, -0.632171630859375, 1.2403030395507812, 3.204315185546875, -0.0642547607421875, 1.89093017578125, 3.0630111694335938, 3.9844818115234375, -0.77191162109375, 1.7672309875488281, 2.0659408569335938, 4.2479400634765625, -0.8292217254638672, -0.40274810791015625, 0.444061279296875, 2.1395797729492188, 0.7146778106689453, 6.7253570556640625, -0.02570343017578125, 5.39337158203125, 3.8403892517089844, -0.9177150726318359, 1.6553802490234375, 2.1669883728027344, 2.158061981201172, 4.402885437011719, 1.4141387939453125, 4.2119140625, 0.8091049194335938, 0.32111358642578125, -0.5981788635253906, 1.7565879821777344, -0.8632278442382812, -6.159233093261719, 0.8220539093017578, 0.5717811584472656, 3.578125, 0.9540176391601562, 0.1187744140625, 1.5959854125976562, 2.327850341796875, -1.9246482849121094, 4.7890167236328125, -3.149383544921875, -1.6089935302734375, -0.4989433288574219, -2.613433837890625, 0.19863128662109375, 3.2475433349609375, 0.978546142578125, 0.04554557800292969, 0.9221267700195312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000117.npy"}
|
|
{"epoch": 0.17687074829931973, "step": 118, "batch_size": 64, "mean": 1.601295828819275, "std": 2.0311434268951416, "min": -3.0253448486328125, "p10": -0.7811887741088868, "median": 1.8787651062011719, "p90": 4.095365905761719, "max": 5.821903228759766, "pos_frac": 0.734375, "sample": [2.3430328369140625, 1.1931915283203125, 3.40301513671875, -0.9327850341796875, -3.0253448486328125, 1.9791755676269531, 2.6165695190429688, 0.963165283203125, 2.9433021545410156, -0.7857246398925781, 2.05206298828125, 2.5135955810546875, 0.4662971496582031, 4.979228973388672, 0.7294998168945312, -0.6244049072265625, -2.4710540771484375, 1.94757080078125, -0.19194412231445312, 1.8099594116210938, 0.30786895751953125, 2.7578582763671875, 2.6544342041015625, 4.088775634765625, 1.6052093505859375, 0.08014297485351562, -0.4516448974609375, -0.6883316040039062, 2.8286571502685547, -0.24218368530273438, 1.7819442749023438, 2.096761703491211, 4.7093505859375, -0.627227783203125, 0.8463897705078125, 1.999908447265625, 0.7058944702148438, 5.815582275390625, 3.6890716552734375, -1.8447799682617188, -1.6125869750976562, 4.0981903076171875, -0.42950439453125, 3.262542724609375, 0.5138015747070312, 2.6966934204101562, -0.7463912963867188, 3.21875, 3.4681396484375, 3.5848846435546875, 2.5166397094726562, 2.9530982971191406, 0.0959625244140625, 5.821903228759766, 4.764240264892578, 1.2215042114257812, -0.8145751953125, 3.0449676513671875, 4.060441970825195, 4.180398941040039, 2.847930908203125, -0.7706050872802734, 0.7791538238525391, -0.29473876953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000118.npy"}
|
|
{"epoch": 0.17838246409674982, "step": 119, "batch_size": 64, "mean": 1.0430182218551636, "std": 2.3803887367248535, "min": -4.3639984130859375, "p10": -2.1548940658569338, "median": 0.8023891448974609, "p90": 4.094378662109376, "max": 5.333015441894531, "pos_frac": 0.703125, "sample": [0.8580951690673828, 3.5953216552734375, -1.3580970764160156, 2.120708465576172, -1.9713058471679688, 0.7849769592285156, 3.799184799194336, 1.46771240234375, -1.0580062866210938, 0.19878768920898438, -1.480539321899414, 0.07614898681640625, 1.369384765625, 2.359619140625, 3.537382125854492, -4.3639984130859375, 0.11882781982421875, -3.1277313232421875, 0.7742404937744141, 0.47833251953125, -1.530069351196289, 4.705425262451172, -0.124298095703125, 2.2182559967041016, 5.020088195800781, 1.3905487060546875, -2.4434165954589844, 2.9305076599121094, 4.2043609619140625, 3.8377532958984375, 3.4782562255859375, -2.16552734375, 2.1267528533935547, 2.637754440307617, -0.7122344970703125, 4.23065185546875, 0.38822174072265625, 0.6748886108398438, 0.7733860015869141, 5.333015441894531, 3.12835693359375, 2.846099853515625, -1.3126411437988281, 0.8198013305664062, -2.3265609741210938, -2.1300830841064453, 3.3399200439453125, 3.138904571533203, 3.6890411376953125, 0.8211898803710938, 3.0634765625, -4.144287109375, -2.4371566772460938, -0.9447097778320312, 0.7526741027832031, 0.5703468322753906, 2.6746063232421875, 5.073478698730469, 4.68121337890625, 0.41378021240234375, 1.1930065155029297, 0.5349578857421875, -1.1995582580566406, -0.64605712890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000119.npy"}
|
|
{"epoch": 0.17989417989417988, "step": 120, "batch_size": 64, "mean": 1.5832512378692627, "std": 2.0993611812591553, "min": -3.1000900268554688, "p10": -0.874344253540039, "median": 1.2104301452636719, "p90": 4.142362213134766, "max": 7.238487243652344, "pos_frac": 0.828125, "sample": [-1.363311767578125, 1.1841163635253906, -1.9115562438964844, 3.9242477416992188, 1.7187652587890625, 0.570709228515625, 2.771240234375, 1.2367439270019531, 0.7233619689941406, -3.1000900268554688, 1.546661376953125, 0.7647361755371094, 1.0564346313476562, 1.3070068359375, 0.9695510864257812, 7.016204833984375, 3.352386474609375, 0.7325725555419922, 1.0303497314453125, 1.327850341796875, 4.328643798828125, 0.9905319213867188, 2.0418128967285156, 2.014863967895508, 0.317626953125, 1.5984611511230469, 3.2476043701171875, 2.4950408935546875, 1.5940399169921875, 3.0764694213867188, 7.238487243652344, -0.3784027099609375, 4.041206359863281, 0.20903968811035156, 4.007499694824219, 0.5512123107910156, 2.43798828125, -0.13356781005859375, 4.4331817626953125, 0.5183181762695312, -0.9189109802246094, 1.142669677734375, 0.7044677734375, 7.1147918701171875, -0.6670284271240234, 0.338897705078125, -2.7917022705078125, 3.6453094482421875, 2.6566848754882812, 0.8521575927734375, 0.7075386047363281, -0.9981803894042969, 4.1857147216796875, 0.3473663330078125, 2.8783912658691406, -0.770355224609375, 1.2587890625, 3.8959121704101562, 0.30473899841308594, -1.0287551879882812, 2.3127899169921875, 0.4929218292236328, 4.345508575439453, 1.8303298950195312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000120.npy"}
|
|
{"epoch": 0.18140589569160998, "step": 121, "batch_size": 64, "mean": 1.3836126327514648, "std": 2.0267493724823, "min": -2.4830322265625, "p10": -0.8194860458374024, "median": 1.1190299987792969, "p90": 3.944544792175294, "max": 8.31390380859375, "pos_frac": 0.734375, "sample": [4.2425537109375, 2.8355712890625, -0.5110435485839844, -0.7933025360107422, 4.048980712890625, 3.490753173828125, 4.351348876953125, -0.8307075500488281, 2.5938072204589844, -0.27880096435546875, 0.5983047485351562, -0.6589813232421875, 7.3590240478515625, 0.8440208435058594, 1.3196563720703125, -0.08823966979980469, 3.7008609771728516, -2.4830322265625, 1.791534423828125, -1.2865180969238281, 1.8447647094726562, 0.9302024841308594, 2.9635467529296875, 1.4340667724609375, 1.2373886108398438, 2.4130821228027344, 0.8109855651855469, -1.4154853820800781, 1.1362075805664062, 2.0599288940429688, -0.25536346435546875, 8.31390380859375, 2.3217735290527344, 1.0338058471679688, -0.4998283386230469, 0.30687713623046875, 0.199188232421875, 2.086822509765625, 2.709625244140625, 1.0326156616210938, 0.7439651489257812, 0.4597015380859375, 1.1018524169921875, 2.597919464111328, 1.3724899291992188, 0.9391098022460938, -1.7900314331054688, 4.862266540527344, 2.2119598388671875, 1.4109134674072266, -0.5718994140625, 0.14187240600585938, 1.9125823974609375, 2.758220672607422, 1.589559555053711, 5.59814453125, 1.2168445587158203, -1.0547103881835938, 0.8418788909912109, -0.6469650268554688, -0.3160514831542969, 2.441600799560547, 0.743499755859375, -0.9234161376953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000121.npy"}
|
|
{"epoch": 0.18291761148904007, "step": 122, "batch_size": 64, "mean": 1.6851723194122314, "std": 2.514207601547241, "min": -3.595672607421875, "p10": -0.8752344131469727, "median": 1.3415470123291016, "p90": 4.176995277404785, "max": 11.312881469726562, "pos_frac": 0.796875, "sample": [11.312881469726562, -1.1770095825195312, -0.7365570068359375, 5.282539367675781, -3.595672607421875, -0.8661861419677734, 2.649688720703125, 2.7852554321289062, 1.057138442993164, 1.9630851745605469, 3.198925018310547, 1.50433349609375, 0.77203369140625, 0.4033966064453125, -0.1611614227294922, 2.84356689453125, 0.7480621337890625, 2.1544570922851562, -1.8209190368652344, 3.720661163330078, 0.5275440216064453, 8.43487548828125, 1.8571281433105469, 3.6753387451171875, 6.006946563720703, 0.9631805419921875, 0.9584636688232422, -1.5427284240722656, 1.861572265625, 3.4672012329101562, 0.006011962890625, 3.5502700805664062, 0.15804481506347656, 1.18719482421875, -2.9036636352539062, 2.4313278198242188, 1.5831718444824219, -0.8791122436523438, 2.502126693725586, 0.8295745849609375, -0.33542823791503906, 1.4533462524414062, -2.074695587158203, 5.2150421142578125, 1.8407745361328125, 4.085290908813477, 1.6711654663085938, 1.33428955078125, 0.46765899658203125, 1.1349258422851562, 1.8276824951171875, 2.3013172149658203, 0.5239353179931641, 0.7149887084960938, -0.02005767822265625, 1.3488044738769531, 0.3953666687011719, 0.10800933837890625, 0.5819931030273438, 3.586528778076172, 4.216297149658203, 4.0560150146484375, -0.345367431640625, 7.0501556396484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000122.npy"}
|
|
{"epoch": 0.18442932728647016, "step": 123, "batch_size": 64, "mean": 1.4770545959472656, "std": 2.4226620197296143, "min": -6.204376220703125, "p10": -1.4750797271728515, "median": 1.4754257202148438, "p90": 4.122979354858399, "max": 8.028480529785156, "pos_frac": 0.765625, "sample": [4.545341491699219, 1.4415435791015625, 0.35614013671875, 0.873199462890625, -2.3177528381347656, -0.17313575744628906, 1.3451805114746094, -0.0156402587890625, 8.028480529785156, 4.16230583190918, 0.32366180419921875, 3.374298095703125, -0.26496124267578125, -2.17071533203125, 3.6664276123046875, 2.90997314453125, 1.683013916015625, 2.6642284393310547, -2.1697540283203125, 2.529937744140625, 2.0020294189453125, 5.792640686035156, 2.1308517456054688, 4.031217575073242, 2.6172409057617188, 3.723114013671875, 2.1965866088867188, -0.4025897979736328, 2.808868408203125, 1.57958984375, 0.012281417846679688, 6.10076904296875, -3.3079795837402344, 2.974405288696289, -1.5009193420410156, -2.139862060546875, 2.3147506713867188, -0.5153694152832031, 0.03703880310058594, 1.2934074401855469, 0.9017181396484375, 0.7126731872558594, 2.3139896392822266, 1.5498504638671875, 1.2473163604736328, 2.8014755249023438, -0.5797901153564453, 0.03352928161621094, 0.5388965606689453, 4.704458236694336, 1.509307861328125, 1.7540664672851562, 0.5448989868164062, 3.3861312866210938, -6.204376220703125, 3.51348876953125, -0.8298702239990234, 1.4292564392089844, -1.4147872924804688, 3.442554473876953, 1.013071060180664, 1.6429176330566406, 6.861724853515625, 1.1191482543945312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000123.npy"}
|
|
{"epoch": 0.18594104308390022, "step": 124, "batch_size": 64, "mean": 2.0230560302734375, "std": 2.3154640197753906, "min": -3.5246238708496094, "p10": -0.6984989166259765, "median": 1.9995498657226562, "p90": 5.376912689208985, "max": 7.930450439453125, "pos_frac": 0.8125, "sample": [-0.7076072692871094, 7.7476959228515625, 2.4487686157226562, -3.5246238708496094, 1.9490642547607422, 1.9544906616210938, 7.930450439453125, 3.7970504760742188, 3.8195343017578125, 5.7183837890625, 3.1228561401367188, 2.8674964904785156, 1.076089859008789, 5.3132476806640625, 0.6123580932617188, 0.5381889343261719, 1.3252182006835938, 3.6911544799804688, 0.6797714233398438, -1.3243598937988281, 6.887939453125, 2.4877090454101562, 3.0264511108398438, -0.67724609375, 0.9165916442871094, 1.7821044921875, 1.1693248748779297, 0.1081695556640625, 2.4376907348632812, -0.9922332763671875, 2.906351089477539, 1.992095947265625, 2.56884765625, 4.25830078125, 2.441761016845703, 2.1408233642578125, 5.675289154052734, -2.0977020263671875, 6.432830810546875, 1.9871063232421875, -0.16329193115234375, 5.404197692871094, -0.22772216796875, 4.196678161621094, 2.135772705078125, 2.154815673828125, 2.2043914794921875, 1.1033248901367188, -1.0487804412841797, 3.421171188354492, 3.758544921875, 0.3623390197753906, 0.27395057678222656, 2.4963035583496094, 1.0361785888671875, 0.7448577880859375, 0.9704971313476562, -0.105987548828125, -1.5746879577636719, 2.0070037841796875, -0.5984535217285156, 2.7708663940429688, 0.4767951965332031, 3.1893768310546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000124.npy"}
|
|
{"epoch": 0.1874527588813303, "step": 125, "batch_size": 64, "mean": 1.709364414215088, "std": 2.418764352798462, "min": -2.368133544921875, "p10": -1.2770910263061521, "median": 1.6117944717407227, "p90": 4.668737030029297, "max": 8.630523681640625, "pos_frac": 0.71875, "sample": [4.268096923828125, 4.213584899902344, 2.344635009765625, 1.6082916259765625, 1.192840576171875, -1.1327152252197266, 8.630523681640625, 0.06476020812988281, 2.315399169921875, 2.282003402709961, 6.825592041015625, 0.18578338623046875, -1.00091552734375, 3.257110595703125, 0.7025909423828125, 4.983577728271484, 3.2691650390625, 3.4483795166015625, 1.2934951782226562, -0.11622238159179688, 6.022525787353516, -0.2717437744140625, 2.6219406127929688, 3.620086669921875, -0.3801155090332031, 1.746114730834961, 4.702064514160156, -0.8670234680175781, -2.0191879272460938, 0.861846923828125, 3.2103271484375, 0.27814483642578125, -1.1035003662109375, 4.590972900390625, 3.6126651763916016, -1.3389663696289062, 3.3929309844970703, 1.1857223510742188, 4.322723388671875, 4.365631103515625, 5.8576507568359375, -0.02516937255859375, 1.0482940673828125, 2.6303939819335938, 2.339344024658203, -1.9450302124023438, 0.6966400146484375, 5.834381103515625, 0.07023048400878906, -0.00030517578125, -0.622711181640625, 2.4109554290771484, 2.650737762451172, 0.39540863037109375, -2.1136398315429688, 1.9545364379882812, 0.32114410400390625, -1.6666793823242188, 2.219451904296875, -0.7462291717529297, 1.6152973175048828, -1.4821205139160156, -2.368133544921875, 3.1357421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000125.npy"}
|
|
{"epoch": 0.1889644746787604, "step": 126, "batch_size": 64, "mean": 1.2727999687194824, "std": 2.912670612335205, "min": -5.838890075683594, "p10": -1.5759771347045897, "median": 1.0704154968261719, "p90": 4.2773395538330075, "max": 13.284652709960938, "pos_frac": 0.6875, "sample": [1.944915771484375, 0.2408294677734375, 4.286296844482422, 1.314260482788086, -0.4845123291015625, 4.754352569580078, 5.1046905517578125, 0.0072460174560546875, 0.9766006469726562, -1.1139717102050781, 3.755460739135742, 3.9171600341796875, -4.628654479980469, 1.0560760498046875, 3.500885009765625, 1.3455123901367188, -0.4240875244140625, -1.2658958435058594, -0.17339706420898438, -0.9610824584960938, -0.2804412841796875, 3.9794998168945312, 1.3440494537353516, 1.433823585510254, -0.19069862365722656, -0.1581878662109375, 1.820648193359375, -1.8611831665039062, 1.0847549438476562, -1.6485309600830078, 0.027191162109375, 0.3414306640625, -2.423858642578125, 2.417325973510742, -1.4066848754882812, -1.1134452819824219, -0.5470867156982422, 13.284652709960938, 2.6053638458251953, -5.838890075683594, 0.6821060180664062, 2.3883895874023438, 1.6993045806884766, 1.8197822570800781, 2.3921432495117188, 2.7365570068359375, 3.3032684326171875, 7.1435089111328125, 1.2323532104492188, 2.8783950805664062, 0.9116783142089844, 5.66302490234375, 2.1078262329101562, 2.3663063049316406, 0.6526641845703125, 0.5811004638671875, -1.9803619384765625, 4.256439208984375, 0.9280929565429688, 1.97222900390625, -1.0129852294921875, 7.086189270019531, -4.527494430541992, 0.1562652587890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000126.npy"}
|
|
{"epoch": 0.19047619047619047, "step": 127, "batch_size": 64, "mean": 1.9217140674591064, "std": 2.2346746921539307, "min": -3.1268768310546875, "p10": -0.7038274765014647, "median": 1.7993106842041016, "p90": 5.358852767944336, "max": 7.360618591308594, "pos_frac": 0.796875, "sample": [-0.925750732421875, 5.16229248046875, 2.1009063720703125, 3.371978759765625, 1.1230545043945312, 2.819915771484375, 1.9727935791015625, 5.277027130126953, 0.28891754150390625, 5.6036224365234375, 3.7783031463623047, 1.9980010986328125, 1.790313720703125, 4.795478820800781, 3.292877197265625, 1.0703353881835938, 3.0670318603515625, 2.564617156982422, -0.8239517211914062, 1.8083076477050781, 2.9700775146484375, -3.1268768310546875, 2.7615585327148438, 5.650760650634766, 2.4809951782226562, -2.0150070190429688, 2.0890579223632812, 0.8705024719238281, -1.0322113037109375, 1.329925537109375, 0.14747238159179688, 1.3305435180664062, 0.2336292266845703, 3.7746505737304688, -0.13051795959472656, 4.307682037353516, 0.08267021179199219, 5.5807647705078125, 4.89019775390625, 7.360618591308594, 0.4101104736328125, 3.3463668823242188, 0.6413192749023438, -1.4511356353759766, 3.0158233642578125, 1.2584457397460938, -0.22259521484375, -0.6032180786132812, 6.083580017089844, -0.7362823486328125, 1.8261566162109375, 5.3939208984375, 1.4115333557128906, 0.31298828125, 0.4614906311035156, 1.2212982177734375, 1.6133880615234375, 0.41162109375, -0.19933700561523438, 2.035175323486328, 2.4349327087402344, -0.6280994415283203, 5.607532501220703, -0.3478736877441406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000127.npy"}
|
|
{"epoch": 0.19198790627362056, "step": 128, "batch_size": 64, "mean": 1.5790255069732666, "std": 2.917693614959717, "min": -6.045310974121094, "p10": -1.8094669342041014, "median": 1.7289180755615234, "p90": 5.86676483154297, "max": 7.3140716552734375, "pos_frac": 0.6875, "sample": [1.3965911865234375, 4.83935546875, 1.1958332061767578, 2.733152389526367, 1.9411277770996094, 1.2341766357421875, 1.963226318359375, 2.36639404296875, 1.2353248596191406, 6.0497283935546875, 1.5682373046875, -4.764312744140625, 4.640541076660156, -1.0613231658935547, 2.5403270721435547, -1.6188430786132812, 0.6059761047363281, -2.7395248413085938, -3.0538101196289062, 5.439849853515625, 4.1464691162109375, -3.269287109375, -1.5407638549804688, 7.018463134765625, 6.989288330078125, 2.758331298828125, 0.11016082763671875, 6.558841705322266, -0.06173896789550781, 4.552423477172852, 0.3959503173828125, 3.2323150634765625, 1.3642501831054688, 3.2069015502929688, 3.293750762939453, 1.811126708984375, -0.06296157836914062, 6.551326751708984, -2.7786598205566406, -1.3178253173828125, -1.8911628723144531, 1.7243156433105469, 2.117664337158203, 0.08574104309082031, -1.2125988006591797, -0.04494476318359375, 2.140338897705078, 3.3050155639648438, 3.6555862426757812, -1.2216567993164062, 2.3962326049804688, 1.7335205078125, 7.3140716552734375, -0.0010833740234375, 3.534564971923828, 1.184286117553711, -0.362457275390625, 1.978759765625, 6.770538330078125, -0.5544719696044922, -6.045310974121094, -0.7598037719726562, 3.8136253356933594, 1.9264678955078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000128.npy"}
|
|
{"epoch": 0.19349962207105065, "step": 129, "batch_size": 64, "mean": 1.3946131467819214, "std": 2.657816171646118, "min": -5.503669738769531, "p10": -1.4773643493652344, "median": 1.0461349487304688, "p90": 4.864838600158691, "max": 9.421985626220703, "pos_frac": 0.640625, "sample": [3.1065826416015625, -0.38909912109375, 2.8078060150146484, -1.4328079223632812, -0.4953937530517578, 5.440341949462891, -2.453155517578125, -1.566009521484375, -0.732574462890625, 4.6436767578125, 3.889606475830078, 1.5750312805175781, 2.9659194946289062, -0.4604949951171875, -1.1321868896484375, -1.8920822143554688, 4.685874938964844, 0.4441967010498047, 5.355079650878906, 4.7562713623046875, 0.3230133056640625, -1.4964599609375, 1.332672119140625, 0.9880752563476562, -1.2276878356933594, 6.1212158203125, 3.274782180786133, -0.06346893310546875, 2.396686553955078, 3.9234619140625, 2.553131103515625, 0.2402496337890625, -1.8805084228515625, 4.911367416381836, -0.06592941284179688, 1.7792510986328125, -2.8106689453125, 0.50909423828125, 0.49941062927246094, 5.921850204467773, 3.0303115844726562, 0.022571563720703125, -0.5487747192382812, 1.1041946411132812, 2.5132064819335938, 1.9443435668945312, 1.2936935424804688, 9.421985626220703, 3.4962387084960938, 4.9766693115234375, 1.4377021789550781, 4.013679504394531, 4.291725158691406, -0.8284263610839844, -0.011472702026367188, -1.2438507080078125, 3.3235397338867188, -5.503669738769531, -1.355712890625, -0.34604835510253906, 0.5367050170898438, -0.692474365234375, 0.1591968536376953, 1.8737907409667969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000129.npy"}
|
|
{"epoch": 0.19501133786848074, "step": 130, "batch_size": 64, "mean": 1.6954035758972168, "std": 2.3036868572235107, "min": -2.5506134033203125, "p10": -1.1087615966796873, "median": 1.5344047546386719, "p90": 4.413844871520997, "max": 8.698677062988281, "pos_frac": 0.765625, "sample": [2.7471580505371094, 0.8134498596191406, 6.682762145996094, 3.363800048828125, 4.996501922607422, 3.2234649658203125, -0.95928955078125, 4.31151008605957, 1.3184661865234375, 0.23780250549316406, -1.172821044921875, 3.0181427001953125, 3.164276123046875, 0.5492916107177734, -0.89764404296875, 2.14923095703125, 1.986440658569336, -0.7687759399414062, 1.8948287963867188, 4.0865020751953125, 3.1359119415283203, 3.2884979248046875, 5.281410217285156, 4.066680908203125, -0.7479400634765625, -1.4232902526855469, -0.3068084716796875, -0.3462066650390625, -2.002756118774414, 0.8618240356445312, 3.0825958251953125, 4.45770263671875, 3.1455154418945312, 2.007120132446289, 0.02860260009765625, 8.698677062988281, 3.2747039794921875, 1.3845672607421875, 5.64764404296875, 0.3856086730957031, -0.3800544738769531, 4.114830017089844, -1.5989608764648438, -2.5506134033203125, 2.0043258666992188, 2.1839866638183594, 1.6685867309570312, 0.13519287109375, 3.4616317749023438, 0.01644134521484375, -1.9432201385498047, 1.9133129119873047, 0.225616455078125, 2.7049026489257812, -2.159881591796875, 1.3245582580566406, -0.07758331298828125, 1.3314476013183594, 0.5793609619140625, 1.4002227783203125, 5.880622863769531, 2.194059371948242, 0.8271274566650391, 0.5847549438476562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000130.npy"}
|
|
{"epoch": 0.1965230536659108, "step": 131, "batch_size": 64, "mean": 2.0382332801818848, "std": 3.0714690685272217, "min": -6.16278076171875, "p10": -1.9039886474609373, "median": 2.0890865325927734, "p90": 5.369071197509766, "max": 8.229940414428711, "pos_frac": 0.765625, "sample": [0.3162040710449219, 2.6494083404541016, 0.016139984130859375, -0.3880290985107422, -1.5107383728027344, 8.137908935546875, 1.6962642669677734, 2.6989707946777344, -1.0900516510009766, 5.1605072021484375, 2.6574935913085938, 0.7049102783203125, -1.9430770874023438, -1.8127822875976562, 1.4015312194824219, 3.537649154663086, -3.7223358154296875, 1.7813339233398438, 4.5426788330078125, 0.243804931640625, -2.0091323852539062, -2.6865997314453125, 5.2780914306640625, 3.7023544311523438, 2.592071533203125, 0.36470794677734375, 2.4367752075195312, -3.5215911865234375, 0.6963996887207031, -0.5135955810546875, 3.321575164794922, 5.1981048583984375, 6.3129425048828125, 4.546133041381836, 5.1296234130859375, 5.2799835205078125, 0.8456153869628906, 5.960174560546875, 2.148387908935547, 5.3936004638671875, 2.1994590759277344, 8.186637878417969, -1.2539615631103516, 5.076263427734375, 2.2767181396484375, 0.17371368408203125, 0.8283329010009766, 5.311836242675781, -2.5112152099609375, 4.901824951171875, -0.35967063903808594, -0.06146240234375, 3.524860382080078, 0.36095428466796875, -6.16278076171875, 4.715721130371094, 0.9206447601318359, 1.1904869079589844, 4.7851409912109375, 0.221405029296875, 2.02978515625, 5.803985595703125, 8.229940414428711, 4.504886627197266], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000131.npy"}
|
|
{"epoch": 0.1980347694633409, "step": 132, "batch_size": 64, "mean": 1.7893847227096558, "std": 2.8031527996063232, "min": -6.318395614624023, "p10": -1.239115524291992, "median": 1.8885622024536133, "p90": 5.206947326660157, "max": 8.162155151367188, "pos_frac": 0.75, "sample": [5.9027862548828125, 1.2050952911376953, 3.6035995483398438, 3.5883331298828125, -0.7269229888916016, -1.9981155395507812, 3.1823348999023438, -0.09206390380859375, -1.0128860473632812, 4.36859130859375, 4.835290908813477, 4.562431335449219, 2.0782012939453125, 2.5489883422851562, -2.5576820373535156, 1.6623344421386719, -0.12544631958007812, 5.72357177734375, 1.2066879272460938, -0.8949813842773438, -0.9066352844238281, -6.0129241943359375, 3.524799346923828, 1.1004409790039062, -1.1384696960449219, -2.4957122802734375, 3.346294403076172, 4.993618011474609, 5.955892562866211, -1.092824935913086, 0.07892608642578125, 0.2764568328857422, -6.318395614624023, 0.7334976196289062, 1.4833297729492188, 1.97882080078125, 6.710105895996094, 3.7661666870117188, 4.369621276855469, 3.3946380615234375, -0.1434173583984375, 8.162155151367188, 0.23032379150390625, 4.757957458496094, 4.3334503173828125, 2.580352783203125, 0.8893203735351562, 1.0154190063476562, 1.7983036041259766, 2.0827274322509766, 3.2878875732421875, 0.6259307861328125, 5.573089599609375, 2.748567581176758, 5.298374176025391, 1.38397216796875, 2.186370849609375, -1.2822494506835938, 0.6044769287109375, 2.6679534912109375, -1.7718963623046875, 2.209197998046875, 4.2632293701171875, 0.211334228515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000132.npy"}
|
|
{"epoch": 0.19954648526077098, "step": 133, "batch_size": 64, "mean": 1.6651947498321533, "std": 2.640791416168213, "min": -4.647125244140625, "p10": -1.1650037765502927, "median": 1.5390510559082031, "p90": 4.889500236511233, "max": 10.195465087890625, "pos_frac": 0.75, "sample": [3.8917694091796875, -0.8121185302734375, 1.0313339233398438, 5.596855163574219, 0.33029937744140625, 0.935211181640625, 0.9707908630371094, 5.162433624267578, 0.6828079223632812, -0.6714019775390625, 2.9050521850585938, 5.2051239013671875, 4.3690338134765625, 0.2562599182128906, -1.4644126892089844, 2.8139495849609375, 3.2255859375, -4.647125244140625, 3.1541366577148438, 0.109832763671875, -1.2363662719726562, 2.0254745483398438, -0.4387664794921875, 4.148456573486328, 3.1384315490722656, 0.227691650390625, 2.7962646484375, -0.25146484375, 0.8810195922851562, -0.688995361328125, 2.3585662841796875, 1.5511665344238281, -4.06536865234375, 4.291740417480469, -0.9295730590820312, 1.8086624145507812, -0.2610282897949219, 1.0025634765625, 4.408596038818359, 3.0049285888671875, 1.1561508178710938, 2.0243453979492188, 3.0439300537109375, -4.15043830871582, 1.3406295776367188, 6.8661651611328125, 3.4047393798828125, 4.043907165527344, 1.6323928833007812, 10.195465087890625, 0.6270294189453125, 0.07586669921875, -1.2471961975097656, 5.474273681640625, 2.7224197387695312, 0.4379158020019531, 3.5782394409179688, 5.095602035522461, 2.499298095703125, -0.9984912872314453, 3.44537353515625, -0.832183837890625, -2.207324981689453, 1.5269355773925781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000133.npy"}
|
|
{"epoch": 0.20105820105820105, "step": 134, "batch_size": 64, "mean": 2.4346795082092285, "std": 2.944178342819214, "min": -3.764190673828125, "p10": -1.0050445556640621, "median": 2.493824005126953, "p90": 5.421934127807618, "max": 10.632858276367188, "pos_frac": 0.78125, "sample": [1.6792144775390625, -1.196563720703125, 2.753173828125, 2.625701904296875, 1.7685928344726562, 2.114978790283203, 0.7293701171875, 3.5948562622070312, 5.529880523681641, 4.272918701171875, 2.4750289916992188, 4.2417755126953125, 3.2426528930664062, 2.8530845642089844, 1.9678573608398438, -0.2855110168457031, 4.242549896240234, 2.783905029296875, -0.4923248291015625, 2.9912109375, 4.599327087402344, 2.1506729125976562, 5.157583236694336, 0.7108726501464844, 0.8702545166015625, 5.543373107910156, 0.651458740234375, 0.9487228393554688, -1.6165714263916016, 3.842153549194336, -1.9950504302978516, -0.10661697387695312, 5.1700592041015625, 0.595703125, 0.54248046875, -3.225149154663086, 2.5126190185546875, 2.0656909942626953, 3.4023895263671875, 2.8446273803710938, 10.632858276367188, 4.499713897705078, -3.764190673828125, -3.0830307006835938, 3.8148727416992188, -0.1348419189453125, 6.55609130859375, -0.55816650390625, 8.432785034179688, 1.948516845703125, -0.052486419677734375, 3.544750213623047, 4.394001007080078, 3.9578704833984375, 9.198257446289062, 0.6405258178710938, -1.6568336486816406, 3.1687698364257812, 1.953176498413086, 3.723064422607422, 4.9561309814453125, 10.508407592773438, 0.9329833984375, -0.35068702697753906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000134.npy"}
|
|
{"epoch": 0.20256991685563114, "step": 135, "batch_size": 64, "mean": 1.6170554161071777, "std": 2.8967905044555664, "min": -5.995159149169922, "p10": -1.7231307983398434, "median": 1.2064857482910156, "p90": 4.776237106323244, "max": 9.351722717285156, "pos_frac": 0.71875, "sample": [3.29022216796875, -1.1685752868652344, 4.906169891357422, 0.7267017364501953, 1.193817138671875, -1.9072456359863281, 1.2191543579101562, -2.13629150390625, -0.51898193359375, 9.351722717285156, 3.18499755859375, -0.49440765380859375, 3.3250350952148438, -0.1975536346435547, -0.28520965576171875, -2.067535400390625, 4.4053497314453125, 2.654541015625, -0.5201187133789062, 1.1150836944580078, 1.4247817993164062, -0.5904922485351562, -5.995159149169922, 1.4740219116210938, 0.9111557006835938, 5.628223419189453, 0.7890853881835938, 0.8821868896484375, 0.41670989990234375, 1.523193359375, 1.7209930419921875, 2.0011024475097656, 3.7407302856445312, 0.8408432006835938, 3.7098388671875, 1.627197265625, 0.7339248657226562, 2.7016754150390625, 4.473060607910156, -4.119085311889648, -0.16071319580078125, 7.2707672119140625, 0.1443042755126953, 1.9091339111328125, 2.4138545989990234, 0.8659210205078125, 0.11801910400390625, -3.5216217041015625, 3.4803466796875, 2.218902587890625, 4.049701690673828, -2.285247802734375, 0.30233001708984375, 3.8765926361083984, 7.704322814941406, 8.963993072509766, -1.2935295104980469, 3.5674190521240234, -0.46143531799316406, 3.070384979248047, 0.777984619140625, 3.8461532592773438, 6.832252502441406, -0.16914749145507812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000135.npy"}
|
|
{"epoch": 0.20408163265306123, "step": 136, "batch_size": 64, "mean": 1.9679962396621704, "std": 4.028400421142578, "min": -6.701986312866211, "p10": -2.7791114807128903, "median": 1.9824447631835938, "p90": 8.100733375549318, "max": 12.457511901855469, "pos_frac": 0.671875, "sample": [11.447891235351562, 5.239046096801758, -4.549406051635742, -1.5200653076171875, -3.012939453125, 3.0498733520507812, 0.2421722412109375, 0.9213485717773438, -0.8461837768554688, 2.539236068725586, 2.1728973388671875, -4.8524322509765625, -0.6058826446533203, 3.2288131713867188, -0.9497299194335938, 3.1647262573242188, 8.178770065307617, 1.3000869750976562, 1.2031116485595703, -1.7406387329101562, 4.180450439453125, 1.8300437927246094, 3.8330841064453125, 7.918647766113281, -1.1149063110351562, -0.06656646728515625, 8.719322204589844, 2.4523963928222656, 2.1878585815429688, 10.928863525390625, 4.38385009765625, 0.244171142578125, 4.0256500244140625, 5.953651428222656, -1.5839080810546875, 2.14007568359375, -0.87152099609375, 2.5489883422851562, 2.6470489501953125, 1.4209976196289062, -6.701986312866211, 3.052978515625, 1.927825927734375, 4.851898193359375, 0.7463722229003906, 4.644691467285156, -0.848236083984375, 2.12225341796875, -3.361053466796875, 12.457511901855469, 2.075763702392578, -3.1209564208984375, -2.2335128784179688, 2.0370635986328125, -0.6464004516601562, 4.2891845703125, 1.569732666015625, -0.4196739196777344, 8.982000350952148, 10.318878173828125, 3.6853485107421875, -0.6538848876953125, 0.6106147766113281, -5.82354736328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000136.npy"}
|
|
{"epoch": 0.20559334845049132, "step": 137, "batch_size": 64, "mean": 2.037825107574463, "std": 3.260282039642334, "min": -6.062042236328125, "p10": -1.875315475463867, "median": 1.7018318176269531, "p90": 5.875891876220703, "max": 13.276283264160156, "pos_frac": 0.6875, "sample": [-0.412628173828125, -6.062042236328125, 3.3431358337402344, 4.338905334472656, 3.7697200775146484, 6.441551208496094, 4.716762542724609, 5.754608154296875, -0.4446449279785156, 5.982635498046875, 4.462244033813477, 2.723541259765625, 6.1453399658203125, 3.2420883178710938, 5.888816833496094, 1.3211212158203125, 1.9965438842773438, 6.051567077636719, -0.680084228515625, 0.9195346832275391, -2.245513916015625, 2.613994598388672, 4.052528381347656, -3.3472366333007812, 0.8717174530029297, -0.474395751953125, 4.173774719238281, 13.276283264160156, -1.80657958984375, -0.18688201904296875, -2.9443740844726562, 0.2784919738769531, 0.2748565673828125, 3.55487060546875, 9.575492858886719, 0.4707794189453125, -0.06509208679199219, -2.1439208984375, 2.6093521118164062, -0.35898780822753906, -1.1652984619140625, 1.0360336303710938, 3.1285133361816406, 3.5257415771484375, 4.155818939208984, -0.4839344024658203, 1.2275352478027344, 4.497041702270508, 1.4071197509765625, 5.845733642578125, 0.9008808135986328, -1.9047737121582031, -0.1567535400390625, -3.0276947021484375, 4.844278335571289, 2.3281097412109375, 0.9648075103759766, -0.6151046752929688, -0.804443359375, 3.574432373046875, 0.7793121337890625, 5.508766174316406, 4.52752685546875, 2.6492919921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000137.npy"}
|
|
{"epoch": 0.20710506424792138, "step": 138, "batch_size": 64, "mean": 1.6931648254394531, "std": 3.1884450912475586, "min": -7.747467041015625, "p10": -2.2218299865722657, "median": 1.8345661163330078, "p90": 5.198565673828125, "max": 10.268341064453125, "pos_frac": 0.703125, "sample": [-0.1677398681640625, 4.22589111328125, 3.1867713928222656, 5.2412109375, 2.4639739990234375, 2.407665252685547, 1.7344970703125, 3.6236038208007812, -2.4426727294921875, 1.7945632934570312, 1.1047611236572266, -4.8319549560546875, 5.09906005859375, 0.1386260986328125, 10.268341064453125, -2.1272430419921875, -2.2623672485351562, 4.688127517700195, 3.7179107666015625, 4.160739898681641, 1.6465873718261719, 2.1048812866210938, 6.620849609375, 4.061794281005859, 0.23461532592773438, 3.1325130462646484, 3.9260482788085938, 2.6625709533691406, 0.7470626831054688, 1.016225814819336, 5.29495906829834, 0.002864837646484375, 6.025339126586914, -0.6526947021484375, -1.955291748046875, -0.2976264953613281, -0.11069869995117188, -4.906333923339844, -0.13161087036132812, 3.7087326049804688, 1.944366455078125, 4.383094787597656, -0.1786956787109375, 2.0101699829101562, -1.2003211975097656, -7.747467041015625, 9.034503936767578, 1.5335521697998047, 2.369049072265625, 0.1638336181640625, 1.743133544921875, 2.3908233642578125, 3.67181396484375, -1.9927024841308594, -2.34783935546875, -0.5847930908203125, -2.2690582275390625, 2.4455490112304688, -0.08076667785644531, 2.281768798828125, 7.841346740722656, 1.8745689392089844, 4.62933349609375, 1.292724609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000138.npy"}
|
|
{"epoch": 0.20861678004535147, "step": 139, "batch_size": 64, "mean": 2.1663827896118164, "std": 2.929001808166504, "min": -4.874237060546875, "p10": -1.1427722930908202, "median": 1.7873477935791016, "p90": 6.337242889404298, "max": 10.084245681762695, "pos_frac": 0.78125, "sample": [2.0987911224365234, 5.638710021972656, 1.0948486328125, 1.3980560302734375, -3.1350555419921875, 3.317371368408203, 1.5773696899414062, 5.907508850097656, 2.737895965576172, 0.46559906005859375, 0.3601875305175781, 10.084245681762695, 4.576257705688477, -1.0306816101074219, 2.0775146484375, 6.4733428955078125, 7.96282958984375, 1.3727989196777344, -0.9968719482421875, 4.625072479248047, -1.1908111572265625, -4.874237060546875, -0.35816192626953125, 1.172079086303711, 3.3316802978515625, 1.7435455322265625, 2.6731719970703125, 1.5347633361816406, 4.667659759521484, 1.7184600830078125, 2.5682334899902344, 4.64129638671875, 2.6573486328125, -0.33233642578125, -0.14009857177734375, 3.2550811767578125, 2.8938064575195312, 1.7804298400878906, 1.7942657470703125, 0.045318603515625, 5.6241912841796875, 1.3444709777832031, 7.289613723754883, -0.4596748352050781, -2.014474868774414, -0.4435234069824219, 1.86328125, 3.107013702392578, 8.078018188476562, 0.5977210998535156, 6.57843017578125, 1.1803054809570312, 2.9342880249023438, -2.5407028198242188, -2.1068115234375, 2.1904678344726562, 0.07429122924804688, 2.5064849853515625, -2.7056446075439453, 3.8043975830078125, 6.193672180175781, 6.398773193359375, 1.7345733642578125, 1.2320556640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000139.npy"}
|
|
{"epoch": 0.21012849584278157, "step": 140, "batch_size": 64, "mean": 2.775313138961792, "std": 4.251906871795654, "min": -11.754226684570312, "p10": -2.288568305969238, "median": 3.2104530334472656, "p90": 8.322595977783203, "max": 10.679189682006836, "pos_frac": 0.734375, "sample": [8.265243530273438, 3.637451171875, -0.024730682373046875, 1.0897979736328125, 0.9902191162109375, 4.459922790527344, -4.093452453613281, -3.8531570434570312, 6.013805389404297, 5.439414978027344, -11.754226684570312, 3.5356979370117188, 3.7945404052734375, 2.217254638671875, 8.347175598144531, 3.6172561645507812, -2.708415985107422, 0.9412765502929688, -1.1610946655273438, 6.9669342041015625, 3.0482959747314453, 6.78375244140625, 4.288234710693359, 4.172889709472656, 0.5631484985351562, 3.59698486328125, 2.180368423461914, 10.679189682006836, 1.3664016723632812, -2.233551025390625, -2.189361572265625, -2.0397567749023438, 4.699258804321289, -1.5294342041015625, -0.9217357635498047, -0.4735107421875, 6.979578018188477, 8.949554443359375, 8.175949096679688, 7.447174072265625, 2.9330291748046875, 6.419563293457031, 9.692359924316406, 6.78455924987793, 3.1383056640625, -3.3033218383789062, 8.441719055175781, 1.2840652465820312, 9.178398132324219, -1.4874744415283203, -4.39154052734375, 5.045598983764648, 1.7552909851074219, 3.2885169982910156, 8.4085693359375, 1.4750900268554688, 6.264930725097656, -1.0280380249023438, 4.3699188232421875, 3.0503082275390625, 0.03790283203125, 6.027492523193359, -2.3121471405029297, 3.2826004028320312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000140.npy"}
|
|
{"epoch": 0.21164021164021163, "step": 141, "batch_size": 64, "mean": 2.7026472091674805, "std": 4.151407718658447, "min": -6.7509765625, "p10": -1.7568058013916013, "median": 1.7446403503417969, "p90": 8.186215972900392, "max": 12.873077392578125, "pos_frac": 0.765625, "sample": [-6.7509765625, 0.7880859375, 6.34429931640625, 0.6618995666503906, 3.7152099609375, 0.49076080322265625, 0.8719062805175781, 4.649497985839844, 0.9854145050048828, -4.47233772277832, 2.8975067138671875, 2.5180587768554688, 6.54302978515625, -1.8681888580322266, 7.452632904052734, 3.4819183349609375, 4.994831085205078, 0.43848228454589844, -0.21129226684570312, -4.22296142578125, 1.88885498046875, 1.1310863494873047, 1.0065536499023438, 3.8638381958007812, 4.485374450683594, 0.04217529296875, 1.6004257202148438, 5.8597259521484375, -1.4552001953125, -1.9763031005859375, -1.4568405151367188, -2.8529891967773438, 3.288848876953125, 3.9978866577148438, 5.418952941894531, -1.341552734375, 6.726772308349609, 0.5575408935546875, 12.542634963989258, 8.8629150390625, 7.634101867675781, 6.181652069091797, 4.933597564697266, 3.984283447265625, 8.422836303710938, 0.2056121826171875, -0.4786643981933594, 1.9517440795898438, 9.900253295898438, 6.9868316650390625, -3.5481719970703125, 0.995697021484375, 1.2324333190917969, 12.388885498046875, 4.5357666015625, 12.873077392578125, -1.4969120025634766, 9.52944564819336, 0.37647247314453125, 1.0295486450195312, 3.2429733276367188, 1.5159721374511719, -0.6076507568359375, -0.31884002685546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000141.npy"}
|
|
{"epoch": 0.21315192743764172, "step": 142, "batch_size": 64, "mean": 2.0672333240509033, "std": 3.27900767326355, "min": -4.8862457275390625, "p10": -1.869081878662109, "median": 1.7680339813232422, "p90": 6.73528118133545, "max": 10.153038024902344, "pos_frac": 0.734375, "sample": [1.5519371032714844, -3.9984092712402344, 1.2925968170166016, 4.037498474121094, 6.5348052978515625, 10.153038024902344, 3.164031982421875, -1.6578407287597656, 2.2051773071289062, 4.941612243652344, 2.255645751953125, -0.5292396545410156, 4.12257194519043, -4.8862457275390625, 0.814361572265625, 3.4745712280273438, 3.4977645874023438, -4.386037826538086, 1.3708267211914062, 1.6365795135498047, 2.1556930541992188, 4.5977935791015625, 2.4240989685058594, 3.0504188537597656, 2.2482070922851562, 4.62432861328125, -0.7039413452148438, 0.3714447021484375, 8.564613342285156, -1.9596138000488281, 1.714080810546875, -0.2173919677734375, 7.257232666015625, 9.241195678710938, 1.7638473510742188, -2.831096649169922, 1.180633544921875, -2.7772579193115234, -0.2174072265625, 1.4497604370117188, 9.269401550292969, 6.8571319580078125, 1.0584449768066406, 1.4313392639160156, 0.24977874755859375, 4.890937805175781, -2.9231491088867188, 2.0169677734375, -0.61376953125, 3.3079910278320312, -0.3983116149902344, -1.5358352661132812, 1.7611083984375, 4.2691497802734375, 1.7722206115722656, 2.4798507690429688, 5.657360076904297, 6.821199417114258, 3.3131103515625, -0.8368911743164062, 0.29793548583984375, -1.023763656616211, 4.577049255371094, 2.0717849731445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000142.npy"}
|
|
{"epoch": 0.2146636432350718, "step": 143, "batch_size": 64, "mean": 1.8766175508499146, "std": 4.13226842880249, "min": -10.905044555664062, "p10": -2.0909458160400387, "median": 1.6593894958496094, "p90": 6.819949150085451, "max": 11.243988037109375, "pos_frac": 0.75, "sample": [1.1661834716796875, 4.86553955078125, 4.292629241943359, 3.271526336669922, 2.904144287109375, -1.8063583374023438, 1.7942657470703125, -0.36603355407714844, 8.297672271728516, -4.441295623779297, 5.246244430541992, 0.6464385986328125, 8.780502319335938, 1.8717918395996094, 3.5198898315429688, 2.9976940155029297, -7.801715850830078, 2.5409393310546875, 7.7452239990234375, 0.7960205078125, 4.95831298828125, -1.7200813293457031, 11.243988037109375, 5.758609771728516, 4.142921447753906, 1.4840145111083984, 3.035186767578125, -6.702728271484375, 1.0837631225585938, 0.8932113647460938, 1.10418701171875, 1.4197139739990234, -3.9740447998046875, 5.061103820800781, 9.192573547363281, -2.1537818908691406, -1.9443283081054688, 0.4030265808105469, 0.08618927001953125, 1.9700241088867188, 0.9983367919921875, -10.905044555664062, 6.31939697265625, 1.08551025390625, -0.8136482238769531, 2.632354736328125, 3.60357666015625, 5.835113525390625, 0.456024169921875, -0.2662353515625, -1.715667724609375, 2.371419906616211, 5.909210205078125, 9.23532485961914, -7.1332550048828125, 7.03447151184082, 2.9551620483398438, 1.3796920776367188, 4.821144104003906, 3.9868812561035156, -0.8437843322753906, -0.49498558044433594, 1.5245132446289062, 0.4648551940917969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000143.npy"}
|
|
{"epoch": 0.2161753590325019, "step": 144, "batch_size": 64, "mean": 2.3448359966278076, "std": 4.0658087730407715, "min": -10.327468872070312, "p10": -2.355894660949707, "median": 2.266040802001953, "p90": 8.097705650329592, "max": 11.030426025390625, "pos_frac": 0.71875, "sample": [8.625539779663086, 4.481517791748047, -2.396942138671875, 0.3554058074951172, -1.197378158569336, 1.6427173614501953, -1.263742446899414, 4.275901794433594, 0.9206695556640625, 0.48619651794433594, -2.3643569946289062, 1.3987808227539062, 2.426715850830078, 2.870637893676758, 5.462104797363281, 2.8248977661132812, 7.277412414550781, 0.5568046569824219, -2.090850830078125, 3.8435440063476562, 4.37261962890625, 3.283885955810547, 2.473388671875, -0.566131591796875, -0.3725700378417969, 1.087646484375, 4.060523986816406, 6.267879486083984, -1.2616252899169922, -0.6126174926757812, 5.448577880859375, 1.3729629516601562, -3.7112503051757812, -0.5436630249023438, 4.806854248046875, 0.11145782470703125, -1.1378250122070312, 7.341737747192383, 8.42169189453125, 9.417011260986328, 3.84063720703125, 3.5327911376953125, -3.7652587890625, 6.2355499267578125, -2.932903289794922, 2.8337631225585938, 3.111236572265625, -2.336149215698242, 6.576656341552734, 0.5819549560546875, 0.552734375, -0.99554443359375, 2.105365753173828, 10.400520324707031, -3.2560272216796875, 9.403337478637695, 10.952606201171875, 11.030426025390625, 0.72052001953125, -10.327468872070312, 2.7489395141601562, 0.8800544738769531, 4.932567596435547, 4.8470611572265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000144.npy"}
|
|
{"epoch": 0.21768707482993196, "step": 145, "batch_size": 64, "mean": 2.3893003463745117, "std": 4.218606948852539, "min": -10.196983337402344, "p10": -2.12298698425293, "median": 2.4047536849975586, "p90": 8.21058959960938, "max": 12.58303451538086, "pos_frac": 0.734375, "sample": [2.687042236328125, 3.5135536193847656, -2.1795578002929688, 0.45465087890625, 0.7009201049804688, -10.196983337402344, 3.6126556396484375, -0.796661376953125, 12.58303451538086, 3.668487548828125, 3.128652572631836, 4.4799041748046875, 1.1361064910888672, 3.6108169555664062, 0.8221206665039062, 2.3699493408203125, -1.1092491149902344, 7.029083251953125, -2.2840347290039062, 3.3479232788085938, 0.37429046630859375, 1.2441978454589844, -2.1348304748535156, 4.8072509765625, 9.384239196777344, -0.430877685546875, -0.15399169921875, 3.7786865234375, 6.1300048828125, 0.043056488037109375, 12.533927917480469, 5.645782470703125, 2.074249267578125, 11.06402587890625, 1.650238037109375, 9.144357681274414, 2.4395580291748047, 5.623737335205078, 2.715911865234375, 6.043306350708008, -5.432167053222656, 3.4485511779785156, 8.95654296875, 4.706735610961914, 0.08385086059570312, -1.8111114501953125, 1.592630386352539, 2.6323165893554688, -1.37884521484375, 3.1350021362304688, 4.512657165527344, 3.5138320922851562, 6.206058502197266, -0.7491188049316406, 1.661529541015625, 8.716949462890625, 1.7053680419921875, -2.0953521728515625, -6.4976654052734375, 1.9704818725585938, 5.5150604248046875, -3.690093994140625, -1.6024169921875, -0.741119384765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000145.npy"}
|
|
{"epoch": 0.21919879062736206, "step": 146, "batch_size": 64, "mean": 2.6364758014678955, "std": 3.830441474914551, "min": -4.448883056640625, "p10": -2.6005813598632814, "median": 2.2596702575683594, "p90": 7.160981559753418, "max": 14.66580581665039, "pos_frac": 0.75, "sample": [5.634864807128906, 5.114053726196289, -2.4207763671875, 2.3393173217773438, 1.0091400146484375, -3.0671463012695312, -1.4643211364746094, 4.782009124755859, 4.459712982177734, 3.207275390625, -3.0725326538085938, -2.565643310546875, 5.438323974609375, 4.245161056518555, 2.180023193359375, -2.6155548095703125, 5.938289642333984, -0.04529571533203125, 6.195331573486328, -4.448883056640625, 3.2137680053710938, 8.478744506835938, 2.0565185546875, 0.4336204528808594, -3.648242950439453, 0.0205841064453125, 1.7624187469482422, 7.346466064453125, -2.6257972717285156, 2.1248321533203125, 3.4333763122558594, -3.6307525634765625, 11.487503051757812, 2.9562835693359375, 1.3540573120117188, -0.8742599487304688, 1.4441986083984375, 1.0513572692871094, 4.5408172607421875, -0.29468536376953125, 7.080230712890625, 2.691680908203125, 1.895925521850586, 4.7394256591796875, 6.0035858154296875, 9.138389587402344, 5.282474517822266, 3.442811965942383, 6.412628173828125, 1.9476852416992188, 4.4843902587890625, 1.6322021484375, 7.258840560913086, 2.8580169677734375, -2.317110061645508, -2.286102294921875, 14.66580581665039, -0.96588134765625, 2.1686439514160156, 1.7363319396972656, 6.8060302734375, 3.573932647705078, 1.8147659301757812, 7.195589065551758], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000146.npy"}
|
|
{"epoch": 0.22071050642479215, "step": 147, "batch_size": 64, "mean": 2.061326026916504, "std": 4.309901237487793, "min": -11.92095947265625, "p10": -1.5815940856933592, "median": 1.7286529541015625, "p90": 6.631776428222657, "max": 12.028602600097656, "pos_frac": 0.703125, "sample": [2.907939910888672, 3.251188278198242, 5.840188980102539, -0.7629470825195312, 2.4042587280273438, 4.9308929443359375, -3.2815780639648438, 0.7063751220703125, -1.6289520263671875, -1.1048450469970703, 4.881500244140625, 3.539020538330078, 5.2151336669921875, 6.214874267578125, -1.0763778686523438, 6.585601806640625, 10.483390808105469, -11.92095947265625, 4.843328475952148, -0.7239952087402344, 2.410043716430664, 1.2230148315429688, 0.023740768432617188, 1.59326171875, 0.16265869140625, 9.06657600402832, -1.4710922241210938, 5.227382659912109, 2.9925079345703125, 1.864044189453125, -0.5702838897705078, 1.9756698608398438, 1.97198486328125, -4.3620147705078125, -0.9959335327148438, 5.2159271240234375, 5.959238052368164, 11.086708068847656, 8.760238647460938, -0.7769508361816406, 1.132720947265625, 0.22496795654296875, 0.1486663818359375, 4.883459091186523, 4.950969696044922, -2.2604598999023438, 12.028602600097656, -0.7739486694335938, 5.513008117675781, 0.030422210693359375, 1.3286895751953125, 3.441436767578125, 2.9619903564453125, -0.7350673675537109, -1.4475555419921875, 0.6740951538085938, 1.2698650360107422, 0.46846961975097656, 6.6515655517578125, -7.79962158203125, -6.270761489868164, 2.9596710205078125, 10.146894454956055, -0.26398277282714844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000147.npy"}
|
|
{"epoch": 0.2222222222222222, "step": 148, "batch_size": 64, "mean": 2.7258191108703613, "std": 4.295035362243652, "min": -6.671958923339844, "p10": -2.551520156860351, "median": 2.6404075622558594, "p90": 7.887846374511721, "max": 12.451812744140625, "pos_frac": 0.703125, "sample": [8.745208740234375, -5.061344146728516, 3.268533706665039, -3.981447219848633, 6.666526794433594, 11.067451477050781, 4.143547058105469, -6.45013427734375, 5.6007843017578125, 8.243301391601562, -1.1056461334228516, 6.595592498779297, 1.593597412109375, 2.2106246948242188, 5.803905487060547, -1.7955856323242188, -1.8346519470214844, 7.366424560546875, 3.6971969604492188, -1.0142936706542969, 4.038761138916016, -3.0229949951171875, 8.111312866210938, 1.0792732238769531, 6.226541519165039, 11.755401611328125, 5.692039489746094, -0.3970184326171875, 11.7515869140625, 2.150287628173828, 2.553110122680664, -1.592010498046875, 7.319913864135742, -2.8587493896484375, 3.1943588256835938, 6.071136474609375, 0.6676082611083984, 6.361671447753906, -6.671958923339844, -0.03800201416015625, 2.0026321411132812, -0.044864654541015625, 7.106922149658203, 12.451812744140625, -4.326559066772461, 4.017839431762695, 2.916351318359375, 2.7255191802978516, 4.2961578369140625, 1.1622161865234375, 4.640720367431641, 0.5416488647460938, 1.2920703887939453, 0.5348472595214844, -0.057270050048828125, 5.672645568847656, 4.0443115234375, 3.7206878662109375, 2.555295944213867, -0.10575103759765625, 0.8681926727294922, 4.196922302246094, -1.1617965698242188, -0.7499847412109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000148.npy"}
|
|
{"epoch": 0.2237339380196523, "step": 149, "batch_size": 64, "mean": 3.2872655391693115, "std": 4.0977678298950195, "min": -8.11737060546875, "p10": -1.455462074279785, "median": 3.0267105102539062, "p90": 8.266680717468262, "max": 13.705732345581055, "pos_frac": 0.796875, "sample": [4.552177429199219, 6.8304443359375, 8.091075897216797, -0.34259033203125, 3.431671142578125, 0.19022369384765625, -0.3902130126953125, 5.316781997680664, -0.9022674560546875, 6.27479362487793, -3.5024795532226562, 2.9262962341308594, 0.3524665832519531, 3.007415771484375, 5.746551513671875, 0.9221343994140625, 1.9369773864746094, 2.215576171875, 0.6809177398681641, 3.1692352294921875, 1.4887351989746094, 9.283760070800781, 8.30473518371582, -1.5185813903808594, 13.705732345581055, 3.5686264038085938, 2.2927913665771484, -1.3081836700439453, 3.4111576080322266, -8.11737060546875, -0.671966552734375, 6.933418273925781, -2.7499618530273438, 11.926685333251953, 1.135660171508789, 9.936651229858398, 6.847429275512695, -2.3485946655273438, 8.177886962890625, -2.0162200927734375, -1.0417022705078125, 4.639495849609375, 0.7271480560302734, 6.714019775390625, 9.543933868408203, 7.452009201049805, 3.8543243408203125, 5.361093521118164, 2.187276840209961, 2.3702774047851562, 4.091915130615234, 2.552703857421875, 0.8173065185546875, -2.636505126953125, 0.11866378784179688, 5.6665802001953125, 3.69970703125, 11.13848876953125, 7.707759857177734, 2.8676013946533203, 3.0460052490234375, 6.6692657470703125, 0.5747222900390625, 3.4733200073242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000149.npy"}
|
|
{"epoch": 0.2252456538170824, "step": 150, "batch_size": 64, "mean": 4.0110015869140625, "std": 4.300034523010254, "min": -4.796867370605469, "p10": -1.2429313659667964, "median": 3.667276382446289, "p90": 9.448096466064456, "max": 14.480484008789062, "pos_frac": 0.859375, "sample": [3.6025123596191406, 0.9940891265869141, 10.5755615234375, 2.6262359619140625, -4.076591491699219, -0.63787841796875, 2.487466812133789, 7.935981750488281, 3.7320404052734375, 6.834943771362305, 0.07939910888671875, 5.466056823730469, 0.212005615234375, 4.6003875732421875, 0.3592643737792969, 3.3326339721679688, 3.2135467529296875, 12.720703125, 2.147430419921875, 4.473548889160156, 4.044914245605469, 1.0145301818847656, 8.169082641601562, -3.3959503173828125, 0.814605712890625, 7.49171257019043, 1.8143596649169922, 5.651691436767578, 1.1008453369140625, 10.867828369140625, 5.231138229370117, 5.358367919921875, 8.40084457397461, 3.2473983764648438, 13.324195861816406, -4.796867370605469, 1.784555435180664, 4.926382064819336, 5.367034912109375, 11.521835327148438, 4.165802001953125, 8.714637756347656, -0.6943206787109375, 3.2706222534179688, 9.762435913085938, 2.212554931640625, 7.098703384399414, 8.156068801879883, -2.5895614624023438, 14.480484008789062, 1.0566482543945312, 0.3866729736328125, 5.602958679199219, -1.4780502319335938, 1.9052143096923828, 8.477638244628906, -3.263256072998047, -2.0288619995117188, 7.736040115356445, 0.9841537475585938, 2.4302597045898438, 6.430755615234375, 3.9067306518554688, 7.361907958984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000150.npy"}
|
|
{"epoch": 0.22675736961451248, "step": 151, "batch_size": 64, "mean": 4.227801322937012, "std": 4.6226911544799805, "min": -5.6160888671875, "p10": -1.142745018005371, "median": 4.369516372680664, "p90": 9.756410980224612, "max": 17.4794921875, "pos_frac": 0.765625, "sample": [5.627655029296875, 5.184783935546875, 8.680780410766602, -1.5559234619140625, 1.2363548278808594, 1.1100311279296875, 17.4794921875, 0.9040260314941406, 10.057464599609375, 1.2508888244628906, 3.9295425415039062, 7.3708343505859375, 4.3575592041015625, -0.948211669921875, 4.1468505859375, 6.1717987060546875, 7.972265243530273, -1.1326980590820312, 3.8278865814208984, -0.18651199340820312, -2.4319381713867188, -0.9657192230224609, 6.288177490234375, -1.8457221984863281, 13.959548950195312, -3.9514389038085938, 5.870735168457031, 4.565433502197266, -1.1470508575439453, -0.088043212890625, 3.9553394317626953, -1.252899169921875, 7.188087463378906, 5.897369384765625, 2.2051620483398438, -0.7895736694335938, 4.697071075439453, 4.406646728515625, 1.8994293212890625, 14.477935791015625, 0.6863842010498047, 4.531005859375, 9.064262390136719, 2.6695175170898438, 11.797161102294922, 9.212928771972656, 9.362869262695312, 8.426834106445312, 9.925071716308594, 3.3102951049804688, 4.381473541259766, 5.5642547607421875, 1.8220558166503906, 11.457611083984375, -5.6160888671875, 6.971437454223633, 6.075263977050781, 2.5013427734375, -1.0841255187988281, -0.02618408203125, 5.1017608642578125, 8.346668243408203, 0.5702667236328125, 7.1037750244140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000151.npy"}
|
|
{"epoch": 0.22826908541194255, "step": 152, "batch_size": 64, "mean": 3.1079225540161133, "std": 5.242221355438232, "min": -8.368270874023438, "p10": -2.7945739746093747, "median": 2.627643585205078, "p90": 10.74943733215332, "max": 13.599637985229492, "pos_frac": 0.734375, "sample": [-1.2284870147705078, 0.8429031372070312, 6.841850280761719, 2.648162841796875, -8.368270874023438, 0.6715774536132812, 5.22369384765625, 5.164794921875, 2.6237411499023438, -2.148040771484375, 2.666412353515625, -3.9862899780273438, 3.7250709533691406, 0.086273193359375, 9.578346252441406, 9.996986389160156, 1.0065116882324219, 2.6742286682128906, 1.178152084350586, -1.7638130187988281, -8.306388854980469, 10.783191680908203, 4.733451843261719, 4.562629699707031, -1.1809043884277344, 0.9055767059326172, 6.988178253173828, 2.9922752380371094, 1.5154781341552734, 3.3879165649414062, 0.4702644348144531, 13.599637985229492, 1.092336654663086, 1.4678802490234375, -0.6037979125976562, -3.14349365234375, 2.2482376098632812, -0.8790512084960938, 6.83929443359375, 2.4223480224609375, -2.9841156005859375, 11.115488052368164, 6.654754638671875, -2.3523101806640625, 6.728771209716797, 12.659591674804688, 7.5635223388671875, -0.003177642822265625, 3.3393173217773438, 2.6315460205078125, -6.967811584472656, 9.504325866699219, 0.17768287658691406, 13.037513732910156, 7.886737823486328, 4.1515045166015625, 12.168388366699219, 10.670677185058594, -1.982452392578125, 12.939369201660156, -0.8913040161132812, -4.738800048828125, 1.7035026550292969, 8.565467834472656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000152.npy"}
|
|
{"epoch": 0.22978080120937264, "step": 153, "batch_size": 64, "mean": 3.422149896621704, "std": 4.318820953369141, "min": -5.231695175170898, "p10": -2.5217662811279293, "median": 2.9587793350219727, "p90": 9.418136215209962, "max": 13.222354888916016, "pos_frac": 0.796875, "sample": [5.628454208374023, 2.4520950317382812, 3.2941436767578125, 7.7467193603515625, 7.063037872314453, 10.148780822753906, 5.356193542480469, 3.2463645935058594, 2.190387725830078, 2.04150390625, 2.7446537017822266, 9.589836120605469, 1.7056884765625, 7.943874359130859, 3.682861328125, -2.717376708984375, 4.446250915527344, 0.19911956787109375, 0.8646469116210938, 10.830101013183594, 13.222354888916016, 8.096839904785156, -2.8236961364746094, 3.435699462890625, 11.967914581298828, -3.8378829956054688, 9.517948150634766, 1.7463092803955078, 0.4448280334472656, 8.886280059814453, -4.560050964355469, 2.1125011444091797, -5.231695175170898, 0.3530921936035156, 4.73779296875, 1.359609603881836, -0.7219772338867188, 8.456893920898438, 2.6937255859375, 3.1729049682617188, 0.853759765625, -0.6325302124023438, 9.82598876953125, 4.995525360107422, 3.534046173095703, -0.6414089202880859, 5.652214050292969, 2.531951904296875, 6.1127471923828125, -1.1327800750732422, 0.7581787109375, 6.226203918457031, 1.455148696899414, 0.729766845703125, 4.8131103515625, -3.3324050903320312, -3.2026596069335938, 8.475736618041992, -0.411590576171875, 9.18524169921875, 7.794094085693359, 0.40642547607421875, -2.0653419494628906, 5.5994415283203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000153.npy"}
|
|
{"epoch": 0.23129251700680273, "step": 154, "batch_size": 64, "mean": 2.5362792015075684, "std": 5.220127582550049, "min": -10.783954620361328, "p10": -4.912422561645507, "median": 3.050006866455078, "p90": 8.796754837036133, "max": 12.937847137451172, "pos_frac": 0.71875, "sample": [-5.508598327636719, 2.1011199951171875, -8.339187622070312, -10.783954620361328, 10.581352233886719, 6.308172225952148, -5.762031555175781, 2.757081985473633, 2.1832733154296875, -5.115364074707031, 3.2512149810791016, -0.15018272399902344, 5.640386581420898, 0.2122955322265625, -3.578857421875, -3.8834190368652344, -1.3233528137207031, 3.4747238159179688, 0.6722507476806641, 0.4308319091796875, 7.4687042236328125, 12.937847137451172, 11.941825866699219, 3.625926971435547, -5.9056243896484375, 4.391115188598633, 10.209609985351562, 0.167633056640625, 10.984161376953125, -9.045578002929688, 4.995744705200195, 2.505373001098633, 8.591690063476562, 7.289146423339844, 3.71844482421875, -0.25424957275390625, 2.9744873046875, 6.567333221435547, 5.262361526489258, 3.583660125732422, -1.3811569213867188, -0.583953857421875, 5.369169235229492, -1.0912933349609375, 0.8480758666992188, 12.89348030090332, 4.485124588012695, 4.054677963256836, 5.8084564208984375, -4.438892364501953, -0.1763153076171875, -1.1493759155273438, 1.6613998413085938, 0.37827301025390625, 6.108184814453125, 3.1255264282226562, 8.884639739990234, 0.001384735107421875, 8.073938369750977, 8.090232849121094, 5.332130432128906, 4.248222351074219, 1.8939990997314453, 4.708574295043945], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000154.npy"}
|
|
{"epoch": 0.2328042328042328, "step": 155, "batch_size": 64, "mean": 3.458055257797241, "std": 6.089510917663574, "min": -10.809856414794922, "p10": -4.970623016357421, "median": 3.0632877349853516, "p90": 12.04724807739258, "max": 14.45669174194336, "pos_frac": 0.6875, "sample": [2.9257736206054688, -0.029632568359375, 13.791275024414062, 4.099178314208984, 1.1778411865234375, 2.8077239990234375, 0.0465545654296875, 8.2120361328125, -1.7727394104003906, 9.20541000366211, 7.8395233154296875, 8.681453704833984, -1.0692291259765625, -5.2406005859375, -0.4321098327636719, -2.270172119140625, 4.6317138671875, 14.135238647460938, 5.377830505371094, 13.260780334472656, 11.946487426757812, -0.6263542175292969, 7.148155212402344, -6.164951324462891, 7.1582794189453125, 14.45669174194336, 0.33795166015625, 7.3765869140625, -2.3572235107421875, 9.022987365722656, -8.869979858398438, 3.581674575805664, 13.779895782470703, 7.177116394042969, 13.920661926269531, 6.38446044921875, -0.30500030517578125, -1.6926651000976562, -7.3263092041015625, 7.8692626953125, -6.5963287353515625, 1.7908287048339844, 6.603767395019531, 0.7069435119628906, 11.428733825683594, 4.415912628173828, 2.5963821411132812, 9.778923034667969, -4.340675354003906, 3.2008018493652344, 4.831336975097656, 2.919219970703125, 0.45689964294433594, 0.70477294921875, 2.205883026123047, -0.18631744384765625, 5.817161560058594, -5.281646728515625, -10.809856414794922, -0.836669921875, 3.3080062866210938, 12.090431213378906, 9.671987533569336, -1.3565444946289062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000155.npy"}
|
|
{"epoch": 0.23431594860166288, "step": 156, "batch_size": 64, "mean": 4.105587005615234, "std": 5.088772296905518, "min": -6.017948150634766, "p10": -2.066670799255371, "median": 3.1351547241210938, "p90": 11.562250137329103, "max": 17.40701675415039, "pos_frac": 0.78125, "sample": [3.027984619140625, -1.4904327392578125, -2.0695438385009766, -6.017948150634766, 6.8177947998046875, 13.306060791015625, 17.40701675415039, 4.510520935058594, 5.855224609375, 11.67025375366211, 2.2754592895507812, 8.779808044433594, -2.903148651123047, 6.20660400390625, 8.8018798828125, 8.77968978881836, 10.883407592773438, 7.815040588378906, -0.6799087524414062, 6.335041046142578, 1.8580741882324219, 1.2606735229492188, 5.561454772949219, 6.559051513671875, 0.4786529541015625, 2.5497894287109375, -0.3814239501953125, 7.432140350341797, 4.044460296630859, 7.380767822265625, 2.9856491088867188, 2.855377197265625, -2.059967041015625, 6.11151123046875, 4.713813781738281, 15.595344543457031, -5.748138427734375, 5.8499755859375, -3.52679443359375, 13.210182189941406, 6.4796142578125, 13.434677124023438, 2.0842132568359375, 1.3740234375, 0.09633445739746094, 1.4064273834228516, -0.3595466613769531, -2.2563705444335938, 3.2423248291015625, 0.85552978515625, 2.9502124786376953, -0.147552490234375, 5.772510528564453, 0.5905838012695312, 4.750434875488281, 2.061065673828125, -1.5870933532714844, 6.890159606933594, 12.234222412109375, 1.5242176055908203, 3.4577808380126953, 11.31024169921875, 2.9515419006347656, -2.399372100830078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000156.npy"}
|
|
{"epoch": 0.23582766439909297, "step": 157, "batch_size": 64, "mean": 3.5285744667053223, "std": 5.243298530578613, "min": -9.252426147460938, "p10": -2.4356952667236325, "median": 3.4092655181884766, "p90": 10.884242820739747, "max": 18.703857421875, "pos_frac": 0.765625, "sample": [12.27841567993164, 13.236080169677734, -1.1473541259765625, 6.1200103759765625, 7.126396179199219, -8.378753662109375, 3.0268707275390625, -0.1443023681640625, 8.500591278076172, -1.2424468994140625, 0.0746307373046875, 3.2146148681640625, 10.554840087890625, 2.331125259399414, -9.252426147460938, 0.06293106079101562, 1.174591064453125, 1.7225494384765625, 7.151865005493164, 11.025415420532227, 6.5030059814453125, 8.29330062866211, 18.703857421875, 5.398223876953125, 9.67497444152832, 5.220497131347656, -2.6693267822265625, -0.9688549041748047, 4.0779571533203125, -1.146493911743164, 8.272979736328125, 0.97174072265625, 4.490818023681641, 4.7453765869140625, 5.330577850341797, 11.145278930664062, -3.7159881591796875, 11.778800964355469, 11.035682678222656, 1.2870330810546875, -1.9583892822265625, 2.5921249389648438, -0.476776123046875, 8.079963684082031, 6.975311279296875, 1.2421150207519531, 0.3958282470703125, 5.1082763671875, 3.6113147735595703, 0.7705173492431641, 2.3689041137695312, -2.6402549743652344, 3.6039161682128906, 1.3090019226074219, 4.353096008300781, 6.54945182800293, 1.3428726196289062, 5.3268585205078125, -5.506782531738281, 6.985605239868164, 1.7208175659179688, -0.5983123779296875, -5.67474365234375, 4.4829559326171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000157.npy"}
|
|
{"epoch": 0.23733938019652306, "step": 158, "batch_size": 64, "mean": 3.4958715438842773, "std": 5.310166358947754, "min": -8.253150939941406, "p10": -2.6386987686157224, "median": 3.1926755905151367, "p90": 11.0880916595459, "max": 14.6708984375, "pos_frac": 0.71875, "sample": [1.6917343139648438, 7.012046813964844, 2.0851898193359375, 4.016427993774414, -8.253150939941406, 7.686336517333984, 12.33367919921875, 7.441497802734375, -0.3946685791015625, 0.3342437744140625, 11.372795104980469, 4.82281494140625, 5.2461395263671875, 6.7735443115234375, -8.158109664916992, 4.670125961303711, 14.32476806640625, 2.827177047729492, 2.1902294158935547, 4.695838928222656, -1.2469863891601562, 0.9224853515625, 6.880102157592773, 0.2675437927246094, 11.178524017333984, 6.35797119140625, -1.6063690185546875, 11.997413635253906, 6.7882232666015625, 5.617347717285156, -1.8457489013671875, 5.6099395751953125, 10.153350830078125, 1.681976318359375, 10.877082824707031, 10.511417388916016, 8.273998260498047, -2.4599456787109375, 2.1104907989501953, -4.8158416748046875, 10.796142578125, 3.5581741333007812, 3.7147293090820312, 11.22100830078125, 5.394004821777344, 6.893194198608398, -1.3639774322509766, -0.8633804321289062, 14.6708984375, -1.5149669647216797, 2.6411972045898438, -1.0447158813476562, -3.2147254943847656, 1.437652587890625, 1.0869140625, -0.6985912322998047, -2.7153072357177734, 4.985858917236328, 6.045192718505859, 1.981760025024414, -4.8495025634765625, 1.3574867248535156, -0.49019813537597656, -5.26470947265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000158.npy"}
|
|
{"epoch": 0.23885109599395313, "step": 159, "batch_size": 64, "mean": 4.122803688049316, "std": 6.130422115325928, "min": -17.639541625976562, "p10": -2.4417955398559568, "median": 3.735982894897461, "p90": 12.24169902801514, "max": 18.243179321289062, "pos_frac": 0.765625, "sample": [8.182971954345703, 7.682338714599609, 1.5854721069335938, 10.950080871582031, 4.3747711181640625, -4.616065979003906, -6.34490966796875, 13.772388458251953, 0.5530776977539062, 18.243179321289062, 14.042648315429688, 3.960845947265625, 11.38288688659668, 2.3421096801757812, -1.1677894592285156, 2.8459815979003906, 4.264228820800781, 17.359397888183594, 7.790374755859375, 0.2765846252441406, 1.1873397827148438, -6.2357635498046875, 3.175321578979492, 2.8831634521484375, 3.3900299072265625, 2.516592025756836, 2.027193069458008, -4.0369720458984375, 9.571792602539062, 13.960983276367188, -2.1109161376953125, 6.6829681396484375, 5.21527099609375, -0.11713790893554688, 2.5902023315429688, 5.252702713012695, 11.619312286376953, 4.726043701171875, 2.005645751953125, -0.29833984375, 3.3795852661132812, -4.08001708984375, 13.926109313964844, 0.7792739868164062, 5.057640075683594, 12.50843620300293, -2.5836009979248047, 6.276628494262695, -1.1014938354492188, 3.5232772827148438, 0.17496871948242188, 11.257692337036133, 5.275321960449219, -0.0489501953125, -0.1081085205078125, -17.639541625976562, 9.119163513183594, 6.094490051269531, -0.00209808349609375, 3.948688507080078, 7.3516693115234375, 4.755409240722656, 7.382171630859375, 7.1267242431640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000159.npy"}
|
|
{"epoch": 0.24036281179138322, "step": 160, "batch_size": 64, "mean": 3.662567615509033, "std": 6.429784774780273, "min": -9.305885314941406, "p10": -3.375560760498047, "median": 2.925699234008789, "p90": 11.966747283935547, "max": 20.659255981445312, "pos_frac": 0.71875, "sample": [1.3279914855957031, -1.7120246887207031, 5.6121063232421875, 16.42333221435547, 1.3844718933105469, 11.726463317871094, 1.8420257568359375, -1.2255287170410156, 3.8127593994140625, 7.2451934814453125, 19.584426879882812, -2.491546630859375, 11.031576156616211, 9.5594482421875, 5.213367462158203, 12.424278259277344, 0.2879676818847656, -7.044212341308594, 6.552679061889648, 8.500516891479492, -2.2878952026367188, -4.454694747924805, 2.198822021484375, -1.643829345703125, 4.500570297241211, 2.5489540100097656, 7.099967956542969, 1.2645912170410156, 3.3024444580078125, 0.9906806945800781, 4.6887969970703125, 5.11536979675293, 20.659255981445312, 15.05051040649414, 1.484567642211914, -2.6957855224609375, 7.9037628173828125, 0.7250709533691406, -0.5257968902587891, 11.9771728515625, 0.6742439270019531, 7.711139678955078, 0.9131355285644531, -5.462806701660156, -2.0654964447021484, 5.814689636230469, -8.156784057617188, -3.4075241088867188, 7.2062225341796875, 5.052986145019531, -5.765861511230469, -1.9149150848388672, -1.81268310546875, 6.927562713623047, 7.0323638916015625, 1.493438720703125, 0.8581390380859375, 14.391510009765625, -9.305885314941406, 4.373470306396484, 9.618667602539062, 11.942420959472656, 3.62945556640625, -3.3009796142578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000160.npy"}
|
|
{"epoch": 0.2418745275888133, "step": 161, "batch_size": 64, "mean": 3.8373398780822754, "std": 5.994260787963867, "min": -14.51812744140625, "p10": -2.8345424652099607, "median": 3.91019344329834, "p90": 11.359830093383792, "max": 18.457794189453125, "pos_frac": 0.75, "sample": [4.230476379394531, -8.834857940673828, 2.0474777221679688, 4.586761474609375, 2.8018798828125, -0.4594879150390625, 7.430084228515625, 15.016181945800781, -2.98284912109375, -2.4713945388793945, -5.724983215332031, 5.3228759765625, 5.821054458618164, 6.64288330078125, 12.98415756225586, 3.886228561401367, 4.622453689575195, -14.51812744140625, 7.375404357910156, -3.1063156127929688, -2.902496337890625, 1.1324310302734375, 2.7910995483398438, 0.872283935546875, 7.137908935546875, -2.117198944091797, 1.970001220703125, 6.154672622680664, 5.8637542724609375, -0.7152786254882812, 10.422779083251953, 10.072761535644531, 6.143571853637695, 10.738945007324219, 11.617172241210938, -2.675983428955078, -3.6290550231933594, 18.457794189453125, 12.836204528808594, 10.75936508178711, 2.7054595947265625, 10.057075500488281, -0.044826507568359375, 3.2224159240722656, 4.80902099609375, 0.8519668579101562, 2.0134048461914062, 5.250165939331055, 3.9341583251953125, 9.317665100097656, -1.8638687133789062, 4.179252624511719, 13.845516204833984, 2.6865692138671875, 0.01052093505859375, 0.3400402069091797, -1.221282958984375, 2.2346115112304688, 6.345672607421875, 5.2789459228515625, 17.432907104492188, -1.3630828857421875, 0.40781593322753906, 5.559001922607422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000161.npy"}
|
|
{"epoch": 0.24338624338624337, "step": 162, "batch_size": 64, "mean": 5.329469680786133, "std": 5.837019920349121, "min": -6.1250152587890625, "p10": -3.519628906249999, "median": 5.1167707443237305, "p90": 12.730003166198731, "max": 20.958511352539062, "pos_frac": 0.84375, "sample": [8.832931518554688, 7.9319000244140625, -6.1250152587890625, 2.6476593017578125, 1.8903884887695312, 8.834278106689453, 6.556936264038086, 3.059722900390625, 12.476409912109375, 14.11277961730957, 4.6196136474609375, 9.20108413696289, 4.404184341430664, 20.958511352539062, 9.972129821777344, 5.128835678100586, 4.72869873046875, 14.3504638671875, 6.062774658203125, 14.341732025146484, 11.203369140625, 9.899147033691406, 2.8712234497070312, -2.649627685546875, 6.6038970947265625, -1.8524093627929688, 7.619140625, 1.9426727294921875, -5.569915771484375, 7.8106689453125, 2.7340450286865234, 3.534027099609375, 0.8654022216796875, 11.030437469482422, 9.108291625976562, 5.104705810546875, 9.464555740356445, 9.164464950561523, 0.6028022766113281, -4.542083740234375, -4.847564697265625, 0.01061248779296875, 13.235420227050781, 12.416767120361328, 8.325952529907227, 4.1050872802734375, -5.599952697753906, 7.23004150390625, -5.672279357910156, -0.5047607421875, 3.1689834594726562, 0.7056121826171875, 5.4905853271484375, 9.113149642944336, 13.300025939941406, 8.197723388671875, 1.5376548767089844, -3.892486572265625, 2.0809173583984375, 2.9065628051757812, 3.3998470306396484, 11.831293106079102, 12.838685989379883, 2.7773361206054688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000162.npy"}
|
|
{"epoch": 0.24489795918367346, "step": 163, "batch_size": 64, "mean": 5.256951808929443, "std": 6.7343573570251465, "min": -9.570762634277344, "p10": -4.600414085388183, "median": 4.776644706726074, "p90": 13.556306838989258, "max": 18.167434692382812, "pos_frac": 0.78125, "sample": [-1.1866674423217773, 6.155677795410156, -6.3787689208984375, 11.018569946289062, 5.9370880126953125, 1.7869911193847656, -9.570762634277344, 3.0939483642578125, 15.520645141601562, -5.179328918457031, 6.012489318847656, 9.206901550292969, 17.03557586669922, 3.2999649047851562, 3.6212844848632812, 8.6805419921875, 9.767131805419922, -1.043853759765625, 18.167434692382812, 5.954841613769531, 0.33702850341796875, 8.949615478515625, 12.234369277954102, -3.8975448608398438, -4.717714309692383, 14.642982482910156, 4.656467437744141, 12.189224243164062, 4.896821975708008, 2.4221115112304688, 7.829448699951172, 3.1558303833007812, 8.233924865722656, -0.3607177734375, 10.45599365234375, 10.833908081054688, -4.326713562011719, 13.344696044921875, -4.724235534667969, 2.010374069213867, 9.314228057861328, 17.054264068603516, 5.728401184082031, 12.173530578613281, 3.290191650390625, -1.7010440826416016, -0.89178466796875, 3.8095474243164062, 2.22943115234375, 0.19992828369140625, -8.076950073242188, 13.375568389892578, 10.31759262084961, 0.5056743621826172, 2.505565643310547, -4.99755859375, 12.855339050292969, 16.42926025390625, 13.633766174316406, 11.847644805908203, 7.037202835083008, 2.1733646392822266, 3.815723419189453, 3.7504501342773438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000163.npy"}
|
|
{"epoch": 0.24640967498110355, "step": 164, "batch_size": 64, "mean": 5.220986843109131, "std": 7.015787601470947, "min": -13.939228057861328, "p10": -2.1882387161254884, "median": 4.434076309204102, "p90": 13.474564743041993, "max": 22.76964569091797, "pos_frac": 0.78125, "sample": [-0.8983688354492188, 1.0483226776123047, -0.6031112670898438, 4.082057952880859, 3.1934280395507812, 3.8390121459960938, 19.282669067382812, 12.188941955566406, -0.08022308349609375, 2.5618515014648438, 8.065460205078125, -2.175609588623047, 10.589805603027344, -7.218538284301758, 6.612522125244141, 8.929153442382812, 11.46261978149414, 4.455852508544922, 19.193450927734375, -2.030515670776367, 4.357139587402344, -6.257026672363281, 5.861572265625, 5.8175811767578125, 0.12894058227539062, 4.361572265625, 12.460227966308594, 1.927093505859375, 7.2256622314453125, 1.0810928344726562, 12.840202331542969, 16.063583374023438, -2.1936511993408203, 7.2796783447265625, 4.9542388916015625, -5.810066223144531, 14.000869750976562, 22.76964569091797, 16.077089309692383, -7.299720764160156, 3.986379623413086, 4.412300109863281, 2.1637840270996094, 9.708295822143555, 4.336833953857422, 5.637704849243164, 12.788528442382812, 1.3540973663330078, 10.802764892578125, 1.3020744323730469, 2.0757293701171875, 2.3421897888183594, 13.644886016845703, -5.50566291809082, -0.37125587463378906, -13.939228057861328, 4.527732849121094, 12.827228546142578, 12.741500854492188, -0.054683685302734375, 13.0771484375, 4.9656524658203125, 6.126895904541016, 7.0477447509765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000164.npy"}
|
|
{"epoch": 0.24792139077853365, "step": 165, "batch_size": 64, "mean": 4.735316753387451, "std": 6.783576011657715, "min": -11.50216293334961, "p10": -3.5493707656860343, "median": 4.761894226074219, "p90": 13.129290008544924, "max": 18.783126831054688, "pos_frac": 0.75, "sample": [18.783126831054688, 6.991260528564453, 6.887393951416016, 11.37063217163086, 4.0056915283203125, -0.17050933837890625, 3.0725936889648438, -0.8792266845703125, 7.748073577880859, 3.4960975646972656, 1.6124801635742188, 6.827426910400391, 13.959686279296875, 0.7750110626220703, -1.3527069091796875, 3.224151611328125, 4.7862091064453125, 6.748180389404297, -8.815467834472656, -1.946075439453125, -8.743476867675781, -1.1009254455566406, 12.375297546386719, -1.700429916381836, 13.41845703125, 14.71902847290039, 4.121402740478516, 7.201337814331055, 15.686614990234375, 16.436981201171875, 4.737579345703125, 0.8114795684814453, 14.978256225585938, 12.454566955566406, 8.984977722167969, -11.50216293334961, 0.8219680786132812, 9.013736724853516, 8.725666046142578, 9.606964111328125, -2.6964473724365234, -0.49692535400390625, 0.588348388671875, 2.2563915252685547, 12.195516586303711, -3.9149093627929688, 2.463336944580078, 1.6991958618164062, 8.504487991333008, -1.4408493041992188, -5.5216064453125, 7.09552001953125, -10.91461181640625, 10.827667236328125, 3.905916213989258, -4.611530303955078, 5.846199035644531, 2.6125621795654297, 9.024250030517578, 5.4238433837890625, 11.627670288085938, 11.482330322265625, 8.306571960449219, 10.625999450683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000165.npy"}
|
|
{"epoch": 0.2494331065759637, "step": 166, "batch_size": 64, "mean": 3.659334182739258, "std": 6.22493314743042, "min": -6.240932464599609, "p10": -3.7297119140625, "median": 2.793142318725586, "p90": 11.813632202148439, "max": 20.442684173583984, "pos_frac": 0.65625, "sample": [8.087425231933594, -1.6174697875976562, -3.1074676513671875, 11.079750061035156, 11.44207763671875, 0.780242919921875, -2.7809524536132812, 12.247314453125, 1.2223358154296875, 3.13836669921875, -3.791015625, 4.400238037109375, 14.854106903076172, -3.3408737182617188, 8.161209106445312, 3.067626953125, -0.5968170166015625, 8.463418960571289, -0.3512115478515625, 1.6366233825683594, -2.5874691009521484, 10.698657989501953, -4.734954833984375, 4.069232940673828, 11.972869873046875, 4.84661865234375, -0.8141326904296875, 9.919281005859375, 7.184600830078125, 4.735389709472656, -4.09613037109375, -6.240932464599609, 2.518657684326172, 13.491294860839844, 9.885887145996094, -0.8732032775878906, 9.780708312988281, 1.7282867431640625, 6.938560485839844, 6.141889572143555, 1.403289794921875, 17.465423583984375, -1.9319610595703125, -4.6267242431640625, 4.101776123046875, -5.86529541015625, 0.38983154296875, -1.2568092346191406, 4.91876220703125, 20.442684173583984, 4.4320220947265625, -3.586669921875, 0.1689300537109375, 8.129352569580078, -4.220710754394531, -1.2039985656738281, 7.870796203613281, -2.9211483001708984, 15.988136291503906, -1.4007835388183594, 6.1181793212890625, 8.66162109375, 2.275064468383789, 1.28558349609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000166.npy"}
|
|
{"epoch": 0.2509448223733938, "step": 167, "batch_size": 64, "mean": 5.751662731170654, "std": 6.467663288116455, "min": -7.852638244628906, "p10": -1.889270019531249, "median": 4.217382431030273, "p90": 14.180094146728518, "max": 19.906707763671875, "pos_frac": 0.8125, "sample": [3.263153076171875, 4.147373199462891, 2.134572982788086, 5.791965484619141, -6.7437896728515625, 12.041397094726562, 9.915580749511719, 2.844696044921875, 4.02653694152832, 9.452934265136719, 10.028022766113281, 3.703266143798828, 3.7902355194091797, 0.2081756591796875, -0.2608203887939453, 0.8131141662597656, 14.388626098632812, 10.573806762695312, 11.9007568359375, -0.6859283447265625, -7.852638244628906, 2.150737762451172, 3.926980972290039, 1.6461410522460938, 4.926748275756836, 1.2695465087890625, 4.287391662597656, -0.48055076599121094, 4.091423034667969, 9.448410034179688, 2.9707489013671875, 9.354167938232422, 1.3920364379882812, 8.018386840820312, 8.894454956054688, 7.53125, 17.627052307128906, 19.906707763671875, 10.6143798828125, 11.665000915527344, 1.5082130432128906, 10.992378234863281, -2.2938003540039062, 5.422710418701172, 11.798547744750977, -4.036468505859375, -6.239295959472656, 17.23381805419922, -0.9453659057617188, 11.074821472167969, 10.354267120361328, -0.13679885864257812, 0.8984317779541016, 17.939556121826172, 10.445505142211914, 15.571784973144531, -2.8865966796875, 18.555587768554688, 9.631462097167969, 13.693519592285156, 7.084266662597656, 0.7445659637451172, 1.509979248046875, -2.5367393493652344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000167.npy"}
|
|
{"epoch": 0.25245653817082386, "step": 168, "batch_size": 64, "mean": 4.1670002937316895, "std": 6.2580342292785645, "min": -6.5785675048828125, "p10": -2.7356884002685544, "median": 2.638331413269043, "p90": 13.046364212036135, "max": 16.859458923339844, "pos_frac": 0.703125, "sample": [6.141578674316406, -3.9261550903320312, 13.206939697265625, -2.560626983642578, 2.8996219635009766, 12.434066772460938, 7.6316680908203125, -2.1641387939453125, -6.5785675048828125, -5.3250732421875, 4.7665863037109375, -2.464021682739258, 3.8080215454101562, 15.940113067626953, 5.5472869873046875, 1.8872146606445312, 8.32205581665039, 3.4772186279296875, 0.9756317138671875, 6.57122802734375, 8.49920654296875, -1.2796249389648438, 1.5140495300292969, 4.010931015014648, -0.4154186248779297, 6.225856781005859, 14.531558990478516, 1.2428321838378906, -4.417640686035156, 6.199798583984375, 0.7561798095703125, 1.0651321411132812, 1.2894287109375, 11.423179626464844, 12.671688079833984, -1.056121826171875, 11.87213134765625, 0.9538955688476562, -1.9727706909179688, 1.273681640625, 1.2979259490966797, 16.859458923339844, -2.8107147216796875, 15.044525146484375, 11.989280700683594, -0.9967041015625, 9.46026611328125, -2.5291595458984375, 2.8656463623046875, -1.520751953125, 7.3057861328125, 2.2943191528320312, 11.866130828857422, 2.4110164642333984, -2.0850143432617188, 14.838888168334961, -3.3093185424804688, -5.3663330078125, -0.13130569458007812, 0.8162498474121094, 11.336601257324219, 15.945701599121094, 11.917083740234375, 4.2098236083984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000168.npy"}
|
|
{"epoch": 0.25396825396825395, "step": 169, "batch_size": 64, "mean": 5.4780592918396, "std": 8.399150848388672, "min": -13.011619567871094, "p10": -3.1700231552124016, "median": 4.677621841430664, "p90": 16.56385879516602, "max": 32.64540100097656, "pos_frac": 0.765625, "sample": [5.700447082519531, 12.611732482910156, 12.685256958007812, -5.590045928955078, 5.503744125366211, 2.2259178161621094, -3.552600860595703, 0.12295913696289062, 8.609210968017578, 7.939117431640625, 2.8823928833007812, 0.5516014099121094, 10.56512451171875, 15.86358642578125, 1.2841796875, -2.049283981323242, 4.883892059326172, 4.9209136962890625, 8.630897521972656, -0.20919036865234375, -1.8170318603515625, 15.254058837890625, 16.863975524902344, 6.780860900878906, -1.871429443359375, 28.96703338623047, 2.47515869140625, 5.6375274658203125, -1.2177314758300781, 7.283817291259766, 3.3623046875, 2.330169677734375, 12.03830337524414, -2.277341842651367, 11.653200149536133, 2.9778079986572266, 10.321401596069336, -13.011619567871094, 5.185541152954102, 3.5045318603515625, 0.5507583618164062, 4.471351623535156, 2.5594024658203125, 1.3471183776855469, 6.763683319091797, 3.708038330078125, 5.535198211669922, 1.48260498046875, 18.782272338867188, 10.594253540039062, -1.3175811767578125, 7.309928894042969, 6.7559661865234375, -9.475540161132812, -5.4310760498046875, 32.64540100097656, 17.280288696289062, 18.885108947753906, -9.05156135559082, 21.408466339111328, 3.0067291259765625, 11.152433395385742, -6.14031982421875, -0.24752044677734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000169.npy"}
|
|
{"epoch": 0.25547996976568405, "step": 170, "batch_size": 64, "mean": 5.075137138366699, "std": 8.380029678344727, "min": -19.414411544799805, "p10": -7.348395538330078, "median": 5.523815155029297, "p90": 15.074181365966798, "max": 23.128067016601562, "pos_frac": 0.765625, "sample": [10.098419189453125, 1.3225860595703125, 5.318315505981445, 5.231529235839844, 3.3892078399658203, 21.10296630859375, 4.931737899780273, 6.196920394897461, 3.0876998901367188, 18.359582901000977, -8.44565200805664, 2.0470237731933594, 10.264389038085938, 21.695281982421875, 16.236377716064453, -2.960664749145508, 3.136892318725586, 8.860084533691406, 10.435371398925781, -1.606292724609375, 1.1188545227050781, 13.396186828613281, 7.20414924621582, 0.7143154144287109, -7.121437072753906, 5.759071350097656, -8.804420471191406, -2.5921783447265625, -19.414411544799805, 8.922393798828125, 10.918270111083984, 8.637466430664062, 13.553749084472656, 5.729314804077148, -10.443510055541992, 5.303382873535156, -7.4456634521484375, 8.348495483398438, 15.20245361328125, -0.7860336303710938, 7.282951354980469, -0.4500255584716797, 0.0760955810546875, 7.601593017578125, 6.606243133544922, 23.128067016601562, 7.63916015625, 14.774879455566406, 5.207874298095703, 0.6626434326171875, -1.0708541870117188, -3.5585689544677734, 1.4614944458007812, 17.36243438720703, -7.937774658203125, 1.9813270568847656, 10.331382751464844, 14.486076354980469, 0.5380210876464844, 6.98895263671875, 12.21688461303711, 13.113578796386719, 8.846817016601562, -9.382682800292969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000170.npy"}
|
|
{"epoch": 0.25699168556311414, "step": 171, "batch_size": 64, "mean": 5.8722052574157715, "std": 9.227867126464844, "min": -20.25531005859375, "p10": -3.2963068008422844, "median": 4.131870269775391, "p90": 19.588855361938478, "max": 23.062908172607422, "pos_frac": 0.734375, "sample": [5.7041778564453125, 7.438732147216797, 16.473129272460938, 9.744834899902344, 10.81597900390625, 21.388408660888672, 9.200862884521484, 16.238563537597656, 0.3451805114746094, 2.9690704345703125, -0.6815814971923828, 2.3516998291015625, 19.908382415771484, 9.395214080810547, 23.062908172607422, -0.33959197998046875, 13.208696365356445, 3.4295883178710938, 14.130464553833008, -1.3885040283203125, -0.2747211456298828, 3.2669811248779297, 7.042026519775391, -3.92681884765625, 14.991020202636719, 0.3509178161621094, 3.486846923828125, 0.810546875, 20.408180236816406, -16.079988479614258, 8.000129699707031, 9.261138916015625, 21.264625549316406, -2.7907238006591797, 8.132682800292969, 14.794147491455078, -4.018424987792969, -7.002647399902344, 22.015335083007812, 18.843292236328125, -20.25531005859375, 1.7702198028564453, 11.995651245117188, 3.0080108642578125, 2.20635986328125, 14.617843627929688, 11.3731689453125, -0.34276580810546875, -1.6958465576171875, 8.837760925292969, 4.776893615722656, 20.070030212402344, 13.154657363891602, 1.2575149536132812, -1.7603836059570312, -3.5129852294921875, 8.22479248046875, 0.04548454284667969, -0.8721046447753906, -14.663726806640625, 13.061248779296875, 0.5101509094238281, 2.5634765625, -0.5197601318359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000171.npy"}
|
|
{"epoch": 0.2585034013605442, "step": 172, "batch_size": 64, "mean": 5.705615043640137, "std": 8.182978630065918, "min": -19.312091827392578, "p10": -2.243919754028319, "median": 5.523968696594238, "p90": 16.02390651702881, "max": 23.304471969604492, "pos_frac": 0.78125, "sample": [-6.7440185546875, 6.915519714355469, 1.3289794921875, 4.008186340332031, 2.5049514770507812, 2.127542495727539, 0.10079574584960938, 8.887962341308594, 11.7237548828125, 15.807069778442383, 22.7744140625, 5.882232666015625, 4.240638732910156, -0.5812263488769531, 11.050498962402344, 23.304471969604492, 9.1781005859375, -8.587371826171875, -0.9953079223632812, 6.409278869628906, 6.909111022949219, 2.3775253295898438, -9.278568267822266, 1.4177017211914062, 5.221260070800781, 5.826677322387695, 3.72357177734375, 11.246490478515625, 9.126800537109375, -0.0950775146484375, 6.767200469970703, 11.929519653320312, 7.7570343017578125, 11.549888610839844, 0.80810546875, -1.0680503845214844, 17.528751373291016, 10.226066589355469, 15.229606628417969, 1.3851547241210938, -0.37795066833496094, 6.5562744140625, 6.10809326171875, 14.777139663696289, 14.146232604980469, -9.6383056640625, 1.706817626953125, -0.7458953857421875, 2.9807281494140625, 9.053747177124023, -5.6741943359375, 16.116836547851562, 17.360626220703125, -19.312091827392578, 2.534879684448242, 13.722702026367188, 0.7368240356445312, 3.9887008666992188, 1.3698959350585938, 18.682205200195312, -2.74786376953125, -0.10805702209472656, 21.02861785888672, 14.968185424804688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000172.npy"}
|
|
{"epoch": 0.2600151171579743, "step": 173, "batch_size": 64, "mean": 6.169720649719238, "std": 10.901055335998535, "min": -15.971704483032227, "p10": -7.188521194458007, "median": 6.976128578186035, "p90": 22.37185287475586, "max": 27.493515014648438, "pos_frac": 0.6875, "sample": [-10.624788284301758, 2.146148681640625, -1.7933578491210938, 7.396492004394531, 7.2921142578125, 18.31103515625, 9.712156295776367, -1.5078125, -1.3121757507324219, 12.7679443359375, 2.6420745849609375, 7.3722076416015625, 18.963783264160156, -10.889785766601562, -5.836725234985352, 22.36621856689453, 22.837387084960938, 2.4009475708007812, 7.073728561401367, 2.592212677001953, 11.774154663085938, 23.767356872558594, -4.505191802978516, 22.374267578125, -15.971704483032227, -14.162612915039062, 9.713905334472656, 12.729057312011719, 2.7429542541503906, 1.3159561157226562, -2.711780548095703, 22.948883056640625, -5.057071685791016, 20.912376403808594, 8.250457763671875, 2.9595375061035156, 16.79977035522461, -4.856204986572266, 15.642356872558594, 15.597518920898438, 7.664329528808594, 2.7625350952148438, 3.0915279388427734, 16.806068420410156, 6.878528594970703, -15.287307739257812, -2.6141929626464844, -3.7073974609375, 27.493515014648438, 15.768424987792969, 23.4754695892334, -10.413930892944336, 7.077964782714844, 10.168174743652344, 20.309940338134766, -2.7748031616210938, -1.2276458740234375, -7.767862319946289, 7.512834548950195, 23.80670166015625, 1.1859207153320312, 12.491565704345703, -1.5648651123046875, 1.552825927734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000173.npy"}
|
|
{"epoch": 0.2615268329554044, "step": 174, "batch_size": 64, "mean": 6.442006587982178, "std": 9.370983123779297, "min": -15.447860717773438, "p10": -3.9543733596801753, "median": 5.040678024291992, "p90": 19.570063781738284, "max": 27.812606811523438, "pos_frac": 0.765625, "sample": [5.05841064453125, 10.46649169921875, 15.950881958007812, 4.397970199584961, 8.16248893737793, -3.3852081298828125, 2.4430313110351562, 12.239013671875, -0.9696121215820312, 5.962825775146484, 19.050796508789062, 16.64043426513672, 2.2692413330078125, -0.38349151611328125, 12.6610107421875, -4.8124542236328125, 16.709848403930664, 11.700828552246094, 9.554267883300781, -14.609371185302734, 19.831504821777344, 7.2341461181640625, -2.456188201904297, 10.975364685058594, 26.716819763183594, -1.383321762084961, 5.5438232421875, -2.0306243896484375, 24.788414001464844, 2.598052978515625, 0.20839691162109375, 27.812606811523438, 24.612741470336914, -4.198301315307617, 9.235240936279297, 11.480255126953125, 3.5800399780273438, 8.935134887695312, 1.9661750793457031, 19.779312133789062, -2.8721160888671875, 19.081817626953125, 5.022945404052734, 0.3260078430175781, 0.20343780517578125, 13.573394775390625, 7.181037902832031, -5.081672668457031, 20.432785034179688, 4.530057907104492, -15.447860717773438, 14.073188781738281, 2.652240753173828, 1.4139137268066406, 10.934005737304688, -3.1805648803710938, 7.7787322998046875, -5.5743255615234375, 11.036277770996094, -8.623794555664062, 1.3102092742919922, 3.7966995239257812, 4.992256164550781, 0.39275360107421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000174.npy"}
|
|
{"epoch": 0.26303854875283444, "step": 175, "batch_size": 64, "mean": 7.101997375488281, "std": 10.265158653259277, "min": -16.456634521484375, "p10": -4.5735229492187495, "median": 6.497661590576172, "p90": 22.154073524475105, "max": 27.4835205078125, "pos_frac": 0.703125, "sample": [-7.815668106079102, 12.726320266723633, -3.0418853759765625, 3.201631546020508, -3.3444747924804688, 16.503028869628906, -0.3535137176513672, 13.361572265625, 8.237716674804688, -1.2675857543945312, 1.6260299682617188, 19.546356201171875, 7.290702819824219, 9.243087768554688, -3.485698699951172, 22.929426193237305, 6.4715728759765625, 17.888389587402344, 14.87432861328125, 1.45562744140625, 5.1911468505859375, 4.03108024597168, 4.799659729003906, 13.982864379882812, 14.797225952148438, 6.951362609863281, -0.29895782470703125, -4.250450134277344, 24.370256423950195, -16.456634521484375, 19.426834106445312, 0.08306121826171875, 6.646232604980469, 2.8729248046875, -0.071990966796875, -0.11118698120117188, 7.402843475341797, 25.187999725341797, -0.9458522796630859, -11.362480163574219, 8.036781311035156, -4.475982666015625, 1.71240234375, 6.523750305175781, -5.0765838623046875, -2.7498626708984375, 17.80304718017578, -7.115081787109375, 16.53026580810547, 20.34491729736328, 9.259254455566406, 20.07197380065918, 0.7830047607421875, -6.151008605957031, 2.7283706665039062, 16.94603729248047, -4.615325927734375, 25.45270538330078, 11.953056335449219, 27.4835205078125, 1.4373703002929688, 22.987274169921875, 11.422897338867188, 24.942134857177734], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000175.npy"}
|
|
{"epoch": 0.26455026455026454, "step": 176, "batch_size": 64, "mean": 4.886781692504883, "std": 10.200183868408203, "min": -17.678512573242188, "p10": -6.892959594726562, "median": 3.901974678039551, "p90": 16.441704177856447, "max": 33.3369140625, "pos_frac": 0.65625, "sample": [25.44298553466797, -13.880134582519531, 9.771812438964844, 13.371665954589844, 0.700531005859375, 3.6399078369140625, -1.8227005004882812, 10.373573303222656, 4.164041519165039, 2.9622726440429688, -13.082138061523438, -7.316001892089844, -5.778774261474609, 9.665634155273438, -10.064712524414062, 17.98446273803711, -0.22802352905273438, 13.624340057373047, -3.0479736328125, -0.6033515930175781, -2.6146621704101562, -3.0546226501464844, 4.260480880737305, 27.931377410888672, 8.98822021484375, 5.785053253173828, -5.905860900878906, 9.411354064941406, 0.9940338134765625, 13.406455993652344, 10.737350463867188, -1.2951812744140625, -1.7977676391601562, 15.547752380371094, 9.691200256347656, 25.317974090576172, 1.8040924072265625, 11.011581420898438, 1.8205947875976562, -8.914939880371094, 1.7611846923828125, 2.5518798828125, 8.044256210327148, 16.69027328491211, 6.3407745361328125, 10.258647918701172, -2.6649932861328125, -4.4547882080078125, 9.991043090820312, 20.84467315673828, -0.2776336669921875, 14.396812438964844, 1.7841987609863281, 11.478256225585938, 1.9209060668945312, 6.72282600402832, 33.3369140625, -0.48050689697265625, -2.7950897216796875, 15.861709594726562, 7.162788391113281, -17.678512573242188, 6.5546722412109375, -13.598175048828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000176.npy"}
|
|
{"epoch": 0.2660619803476946, "step": 177, "batch_size": 64, "mean": 9.514006614685059, "std": 9.928008079528809, "min": -17.8192138671875, "p10": -1.2208543777465817, "median": 7.665569305419922, "p90": 23.8150339126587, "max": 30.632171630859375, "pos_frac": 0.84375, "sample": [3.5277099609375, -0.015207290649414062, 15.076864242553711, 30.0640869140625, 15.926727294921875, -0.9222774505615234, 4.140289306640625, 10.631271362304688, 26.543563842773438, 8.590011596679688, 30.632171630859375, 20.388320922851562, 19.387983322143555, -1.4395561218261719, 7.138378143310547, -7.212276458740234, 24.603591918945312, 3.617889404296875, -3.098550796508789, 10.947793960571289, 8.329681396484375, 7.582244873046875, 3.3975563049316406, 19.136083602905273, 13.688032150268555, 1.4157028198242188, 6.248540878295898, 18.813674926757812, -2.891660690307617, 3.8455963134765625, 11.057769775390625, 5.946983337402344, 24.984283447265625, 8.129791259765625, 1.3726749420166016, 18.04116439819336, 21.975065231323242, 7.649681091308594, 5.05426025390625, 14.588485717773438, 3.3723602294921875, 5.96556282043457, 14.181339263916016, 3.4873809814453125, 14.698684692382812, -9.672431945800781, 1.3701019287109375, -1.34881591796875, 20.037097930908203, 0.4099388122558594, 20.207244873046875, 0.1408367156982422, -17.8192138671875, 27.954113006591797, 7.68145751953125, 15.021156311035156, 9.609199523925781, 25.046188354492188, 14.48468017578125, 7.003467559814453, 6.886173248291016, 17.983661651611328, -0.177642822265625, 5.479499816894531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000177.npy"}
|
|
{"epoch": 0.2675736961451247, "step": 178, "batch_size": 64, "mean": 5.804503440856934, "std": 9.458831787109375, "min": -16.04193115234375, "p10": -5.166732215881346, "median": 4.624468803405762, "p90": 19.070971298217778, "max": 24.871204376220703, "pos_frac": 0.765625, "sample": [4.549596786499023, 15.907516479492188, 5.860088348388672, 0.72967529296875, 14.526527404785156, 0.6860256195068359, -1.81976318359375, 9.780845642089844, 15.712493896484375, 9.44875717163086, -5.5960693359375, 3.5447998046875, 4.796180725097656, -11.531717300415039, 7.11090087890625, 6.357631683349609, 22.098094940185547, -0.4488391876220703, -2.6787643432617188, -0.30547332763671875, 22.68350601196289, 2.342233657836914, 6.6112060546875, -14.576507568359375, 3.4940719604492188, 2.5107879638671875, 2.63134765625, 8.80194091796875, 3.3439407348632812, 3.9692859649658203, 12.507720947265625, 0.10503959655761719, 2.8248062133789062, 24.871204376220703, 4.334625244140625, 9.923919677734375, 4.6993408203125, 3.8152618408203125, 24.63129425048828, 3.342864990234375, -8.660202026367188, 17.260520935058594, 18.43901824951172, 7.0942840576171875, -16.04193115234375, 19.341808319091797, -1.306549072265625, -8.849998474121094, 20.13360595703125, 5.08613395690918, 18.058982849121094, 23.317794799804688, 7.450345993041992, 10.929420471191406, 3.8802127838134766, 2.0177383422851562, 15.447097778320312, -1.8488388061523438, -12.648841857910156, -3.5049057006835938, 8.642768859863281, 8.042182922363281, 11.776100158691406, -4.164945602416992], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000178.npy"}
|
|
{"epoch": 0.2690854119425548, "step": 179, "batch_size": 64, "mean": 6.668338775634766, "std": 11.185423851013184, "min": -23.26679229736328, "p10": -7.262783813476561, "median": 6.461328506469727, "p90": 18.9242567062378, "max": 34.25468444824219, "pos_frac": 0.71875, "sample": [10.082649230957031, -2.6887664794921875, -1.7632293701171875, 7.21246337890625, -0.1464996337890625, 9.857734680175781, 8.498031616210938, 8.90283203125, 24.340240478515625, 11.914257049560547, 17.55019187927246, 19.513141632080078, 14.874458312988281, -12.778984069824219, 8.371585845947266, -7.885856628417969, -23.26679229736328, -7.617591857910156, 2.7075119018554688, 5.775108337402344, -3.223268508911133, 17.11345672607422, 5.1631317138671875, 16.680084228515625, 20.653182983398438, -13.252532958984375, -2.0507125854492188, 16.71068572998047, 3.870288848876953, 5.468027114868164, 7.147548675537109, 14.449729919433594, 2.079448699951172, -3.986846923828125, 2.1773223876953125, -8.911693572998047, 3.95025634765625, 34.25468444824219, 16.411415100097656, -2.8785133361816406, 14.393579483032227, 12.460289001464844, 13.575601577758789, 9.425281524658203, -6.434898376464844, 16.693145751953125, 22.932357788085938, 4.5049896240234375, 24.468017578125, 34.00914001464844, 11.630718231201172, 11.914987564086914, 17.046951293945312, 2.2342987060546875, -1.8593788146972656, 13.458786010742188, 3.1250076293945312, 0.701995849609375, 5.3046722412109375, -2.6884002685546875, -14.70576286315918, 0.19989395141601562, 13.851608276367188, -4.747381210327148], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000179.npy"}
|
|
{"epoch": 0.2705971277399849, "step": 180, "batch_size": 64, "mean": 6.027800559997559, "std": 11.671248435974121, "min": -20.33484649658203, "p10": -8.181195831298828, "median": 4.31048583984375, "p90": 23.734141540527343, "max": 30.75640106201172, "pos_frac": 0.734375, "sample": [17.215530395507812, 0.19205093383789062, 8.046384811401367, 1.8155593872070312, 3.3237762451171875, 11.437835693359375, -12.135101318359375, 9.570255279541016, -0.1746692657470703, -3.3174304962158203, 22.978517532348633, 11.3271484375, -5.934967041015625, -0.39046287536621094, 8.837661743164062, 0.7199554443359375, -8.427978515625, 20.772384643554688, 0.5643501281738281, -17.377304077148438, 25.26531982421875, 7.1393280029296875, 5.881599426269531, 11.905242919921875, 10.76715087890625, 15.477396011352539, 0.2917823791503906, 1.5005264282226562, 5.571123123168945, 7.666683197021484, 8.868690490722656, 20.49724006652832, -9.276412963867188, 1.1227874755859375, 8.102977752685547, 12.503860473632812, 1.9650402069091797, 26.53868865966797, -0.026580810546875, -2.371601104736328, 3.746074676513672, 14.840803146362305, 24.541536331176758, -0.5268001556396484, -4.747611999511719, 2.6990528106689453, 29.118804931640625, -1.8032417297363281, 30.75640106201172, 0.5754604339599609, 23.518051147460938, -15.200767517089844, 2.2992935180664062, -7.605369567871094, 3.1910247802734375, 4.874897003173828, 28.87207794189453, -20.33484649658203, 7.652523040771484, -15.414772033691406, 13.053695678710938, 6.411336898803711, 23.826751708984375, 3.0005416870117188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000180.npy"}
|
|
{"epoch": 0.272108843537415, "step": 181, "batch_size": 64, "mean": 8.945984840393066, "std": 9.672395706176758, "min": -11.57647705078125, "p10": -2.02423858642578, "median": 8.503973007202148, "p90": 22.3006404876709, "max": 32.123497009277344, "pos_frac": 0.828125, "sample": [9.171382904052734, 15.225433349609375, -0.0737457275390625, -0.2590656280517578, 6.107265472412109, 2.2989349365234375, 9.537445068359375, -11.076507568359375, 3.581279754638672, 1.818115234375, 14.302764892578125, 4.783931732177734, 8.265674591064453, -5.1100311279296875, 15.368701934814453, 9.177780151367188, 14.361625671386719, 8.742271423339844, 21.891517639160156, -4.2026214599609375, 9.890655517578125, 23.636993408203125, 17.44534683227539, 12.663063049316406, 29.371177673339844, 12.588150024414062, 5.566947937011719, 32.123497009277344, 1.3329925537109375, -11.57647705078125, 16.74315643310547, 3.9867935180664062, 25.807586669921875, -6.946453094482422, 18.510940551757812, 1.5023994445800781, -0.68914794921875, 24.816879272460938, 1.8536529541015625, 2.3549423217773438, 17.32666015625, 3.8774261474609375, 11.945198059082031, 20.06867218017578, 17.790889739990234, 2.2149829864501953, 1.2321720123291016, 7.464399337768555, 9.338623046875, 1.7584381103515625, 3.0201034545898438, 22.47597885131836, 28.051055908203125, 5.728752136230469, 11.167999267578125, 18.779542922973633, 4.859375, 8.203842163085938, 10.249454498291016, -2.915576934814453, 10.647109985351562, -0.6862583160400391, 17.645362854003906, -2.5964202880859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000181.npy"}
|
|
{"epoch": 0.273620559334845, "step": 182, "batch_size": 64, "mean": 9.865279197692871, "std": 12.85766887664795, "min": -25.405189514160156, "p10": -4.687781524658202, "median": 8.285823822021484, "p90": 26.1975341796875, "max": 32.551910400390625, "pos_frac": 0.734375, "sample": [31.979965209960938, -7.412927627563477, -5.846954345703125, 11.798698425292969, 19.697898864746094, 17.94586944580078, 14.141929626464844, -6.169059753417969, -5.120384216308594, 29.923492431640625, 13.34420394897461, 6.1007843017578125, 32.551910400390625, -0.4798126220703125, 22.560028076171875, 12.98748779296875, 10.001869201660156, 6.363075256347656, 25.652385711669922, 8.887474060058594, 12.53205680847168, 0.6873836517333984, 2.70880126953125, -0.079315185546875, 15.13848876953125, -1.2027587890625, 23.545095443725586, 7.619001388549805, 25.3851318359375, 25.569849014282227, 31.968612670898438, 7.684173583984375, 11.231552124023438, 5.182712554931641, 26.3621826171875, -2.79412841796875, 0.3817901611328125, 5.205421447753906, 24.249420166015625, -0.3490886688232422, 7.4335784912109375, -16.49245262145996, 28.780677795410156, -3.47674560546875, 23.609630584716797, 16.797042846679688, 4.772024154663086, -2.5256500244140625, 21.032760620117188, 5.824958801269531, 14.562446594238281, 25.8133544921875, -3.678375244140625, 23.464475631713867, 28.42072296142578, 4.158241271972656, -2.6434783935546875, 9.928594589233398, 17.659366607666016, 2.00921630859375, 3.8032798767089844, -11.733070373535156, -0.6718273162841797, -25.405189514160156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000182.npy"}
|
|
{"epoch": 0.2751322751322751, "step": 183, "batch_size": 64, "mean": 3.947441816329956, "std": 10.665178298950195, "min": -27.910873413085938, "p10": -9.36432342529297, "median": 2.4763975143432617, "p90": 19.857604980468754, "max": 23.14997100830078, "pos_frac": 0.65625, "sample": [2.498626708984375, 21.478042602539062, -0.222900390625, -0.5536079406738281, 3.725147247314453, -2.1338043212890625, -9.242889404296875, 23.14997100830078, -10.365165710449219, -1.7611846923828125, -27.910873413085938, 0.2293243408203125, 14.676753997802734, -4.2338714599609375, 10.265121459960938, 17.899293899536133, -0.7143821716308594, 10.212165832519531, 15.620407104492188, 12.027994155883789, 10.621238708496094, -5.7716827392578125, 18.290313720703125, -7.355241775512695, 1.5337486267089844, -3.2652740478515625, 8.135086059570312, 0.8923873901367188, -12.992080688476562, -9.794265747070312, 22.14459228515625, 8.12506103515625, 1.9189529418945312, 1.387359619140625, -10.064361572265625, 9.384124755859375, 7.543769836425781, 22.717086791992188, -18.3924560546875, 22.668277740478516, -8.70760726928711, 0.8071975708007812, 21.942611694335938, -0.9490928649902344, 1.8439769744873047, 6.813024520874023, 13.454584121704102, 3.3178958892822266, -9.416366577148438, -1.1014118194580078, 2.6924362182617188, 8.613067626953125, 0.6405277252197266, 2.984405517578125, 18.574264526367188, -0.241241455078125, 7.0759735107421875, -6.738990783691406, 2.4541683197021484, 2.0669631958007812, 4.234535217285156, 20.407608032226562, 12.448162078857422, 7.04876708984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000183.npy"}
|
|
{"epoch": 0.2766439909297052, "step": 184, "batch_size": 64, "mean": 7.64611291885376, "std": 10.03942584991455, "min": -7.361515045166016, "p10": -3.142018890380858, "median": 5.254756927490234, "p90": 23.79722404479981, "max": 28.167327880859375, "pos_frac": 0.765625, "sample": [1.7367401123046875, 18.83165740966797, 24.181819915771484, 1.5144691467285156, 20.38367462158203, 6.71661376953125, -1.1790008544921875, -1.9590225219726562, -1.578857421875, 3.9765548706054688, -0.019041061401367188, 25.59331512451172, 0.47571563720703125, 5.704078674316406, 18.158164978027344, -6.161705017089844, 18.92919158935547, 13.8018798828125, 7.156116485595703, 28.167327880859375, -7.1422576904296875, 27.638063430786133, 12.497299194335938, 27.75479507446289, -6.996742248535156, 21.1219482421875, 2.205850601196289, 12.667037963867188, 24.693939208984375, 2.1034317016601562, 12.467826843261719, 1.1516189575195312, 7.54876708984375, -7.361515045166016, 0.11391067504882812, 6.564720153808594, 26.112586975097656, 6.677961349487305, 1.0303573608398438, 19.58354949951172, 8.467367172241211, 3.0311279296875, 16.653961181640625, -0.6316719055175781, 17.066139221191406, 6.408576965332031, 5.5999298095703125, 5.4606170654296875, 0.14719581604003906, 22.89983367919922, -6.931760787963867, -1.2887191772460938, 4.663818359375, 4.2158050537109375, 12.94708251953125, 15.206466674804688, -6.56450080871582, 4.712646484375, -0.23207664489746094, 5.048896789550781, -3.649017333984375, 1.7260894775390625, -1.7273483276367188, 1.2579269409179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000184.npy"}
|
|
{"epoch": 0.2781557067271353, "step": 185, "batch_size": 64, "mean": 8.02737045288086, "std": 14.166238784790039, "min": -23.05743408203125, "p10": -8.200965118408202, "median": 7.583366394042969, "p90": 26.65047779083252, "max": 34.96009826660156, "pos_frac": 0.671875, "sample": [4.007373809814453, 31.441497802734375, -2.4087677001953125, 19.044450759887695, 32.97633361816406, -20.462017059326172, 12.155147552490234, 5.383464813232422, -7.0743408203125, 2.9729156494140625, 30.039749145507812, 29.856948852539062, -1.046234130859375, -6.1257171630859375, 29.3629150390625, 3.710968017578125, 34.96009826660156, 6.4729156494140625, 24.138980865478516, 24.686519622802734, -4.739112854003906, 26.952362060546875, 13.814727783203125, 6.87945556640625, 1.9897537231445312, 18.972209930419922, 11.218864440917969, 12.316278457641602, 6.8472442626953125, 15.953506469726562, -8.466924667358398, -2.1553573608398438, 12.835079193115234, 22.820526123046875, -23.05743408203125, 18.645729064941406, 18.949203491210938, 12.932992935180664, 13.395671844482422, -3.870332717895508, -16.571929931640625, 10.22857666015625, -4.175865173339844, 11.996078491210938, 21.341224670410156, -14.831497192382812, 22.7192325592041, -2.711587905883789, 20.963958740234375, -4.377712249755859, 3.8818817138671875, -14.078134536743164, 8.287277221679688, -6.149501800537109, 2.8333740234375, 8.90589714050293, -14.590816497802734, -7.580392837524414, 15.10948371887207, -3.572784423828125, -4.487369537353516, 4.555538177490234, 23.783039093017578, 25.946081161499023], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000185.npy"}
|
|
{"epoch": 0.2796674225245654, "step": 186, "batch_size": 64, "mean": 9.063056945800781, "std": 13.934813499450684, "min": -22.371826171875, "p10": -6.599268341064453, "median": 8.082980155944824, "p90": 27.665299034118654, "max": 37.81634521484375, "pos_frac": 0.71875, "sample": [19.899688720703125, 33.06158447265625, -18.88683319091797, 9.49271011352539, 13.782478332519531, -6.788299560546875, 11.14202880859375, -12.847640991210938, -6.158195495605469, 21.50714874267578, 13.25177001953125, -5.570949554443359, 12.311691284179688, 20.25324249267578, 37.81634521484375, 22.65140151977539, 6.901458740234375, 7.444099426269531, 5.128925323486328, -17.821060180664062, 4.331264495849609, 15.552061080932617, 19.99510383605957, 32.739219665527344, -22.371826171875, -4.969146728515625, -5.798866271972656, -4.720684051513672, -2.1954498291015625, 3.4082984924316406, 7.616813659667969, 24.167898178100586, 0.750946044921875, -1.944488525390625, -3.660184860229492, -1.8893604278564453, 12.065818786621094, 27.00719451904297, 22.607742309570312, 3.362335205078125, -2.1883316040039062, -11.102935791015625, 3.0709495544433594, 4.060455322265625, 15.594345092773438, 7.571678161621094, 27.947343826293945, 21.038528442382812, -8.369155883789062, 7.831398010253906, 35.44740295410156, 9.288909912109375, 7.356925964355469, 36.274993896484375, 10.6165771484375, 11.430976867675781, 35.59600830078125, 19.679685592651367, 16.375778198242188, 13.725473403930664, 6.0511322021484375, 13.68316650390625, -1.9065093994140625, 8.334562301635742], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000186.npy"}
|
|
{"epoch": 0.2811791383219955, "step": 187, "batch_size": 64, "mean": 8.344643592834473, "std": 13.781630516052246, "min": -25.712968826293945, "p10": -9.677745819091795, "median": 7.918708801269531, "p90": 26.22697677612305, "max": 35.96466064453125, "pos_frac": 0.703125, "sample": [14.364875793457031, -3.75921630859375, 21.69073486328125, 30.524375915527344, 35.96466064453125, 18.848411560058594, 11.032295227050781, 5.652416229248047, 10.032363891601562, 25.42526626586914, -20.084800720214844, 8.094802856445312, -1.1397819519042969, 11.485733032226562, 15.079086303710938, 1.8909626007080078, 1.520620346069336, 24.585182189941406, 15.807754516601562, 8.60572624206543, -12.087860107421875, -25.712968826293945, 25.45417022705078, -0.901092529296875, 3.9417572021484375, -1.2646255493164062, 1.2709465026855469, 0.53662109375, 1.51885986328125, 23.74506378173828, 21.880088806152344, 26.953598022460938, -14.594711303710938, -3.1009178161621094, 11.973419189453125, 7.3883209228515625, 5.913993835449219, 1.4698638916015625, -5.521568298339844, 16.418041229248047, -2.0280723571777344, -10.107608795166016, 23.914886474609375, -0.5071582794189453, 7.74261474609375, 14.795661926269531, 28.417633056640625, -2.901519775390625, 9.873641967773438, 19.009979248046875, 26.47144317626953, -8.774787902832031, 18.677642822265625, 33.711669921875, 4.343238830566406, -3.1030654907226562, 25.65655517578125, 27.80791473388672, 3.522918701171875, 10.57086181640625, -2.4464492797851562, -16.277156829833984, -10.064727783203125, 14.848579406738281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000187.npy"}
|
|
{"epoch": 0.28269085411942557, "step": 188, "batch_size": 64, "mean": 8.842605590820312, "std": 13.333514213562012, "min": -30.24329376220703, "p10": -4.3361572265625, "median": 5.4886016845703125, "p90": 29.25789165496827, "max": 37.11951446533203, "pos_frac": 0.796875, "sample": [5.496330261230469, -2.9319591522216797, 31.773685455322266, -6.897665023803711, -0.7117691040039062, 22.614120483398438, 18.195831298828125, 7.347345352172852, 23.011383056640625, 5.1442718505859375, 2.817676544189453, 34.773284912109375, 24.92770004272461, -0.8393020629882812, 0.3055000305175781, 1.4673480987548828, 12.023773193359375, 30.045257568359375, 0.6982288360595703, -4.342437744140625, 11.58043098449707, 4.3140716552734375, 0.9508209228515625, 0.4015655517578125, 8.11800765991211, -0.5416488647460938, -5.810489654541016, 21.25157928466797, 6.616947174072266, 19.590984344482422, 13.132692337036133, 2.97503662109375, 9.429855346679688, 37.11951446533203, 25.13072967529297, 0.5327358245849609, 26.349838256835938, 7.7877960205078125, -1.3495407104492188, 7.068534851074219, 34.58198928833008, 1.87359619140625, 5.480873107910156, 9.40533447265625, 1.5939407348632812, 3.468231201171875, 3.4393692016601562, 25.194679260253906, 2.963409423828125, 0.2530841827392578, 29.960739135742188, -17.035568237304688, -4.321502685546875, 8.771312713623047, 5.371307373046875, 5.341194152832031, 7.734323501586914, -9.767736434936523, 23.508163452148438, 30.191051483154297, 27.6179141998291, 6.79949951171875, -5.823190689086914, -30.24329376220703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000188.npy"}
|
|
{"epoch": 0.2842025699168556, "step": 189, "batch_size": 64, "mean": 8.326497077941895, "std": 13.460592269897461, "min": -24.536319732666016, "p10": -8.66235427856445, "median": 7.059709548950195, "p90": 28.032454681396484, "max": 35.914772033691406, "pos_frac": 0.78125, "sample": [1.1827049255371094, 10.072181701660156, 10.077938079833984, 12.465164184570312, 9.589874267578125, 25.72955322265625, 27.708717346191406, 12.640279769897461, 0.9450778961181641, 5.2407379150390625, -20.887405395507812, 28.079334259033203, -17.615234375, 2.1642837524414062, -0.6360702514648438, -1.396158218383789, 29.566360473632812, 20.62509536743164, -13.278228759765625, 12.290912628173828, 6.727996826171875, 18.399272918701172, 6.03759765625, 9.231689453125, -0.36753082275390625, 3.0375404357910156, 18.41820526123047, 12.036338806152344, 4.592710494995117, -0.5941543579101562, 28.597000122070312, -3.9641876220703125, 3.3896751403808594, 30.541873931884766, 28.129119873046875, 7.391422271728516, 0.6567459106445312, 3.8826904296875, 2.724637985229492, 18.879661560058594, 27.92306900024414, 11.37677001953125, 14.352178573608398, 7.579204559326172, 15.805526733398438, 9.52877426147461, -5.815010070800781, -19.377981185913086, 5.675323486328125, 2.3919830322265625, -0.171600341796875, 19.08702278137207, -13.452346801757812, 35.914772033691406, 17.79852294921875, 6.001556396484375, -9.882644653320312, -24.536319732666016, 11.990653991699219, 1.2437744140625, 6.725593566894531, 1.8250808715820312, 31.76392364501953, 26.834562301635742], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000189.npy"}
|
|
{"epoch": 0.2857142857142857, "step": 190, "batch_size": 64, "mean": 11.939929008483887, "std": 13.702558517456055, "min": -17.079666137695312, "p10": -4.190242576599121, "median": 10.583213806152344, "p90": 32.31653327941895, "max": 41.1427001953125, "pos_frac": 0.796875, "sample": [6.429262161254883, 24.082077026367188, 39.32862091064453, -7.2734375, 17.955142974853516, 15.15850830078125, 1.1772537231445312, 1.17474365234375, -10.13946533203125, 34.07225036621094, 11.055007934570312, 22.127197265625, 7.770801544189453, 1.4579048156738281, 7.524894714355469, 5.054210662841797, -2.1142845153808594, 32.511962890625, 7.434211730957031, 9.317914962768555, 25.501285552978516, 31.860530853271484, 1.361846923828125, 0.4750633239746094, -4.902925491333008, 10.95330810546875, -17.079666137695312, 6.942150115966797, 4.9597320556640625, 16.538307189941406, 12.467201232910156, -3.1757736206054688, 10.213119506835938, 16.31390380859375, 22.549964904785156, 1.1382808685302734, 33.16552734375, -5.788236618041992, -3.5216102600097656, 13.838634490966797, -2.25750732421875, 20.410690307617188, 21.597610473632812, 41.1427001953125, 7.996854782104492, 2.6477813720703125, -3.8283557891845703, 22.179183959960938, -11.081390380859375, -4.3453369140625, 13.337739944458008, 12.889850616455078, 3.454822540283203, 19.5284423828125, 20.12384033203125, 31.600196838378906, 37.058990478515625, 4.607025146484375, 24.6895751953125, 29.60204315185547, -0.327484130859375, 32.679656982421875, 23.936721801757812, 18.59638214111328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000190.npy"}
|
|
{"epoch": 0.2872260015117158, "step": 191, "batch_size": 64, "mean": 8.750959396362305, "std": 14.268815994262695, "min": -30.738235473632812, "p10": -4.42379035949707, "median": 9.075874328613281, "p90": 25.9707633972168, "max": 38.84971618652344, "pos_frac": 0.796875, "sample": [23.06037139892578, -2.0841407775878906, 9.208343505859375, 20.717796325683594, 36.564308166503906, 0.15319061279296875, 1.2709197998046875, 32.35948944091797, 8.654966354370117, 4.527191162109375, 2.25872802734375, 38.84971618652344, 5.374427795410156, -0.7236442565917969, 11.502685546875, 3.4007740020751953, -26.378433227539062, 4.326622009277344, 25.48211669921875, 13.967035293579102, 13.723068237304688, 17.862701416015625, 15.448219299316406, 26.18018341064453, -15.941741943359375, 24.829147338867188, 12.077011108398438, 11.509841918945312, -4.331390380859375, 0.890655517578125, 13.061447143554688, 0.5795822143554688, -2.7164669036865234, 1.4543418884277344, 31.600387573242188, -30.738235473632812, -0.13462448120117188, -12.418563842773438, 9.996589660644531, 21.592254638671875, 15.776493072509766, 14.102779388427734, 8.943405151367188, 14.691459655761719, 18.01213264465332, 2.6521968841552734, 25.33472442626953, 29.249774932861328, -4.463390350341797, 2.2185516357421875, -4.328130722045898, 6.394100189208984, 27.68267822265625, 9.780773162841797, -11.519832611083984, 13.20062255859375, 23.804550170898438, -24.285049438476562, 14.02033805847168, 1.3563766479492188, 1.7213668823242188, 22.503524780273438, 3.0672988891601562, 3.127777099609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000191.npy"}
|
|
{"epoch": 0.2887377173091459, "step": 192, "batch_size": 64, "mean": 5.97373104095459, "std": 12.144250869750977, "min": -31.429115295410156, "p10": -6.447689056396483, "median": 4.286251068115234, "p90": 22.908417320251466, "max": 35.24961853027344, "pos_frac": 0.65625, "sample": [-31.429115295410156, -10.082290649414062, 0.5441093444824219, 13.786178588867188, 17.852428436279297, 2.9013214111328125, -4.119029998779297, 14.940536499023438, 28.681228637695312, -7.130706787109375, -10.258296966552734, -4.853981018066406, -3.230438232421875, 0.5804996490478516, 2.7385940551757812, 8.164993286132812, 22.284679412841797, 5.291900634765625, -1.0516586303710938, 4.15057373046875, 23.17573356628418, 25.58563232421875, -3.9232215881347656, -1.8485946655273438, 24.909454345703125, -0.09429168701171875, 5.5612945556640625, 17.118160247802734, 3.3955917358398438, 3.4542198181152344, 14.828453063964844, 19.350692749023438, 12.733087539672852, 20.301671981811523, -2.1417808532714844, -4.071247100830078, 4.421928405761719, -0.9699649810791016, -4.6655120849609375, 5.331016540527344, 5.843664169311523, -0.3756904602050781, -18.8216552734375, 35.24961853027344, 13.202651977539062, 6.913784027099609, -3.72235107421875, 2.6616439819335938, 7.153099060058594, -14.430816650390625, 7.7576446533203125, 24.592487335205078, 2.1126556396484375, 12.408615112304688, -0.9637279510498047, 16.768714904785156, 16.250743865966797, -1.511810302734375, 19.125648498535156, 25.490867614746094, 13.40878677368164, -7.804677963256836, 2.6376266479492188, 6.157390594482422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000192.npy"}
|
|
{"epoch": 0.29024943310657597, "step": 193, "batch_size": 64, "mean": 11.092357635498047, "std": 13.583741188049316, "min": -27.028133392333984, "p10": -2.2758392333984374, "median": 7.837071418762207, "p90": 30.109021759033205, "max": 42.03868865966797, "pos_frac": 0.8125, "sample": [0.8111305236816406, 6.673944473266602, 21.46949577331543, 6.514984130859375, 7.440523147583008, -6.5829315185546875, 0.173309326171875, 27.457435607910156, -2.212799072265625, -1.8483142852783203, 28.12938690185547, 3.3149147033691406, 15.970481872558594, 16.85772705078125, 28.909652709960938, 0.16071319580078125, 12.7705078125, -0.39725494384765625, 17.843217849731445, 23.758987426757812, -3.04583740234375, 29.732444763183594, 42.03868865966797, -4.699642181396484, 12.606727600097656, 30.23149871826172, 2.638141632080078, 5.276878356933594, -2.3028564453125, 12.688081741333008, 28.420372009277344, 14.453773498535156, 5.071846008300781, 13.266929626464844, 11.930229187011719, 9.008003234863281, -2.109222412109375, 1.1016159057617188, 15.052772521972656, 8.233619689941406, -7.69940185546875, 3.796611785888672, 28.136127471923828, 37.24699401855469, -2.695049285888672, 4.1257476806640625, 12.64187240600586, -1.5662803649902344, 5.954761505126953, 32.84236145019531, 19.829681396484375, 0.3533477783203125, 34.0699462890625, 36.09147644042969, 29.8232421875, 6.478302001953125, 16.724105834960938, -27.028133392333984, 8.390373229980469, 30.3483943939209, 2.3429603576660156, 1.5960655212402344, 1.0998497009277344, 0.19832992553710938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000193.npy"}
|
|
{"epoch": 0.29176114890400606, "step": 194, "batch_size": 64, "mean": 8.594758033752441, "std": 16.43195343017578, "min": -31.59960174560547, "p10": -13.130434417724606, "median": 9.038880348205566, "p90": 31.566800498962408, "max": 37.95732116699219, "pos_frac": 0.734375, "sample": [35.990478515625, 7.014312744140625, 17.49687957763672, -14.577468872070312, 22.29686737060547, 3.0675010681152344, -22.317720413208008, 2.358417510986328, 3.8156661987304688, 13.060562133789062, 7.476356506347656, 13.696563720703125, 7.43585205078125, 26.683761596679688, -18.357086181640625, -5.2079925537109375, 16.872230529785156, -9.754020690917969, 11.516902923583984, -2.8297157287597656, 17.635101318359375, 3.253875732421875, 20.71686553955078, 26.265777587890625, -5.749973297119141, 11.203544616699219, -6.7910919189453125, 23.053131103515625, -0.3203887939453125, 16.851112365722656, 0.7874031066894531, 29.859235763549805, -17.48675537109375, -7.366344451904297, -27.036983489990234, -1.0377197265625, 37.95732116699219, 12.0546875, 14.43558120727539, 32.75286865234375, -29.394916534423828, 11.424751281738281, 5.273906707763672, 1.9839611053466797, 16.872665405273438, 37.845130920410156, 10.601404190063477, -31.59960174560547, 35.28490447998047, 17.123016357421875, 18.479936599731445, -1.238739013671875, 17.144866943359375, 31.991182327270508, 30.576576232910156, 32.360313415527344, 10.738128662109375, 0.42421722412109375, 3.239055633544922, 22.284912109375, 4.452356338500977, 3.4380569458007812, -2.743785858154297, 6.726633071899414], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000194.npy"}
|
|
{"epoch": 0.29327286470143615, "step": 195, "batch_size": 64, "mean": 11.304681777954102, "std": 15.50979995727539, "min": -19.5418701171875, "p10": -7.572175598144529, "median": 10.521846771240234, "p90": 33.93747253417969, "max": 41.51261901855469, "pos_frac": 0.703125, "sample": [3.991668701171875, 9.052009582519531, -2.340850830078125, 9.079124450683594, 37.612953186035156, 24.995269775390625, -1.0249824523925781, 10.318344116210938, 14.117202758789062, 34.73274230957031, 17.032150268554688, 25.325292587280273, -8.392982482910156, -13.236083984375, -16.00408935546875, -19.5418701171875, -1.0286369323730469, 11.385799407958984, 13.633440017700195, 29.256061553955078, -3.13739013671875, 6.642875671386719, 15.116077423095703, 20.828216552734375, 13.456520080566406, 13.134017944335938, 36.054222106933594, 35.62944030761719, 6.714820861816406, 16.714569091796875, 5.287080764770508, -1.60028076171875, 36.06916046142578, 7.229728698730469, 33.06134033203125, -0.5014114379882812, -3.8930740356445312, 31.033653259277344, 22.11309051513672, 26.83293914794922, 10.725349426269531, -5.656959533691406, -1.402862548828125, -2.4185943603515625, 0.4008636474609375, 3.6114559173583984, 3.211181640625, 32.444580078125, -18.821060180664062, -0.04941749572753906, -13.880233764648438, -0.2902374267578125, 41.51261901855469, 27.657068252563477, 21.858718872070312, 21.13039779663086, 20.100845336914062, 28.05517578125, -11.804901123046875, 34.312957763671875, 12.298343658447266, 0.9670257568359375, 5.306732177734375, 18.482458114624023], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000195.npy"}
|
|
{"epoch": 0.2947845804988662, "step": 196, "batch_size": 64, "mean": 8.952545166015625, "std": 13.805590629577637, "min": -38.12971115112305, "p10": -5.102200126647949, "median": 8.959392547607422, "p90": 28.01258087158203, "max": 37.81315612792969, "pos_frac": 0.78125, "sample": [3.6786041259765625, -38.12971115112305, -5.140972137451172, 2.0486602783203125, 11.276424407958984, -9.056913375854492, 1.3625640869140625, 5.080589294433594, 7.618194580078125, -3.4650650024414062, 4.483856201171875, 29.564971923828125, 29.71503448486328, 12.212223052978516, 9.6627197265625, 6.117393493652344, -20.895984649658203, 27.140968322753906, -0.9921836853027344, 22.04644775390625, -2.8941593170166016, 5.225467681884766, 33.397804260253906, 0.7475795745849609, -7.584423065185547, 5.221565246582031, 10.1009521484375, 8.973808288574219, 8.944976806640625, 37.81315612792969, 9.53912353515625, 20.160648345947266, 1.9041595458984375, -5.01173210144043, -3.8204421997070312, 15.674591064453125, 3.4165191650390625, 18.275976181030273, 11.663089752197266, 28.303367614746094, 15.054679870605469, 11.156707763671875, 26.416034698486328, 3.281505584716797, 22.182479858398438, -13.54404067993164, 1.0369281768798828, 9.27996826171875, 14.916648864746094, 28.195114135742188, 27.586669921875, 26.142559051513672, -2.725250244140625, -3.4368515014648438, 10.853921890258789, 20.81787109375, 16.200551986694336, 15.742080688476562, 2.088794708251953, 2.3600997924804688, 3.178844451904297, -5.155467987060547, 11.370460510253906, 35.58274841308594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000196.npy"}
|
|
{"epoch": 0.2962962962962963, "step": 197, "batch_size": 64, "mean": 11.268684387207031, "std": 18.791465759277344, "min": -35.67440414428711, "p10": -15.406493949890137, "median": 9.429205894470215, "p90": 34.95525932312012, "max": 43.18360137939453, "pos_frac": 0.765625, "sample": [-4.048076629638672, 39.20433807373047, 9.50874137878418, 2.438201904296875, 9.34967041015625, 22.00135040283203, -15.462230682373047, -27.51092529296875, 1.7289886474609375, -5.396553039550781, 26.352901458740234, 34.15386199951172, -15.337631225585938, -19.64151382446289, 8.687358856201172, 35.676025390625, 18.95832061767578, 3.9086570739746094, 43.18360137939453, 35.12066650390625, 5.455390930175781, 10.631752014160156, 23.211467742919922, 18.25830078125, 12.543754577636719, 1.9756011962890625, 4.883941650390625, -15.436006546020508, 32.75068664550781, 14.965278625488281, 34.25310516357422, 36.353912353515625, -25.7408447265625, 29.79355812072754, -3.2082595825195312, -35.67440414428711, 3.5703353881835938, 14.938720703125, 30.409759521484375, 16.07269287109375, -0.2031993865966797, 32.49614715576172, 34.56930923461914, 2.0718536376953125, -2.431427001953125, 38.23736572265625, 2.3965606689453125, 0.9901046752929688, 8.064273834228516, 25.823707580566406, -10.831602096557617, -5.345497131347656, 23.348655700683594, 21.830490112304688, 3.649632453918457, -24.997047424316406, 40.691368103027344, 24.50409698486328, 2.0447921752929688, 26.897220611572266, 5.0397186279296875, 22.592498779296875, 7.833080291748047, 29.03919792175293], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000197.npy"}
|
|
{"epoch": 0.29780801209372637, "step": 198, "batch_size": 64, "mean": 12.191038131713867, "std": 14.665315628051758, "min": -23.765472412109375, "p10": -6.093245506286618, "median": 11.66331672668457, "p90": 29.89137878417969, "max": 44.97233581542969, "pos_frac": 0.765625, "sample": [-2.9269256591796875, 27.569297790527344, -0.2752513885498047, 18.43902587890625, 11.344223022460938, 11.352043151855469, -10.455537796020508, 5.500816345214844, 10.490957260131836, 6.0865936279296875, 29.045852661132812, 9.25616455078125, -0.04759025573730469, -16.908254623413086, 44.97233581542969, 36.511871337890625, 18.048980712890625, 12.9249267578125, 12.691070556640625, -3.714479446411133, 23.724937438964844, 22.911649703979492, 33.663307189941406, -9.966970443725586, 33.79728698730469, -18.437591552734375, -9.916664123535156, -0.06460952758789062, 27.30274200439453, 23.7956600189209, 25.160079956054688, 19.716285705566406, 16.073806762695312, -23.765472412109375, 11.974590301513672, -2.2773971557617188, 1.205841064453125, 2.612743377685547, -7.1127166748046875, 13.805435180664062, 5.2065582275390625, 27.492355346679688, 29.639362335205078, 16.384170532226562, 17.66141700744629, 8.4722900390625, -0.28751373291015625, 35.88050842285156, 42.16474151611328, 11.001609802246094, 6.023468017578125, 1.2760238647460938, 24.727752685546875, 29.999385833740234, 3.7235107421875, 11.232940673828125, 14.001541137695312, 14.773239135742188, -1.0843791961669922, 11.255779266357422, 14.778129577636719, 9.729637145996094, 20.995330810546875, 21.06951904296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000198.npy"}
|
|
{"epoch": 0.29931972789115646, "step": 199, "batch_size": 64, "mean": 9.460265159606934, "std": 18.56617546081543, "min": -33.82267761230469, "p10": -11.962799644470213, "median": 10.93288803100586, "p90": 31.574518966674805, "max": 44.80025863647461, "pos_frac": 0.671875, "sample": [19.13534927368164, 25.062423706054688, 3.5339202880859375, 13.04659652709961, -12.706613540649414, -25.596542358398438, 8.323631286621094, 32.181419372558594, 22.707420349121094, 8.64605712890625, 16.354995727539062, -4.709114074707031, 44.80025863647461, 23.836326599121094, 41.94260787963867, 31.48431396484375, 22.11684799194336, 29.7120361328125, 22.869369506835938, 31.613178253173828, -5.1358795166015625, -20.69083023071289, 22.96957778930664, 30.413848876953125, -6.02360725402832, -0.42821502685546875, 18.761123657226562, -8.767127990722656, 13.537178039550781, 25.583168029785156, 24.178176879882812, 3.5295352935791016, 17.069904327392578, -3.56964111328125, -0.7371129989624023, 5.0972900390625, 12.06591796875, -9.770240783691406, -23.573402404785156, 16.378814697265625, -33.82267761230469, 17.871978759765625, 3.3679428100585938, 29.384193420410156, -28.576499938964844, 9.799858093261719, 8.509407043457031, 7.04296875, -8.744644165039062, 3.1482620239257812, -0.09107398986816406, -31.689361572265625, -9.231651306152344, 19.215885162353516, -10.22723388671875, -1.9142608642578125, 32.022003173828125, -7.783973693847656, 29.877273559570312, 16.153316497802734, 37.13823699951172, 4.204437255859375, 38.14778137207031, 16.411815643310547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000199.npy"}
|
|
{"epoch": 0.30083144368858655, "step": 200, "batch_size": 64, "mean": 13.507145881652832, "std": 19.24868392944336, "min": -41.61656188964844, "p10": -7.71124973297119, "median": 10.908551216125488, "p90": 41.72841415405274, "max": 44.609466552734375, "pos_frac": 0.75, "sample": [5.852069854736328, 24.717632293701172, 40.04132080078125, 6.8138275146484375, 13.3109130859375, 1.8935279846191406, 37.26869201660156, 6.906044006347656, 6.919258117675781, -3.7667694091796875, 3.3131103515625, 26.875244140625, -12.239299774169922, 42.994293212890625, 1.8818588256835938, -3.351804733276367, 7.780799865722656, -1.681427001953125, 42.451454162597656, -9.577728271484375, 1.7373504638671875, -1.0499801635742188, 22.835350036621094, 33.89499282836914, 1.4707679748535156, 21.802993774414062, 31.484479904174805, -17.281005859375, 28.220569610595703, -2.3617210388183594, 36.734535217285156, 25.06409454345703, 11.31158447265625, 43.05781555175781, 44.609466552734375, -4.498571395874023, -41.61656188964844, 27.624935150146484, 43.90592956542969, 36.41949462890625, 33.933326721191406, 22.38127899169922, 43.95832824707031, 11.271636962890625, 8.691696166992188, -29.006027221679688, 3.8213272094726562, 9.73046875, 10.545465469360352, 43.41117858886719, -3.7487411499023438, 14.465873718261719, 13.078075408935547, -8.168190002441406, 12.329795837402344, 18.679946899414062, -6.645055770874023, -9.237476348876953, 18.99768829345703, 9.577140808105469, 1.1166839599609375, -6.1206865310668945, 32.818153381347656, 36.805870056152344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000200.npy"}
|
|
{"epoch": 0.30234315948601664, "step": 201, "batch_size": 64, "mean": 11.096135139465332, "std": 18.219829559326172, "min": -31.069992065429688, "p10": -14.11599769592285, "median": 9.289595603942871, "p90": 37.444243621826175, "max": 50.229652404785156, "pos_frac": 0.75, "sample": [8.780181884765625, 16.2103271484375, 12.852108001708984, 11.104598999023438, 3.8299808502197266, 13.338245391845703, 24.120433807373047, 6.697187423706055, 19.325424194335938, -0.14371490478515625, 2.7551040649414062, 25.611486434936523, 29.778676986694336, 18.480697631835938, 1.2140674591064453, -16.08819580078125, 50.229652404785156, 37.25482940673828, 0.3966217041015625, 43.53064727783203, -15.902442932128906, 3.4770545959472656, 9.677749633789062, -17.79159164428711, 1.8043994903564453, 39.33003616333008, 21.797592163085938, 15.321876525878906, -14.340923309326172, 35.62041473388672, 2.418682098388672, 37.525421142578125, 1.9370250701904297, -7.468498229980469, 27.147624969482422, 23.64840316772461, -1.1305122375488281, -17.828018188476562, 3.25421142578125, 6.0685882568359375, 29.67626190185547, 8.009754180908203, -2.6478748321533203, 21.296142578125, -6.915012359619141, 20.23236083984375, 13.07000732421875, -22.373809814453125, 9.388334274291992, 45.06852722167969, -3.3090343475341797, 9.19085693359375, 19.759098052978516, 28.181110382080078, 20.558500289916992, 42.896812438964844, 26.141372680664062, -10.789909362792969, 2.4728469848632812, -13.591171264648438, 40.52006149291992, -1.4979095458984375, 2.0398712158203125, -31.069992065429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000201.npy"}
|
|
{"epoch": 0.30385487528344673, "step": 202, "batch_size": 64, "mean": 13.410652160644531, "std": 17.968969345092773, "min": -37.52008819580078, "p10": -5.606543350219726, "median": 15.278118133544922, "p90": 32.27442321777344, "max": 54.9150390625, "pos_frac": 0.78125, "sample": [-6.24421501159668, 12.191680908203125, 47.03024673461914, 54.9150390625, 32.06226348876953, -5.103752136230469, 24.65835189819336, 24.746437072753906, 29.17047882080078, 19.590290069580078, 4.383852005004883, 24.021099090576172, 5.718257904052734, 7.110984802246094, -3.3877639770507812, 26.308212280273438, 29.231781005859375, 3.1268234252929688, 25.1666259765625, -5.822025299072266, 13.735153198242188, 13.5184326171875, 20.196823120117188, 3.8023147583007812, -4.333990097045898, 22.324386596679688, 7.088596343994141, 46.28630447387695, -0.09459686279296875, 23.77397346496582, -2.2207069396972656, 12.059667587280273, 19.158958435058594, 20.224594116210938, 4.19549560546875, -37.52008819580078, 24.156051635742188, 12.814403533935547, 26.965106964111328, -28.40985870361328, 15.109222412109375, 16.467422485351562, 22.4959716796875, 9.827705383300781, 35.045509338378906, 15.447013854980469, 32.28672790527344, 4.241113662719727, 38.6422119140625, -2.4804916381835938, 0.5643596649169922, -24.141468048095703, -1.0142669677734375, -6.814977645874023, 2.8212356567382812, 25.286529541015625, 35.86396789550781, 2.7727203369140625, 16.788501739501953, 23.03627586364746, -30.2557373046875, 28.145538330078125, 19.305204391479492, 32.24571228027344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000202.npy"}
|
|
{"epoch": 0.30536659108087677, "step": 203, "batch_size": 64, "mean": 11.91786003112793, "std": 18.367298126220703, "min": -30.475250244140625, "p10": -6.8049697875976545, "median": 8.402344703674316, "p90": 36.44302978515625, "max": 55.23004150390625, "pos_frac": 0.75, "sample": [7.553352355957031, 0.677581787109375, -20.005477905273438, -3.92529296875, 3.945159912109375, 23.282691955566406, 1.1464920043945312, 20.700408935546875, 11.802764892578125, -5.279594421386719, 34.848175048828125, 13.076805114746094, 16.483957290649414, 44.867889404296875, 7.79463005065918, 55.23004150390625, 36.45933532714844, 4.924293518066406, 30.957408905029297, 17.913330078125, -30.475250244140625, -0.4388313293457031, 14.406105041503906, -3.3711318969726562, -1.0408802032470703, -10.719406127929688, 10.232437133789062, 44.66502380371094, 3.5514373779296875, 23.245628356933594, 9.288528442382812, 3.7225723266601562, 33.98548126220703, 9.010059356689453, 25.302520751953125, 1.1792144775390625, 37.94892120361328, 21.10538673400879, 36.40498352050781, 16.845693588256836, 1.2675933837890625, -1.651611328125, 4.211112976074219, 16.14410400390625, 29.166778564453125, -3.5275192260742188, 35.715248107910156, 38.165283203125, -7.458702087402344, 49.08055877685547, 23.572509765625, 2.6413650512695312, 1.4040985107421875, 6.7942962646484375, 4.3408355712890625, -22.42853546142578, -0.03936767578125, 7.3031768798828125, 30.19043731689453, 25.78547477722168, 16.501022338867188, -24.455322265625, -16.565532684326172, -0.7166728973388672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000203.npy"}
|
|
{"epoch": 0.30687830687830686, "step": 204, "batch_size": 64, "mean": 12.833279609680176, "std": 18.620126724243164, "min": -36.39906311035156, "p10": -5.84673728942871, "median": 11.283490180969238, "p90": 41.830619049072276, "max": 49.246185302734375, "pos_frac": 0.75, "sample": [45.24822998046875, 36.28865051269531, -8.22584342956543, 6.545219421386719, 9.456199645996094, 49.246185302734375, 2.4708709716796875, 20.184249877929688, -12.066307067871094, 14.26446533203125, 45.71856689453125, -26.07884979248047, -4.428241729736328, 5.592464447021484, 18.142250061035156, 12.107467651367188, 23.351119995117188, 4.837059020996094, -18.628707885742188, 0.5469818115234375, 26.620515823364258, 10.615409851074219, 28.914329528808594, 38.011390686035156, -2.6140003204345703, -4.649543762207031, 17.605743408203125, 13.601642608642578, 2.815258026123047, 6.494346618652344, 42.92084503173828, -5.690650939941406, 3.8031673431396484, 14.0189208984375, 23.188814163208008, -3.945934295654297, 21.351959228515625, 45.416683197021484, -3.966989517211914, 5.9107208251953125, 6.831947326660156, 7.175876617431641, 34.129478454589844, 36.59523010253906, -8.08563232421875, 13.381278991699219, 15.795394897460938, -1.5216388702392578, 28.674331665039062, -1.2478523254394531, 39.28675842285156, 43.005828857421875, 0.6173362731933594, 23.89824676513672, 17.7352294921875, 44.652103424072266, 24.654312133789062, 1.1165237426757812, 1.4117889404296875, 23.426177978515625, -36.39906311035156, -5.913631439208984, 11.951570510864258, -4.83636474609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000204.npy"}
|
|
{"epoch": 0.30839002267573695, "step": 205, "batch_size": 64, "mean": 12.756902694702148, "std": 20.44365692138672, "min": -40.902198791503906, "p10": -8.574050903320312, "median": 11.120780944824219, "p90": 41.3978500366211, "max": 58.37207794189453, "pos_frac": 0.703125, "sample": [58.148468017578125, 7.853418350219727, -0.8964633941650391, 10.332275390625, 17.140483856201172, 51.38545608520508, 12.825582504272461, 24.45024871826172, -34.68035888671875, 38.9688720703125, 4.013219833374023, 4.867603302001953, 58.37207794189453, 17.02642822265625, 14.701173782348633, 2.7681427001953125, 11.506942749023438, -9.528121948242188, -6.72064208984375, 21.496932983398438, 9.733549118041992, 17.473281860351562, 3.632448196411133, 8.418327331542969, 25.316009521484375, -2.4255599975585938, -10.621841430664062, -2.4751815795898438, 9.244255065917969, 48.82172393798828, -4.23077392578125, -7.977750778198242, 20.6072998046875, 10.359756469726562, -0.9527587890625, 22.296295166015625, 16.22883415222168, 24.5662841796875, -1.8850250244140625, 18.573326110839844, -1.480438232421875, 36.38213348388672, 27.229248046875, 22.864334106445312, 45.12212371826172, 41.889556884765625, 1.8333206176757812, 15.953117370605469, 33.574951171875, -8.6370849609375, 16.45417022705078, 8.021308898925781, 22.67922592163086, 10.734619140625, -32.958587646484375, 40.25053405761719, 28.303943634033203, -10.378774642944336, 43.149749755859375, -8.426971435546875, -2.8761978149414062, -40.902198791503906, -1.5940475463867188, 20.51950454711914], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000205.npy"}
|
|
{"epoch": 0.30990173847316704, "step": 206, "batch_size": 64, "mean": 8.996944427490234, "std": 21.932971954345703, "min": -36.30450439453125, "p10": -19.003664779663083, "median": 5.3913373947143555, "p90": 39.9510986328125, "max": 47.202728271484375, "pos_frac": 0.65625, "sample": [-14.656620025634766, 1.2097625732421875, 2.9830245971679688, -31.36223602294922, -10.948507308959961, -2.4256591796875, 5.16461181640625, 10.779117584228516, 31.35955047607422, 9.70673942565918, -3.851804733276367, 46.93983459472656, 17.762775421142578, -4.655601501464844, -10.591804504394531, 23.420623779296875, -2.4725189208984375, 45.56748962402344, -25.673828125, 5.618062973022461, 47.202728271484375, 26.44717025756836, 3.1927719116210938, 4.94891357421875, -7.5457000732421875, 39.46338653564453, 7.765083312988281, -31.88203239440918, 26.32724952697754, 30.182098388671875, 32.64501190185547, 43.41681671142578, 0.8768844604492188, 5.960788726806641, 37.8458251953125, 1.49896240234375, -36.30450439453125, -20.866683959960938, -7.24540901184082, -0.03436279296875, 22.566421508789062, 0.4530220031738281, 23.932891845703125, -9.715469360351562, -3.0856781005859375, -29.677093505859375, 42.79894256591797, 30.552734375, 4.234130859375, 7.080108642578125, 21.578414916992188, 40.160118103027344, 34.61736297607422, -13.668632507324219, 22.386276245117188, 38.92456817626953, 5.970623016357422, 0.7778587341308594, 9.03680419921875, 46.600555419921875, -32.12178039550781, -0.1780242919921875, -0.5834426879882812, 15.395668029785156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000206.npy"}
|
|
{"epoch": 0.31141345427059713, "step": 207, "batch_size": 64, "mean": 13.077994346618652, "std": 19.79827308654785, "min": -29.396015167236328, "p10": -7.881428527832031, "median": 7.149354934692383, "p90": 45.013834381103514, "max": 54.32151794433594, "pos_frac": 0.75, "sample": [54.32151794433594, 2.293100357055664, -5.486719131469727, 3.1433334350585938, 11.198501586914062, 18.645780563354492, -2.0331268310546875, 5.481559753417969, 1.7313766479492188, 44.505523681640625, 10.177284240722656, 4.196441650390625, -1.2675914764404297, 45.23168182373047, -3.292062759399414, -7.943931579589844, 10.989471435546875, 29.07391357421875, 11.755460739135742, 37.95431900024414, 37.96089553833008, 9.4490966796875, 16.682846069335938, 2.319793701171875, -12.437345504760742, -7.735588073730469, -3.7832298278808594, 46.96141815185547, -12.437889099121094, 6.924858093261719, 42.048763275146484, 7.220005035400391, 52.37803649902344, -5.330192565917969, 7.078704833984375, -3.9684486389160156, -20.125017166137695, 18.29187774658203, 6.23565673828125, 2.2556819915771484, 22.335662841796875, 35.05204772949219, 4.706089019775391, 19.69324493408203, 0.917999267578125, 27.528091430664062, 27.571582794189453, -29.396015167236328, 39.255592346191406, 16.606826782226562, -21.342758178710938, 26.908323287963867, 4.633056640625, 13.268985748291016, 4.6074371337890625, 50.79204559326172, 45.905548095703125, 5.0906524658203125, 14.028884887695312, 24.10895347595215, -9.274654388427734, -1.75927734375, 4.824615478515625, 50.26298141479492], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000207.npy"}
|
|
{"epoch": 0.3129251700680272, "step": 208, "batch_size": 64, "mean": 7.732011795043945, "std": 19.917207717895508, "min": -41.784969329833984, "p10": -13.511311340332032, "median": 7.496814727783203, "p90": 36.26444282531738, "max": 46.873291015625, "pos_frac": 0.609375, "sample": [8.590627670288086, 1.926259994506836, 31.64203643798828, -6.140819549560547, 38.625030517578125, 37.63722229003906, 2.192676544189453, -5.9242706298828125, -19.2672119140625, -33.55967712402344, -4.170936584472656, -13.47265625, 34.527679443359375, 25.33000946044922, 24.852855682373047, 4.242685317993164, 8.853408813476562, 15.32012939453125, -5.277246475219727, -11.179542541503906, 26.977798461914062, -6.5996856689453125, 2.1447181701660156, -13.527877807617188, 26.757272720336914, 36.394737243652344, 32.14886474609375, -12.299762725830078, 8.96124267578125, 21.040754318237305, -6.838123321533203, -4.357086181640625, -12.634063720703125, 7.6886138916015625, -28.005630493164062, 22.474777221679688, 39.45988082885742, 28.063331604003906, 13.783805847167969, 0.2612800598144531, 46.873291015625, 12.063758850097656, 6.466026306152344, -5.430728912353516, 35.96042251586914, -7.716697692871094, 41.87010192871094, -2.8649158477783203, 21.952116012573242, 7.722431182861328, -14.233375549316406, 18.84703826904297, -0.5629959106445312, 22.804397583007812, -11.926811218261719, 11.039047241210938, 12.912612915039062, -6.235528945922852, -25.758548736572266, -2.2198486328125, -41.784969329833984, 7.305015563964844, 13.10772705078125, 38.01606750488281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000208.npy"}
|
|
{"epoch": 0.3144368858654573, "step": 209, "batch_size": 64, "mean": 16.331928253173828, "std": 23.811077117919922, "min": -52.72257995605469, "p10": -6.94539566040039, "median": 14.902300834655762, "p90": 45.96295318603516, "max": 60.299217224121094, "pos_frac": 0.765625, "sample": [43.6411018371582, 44.725608825683594, 5.5609893798828125, 16.081295013427734, -11.087844848632812, 52.32952880859375, 14.622259140014648, 41.67387390136719, 8.653461456298828, 50.7120361328125, -2.1731185913085938, 6.323768615722656, -26.404312133789062, 40.471900939941406, 1.560394287109375, 10.472640991210938, -2.132080078125, -2.0935287475585938, -1.7106361389160156, 56.59919738769531, 2.920654296875, -6.26934814453125, 0.5944900512695312, 34.85563659667969, 5.3021240234375, 6.8035125732421875, 24.464576721191406, 31.703994750976562, 50.01170349121094, 15.835762023925781, 27.851150512695312, 18.54998779296875, 6.253110885620117, 46.04126739501953, 45.78022003173828, -0.2333221435546875, 44.022308349609375, 25.71441650390625, -52.72257995605469, 9.565155029296875, -10.541458129882812, 2.2196292877197266, -4.05419921875, -7.235130310058594, 43.588958740234375, 16.06108283996582, 60.299217224121094, 40.51666259765625, 21.793663024902344, 36.96320724487305, 7.523162841796875, -24.898889541625977, 7.018482208251953, 22.44482421875, 15.182342529296875, 2.255483627319336, 26.473281860351562, -47.20948791503906, 14.531402587890625, 20.011472702026367, 42.963348388671875, 49.0186653137207, 27.181522369384766, -1.7352523803710938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000209.npy"}
|
|
{"epoch": 0.31594860166288735, "step": 210, "batch_size": 64, "mean": 15.542803764343262, "std": 22.541624069213867, "min": -29.826961517333984, "p10": -14.48196449279785, "median": 11.038400650024414, "p90": 48.97519454956055, "max": 58.90668869018555, "pos_frac": 0.78125, "sample": [50.68000030517578, 16.93958282470703, 7.93994140625, 14.388839721679688, -1.7140789031982422, 24.75463104248047, 58.90668869018555, 27.962203979492188, 52.06694793701172, 48.25677490234375, 2.592998504638672, 50.45918273925781, 49.28308868408203, 0.9462890625, 49.6187744140625, 3.6272735595703125, -26.66646957397461, -29.826961517333984, 45.112030029296875, -17.980918884277344, 45.847198486328125, 16.038143157958984, 6.739961624145508, -2.3754730224609375, 43.939910888671875, 1.6248092651367188, -0.8780784606933594, 8.908294677734375, 43.85266876220703, -4.494926452636719, 40.59540557861328, 2.228954315185547, 14.561920166015625, 8.84759521484375, 15.253963470458984, -3.9840545654296875, 2.0692481994628906, -12.225055694580078, 0.18746566772460938, 44.160057067871094, -17.126630783081055, 24.749267578125, 14.546295166015625, 5.449302673339844, 33.91200256347656, -0.5237274169921875, 16.507333755493164, 5.882732391357422, 10.452308654785156, 4.170036315917969, 16.713394165039062, 36.56852722167969, 11.624492645263672, -22.546873092651367, 3.8839645385742188, 52.458335876464844, 30.103668212890625, 29.441883087158203, 31.405874252319336, -24.898971557617188, 2.736663818359375, -15.449211120605469, 8.95477294921875, 37.47916793823242], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000210.npy"}
|
|
{"epoch": 0.31746031746031744, "step": 211, "batch_size": 64, "mean": 14.708765029907227, "std": 21.58000373840332, "min": -33.17399215698242, "p10": -8.950099182128906, "median": 8.670373916625977, "p90": 46.802790069580084, "max": 67.20986938476562, "pos_frac": 0.765625, "sample": [0.868255615234375, -33.17399215698242, 18.5111083984375, 6.892171859741211, 0.9047012329101562, 1.0202999114990234, -7.7922515869140625, -2.2943267822265625, 14.471954345703125, 3.430112838745117, 24.5870361328125, -1.1059684753417969, 26.4515380859375, 46.14067077636719, 8.956989288330078, 23.454769134521484, 2.321199417114258, -0.8139533996582031, 42.4031982421875, -14.905269622802734, 5.01763916015625, -9.446319580078125, 26.182659149169922, -11.63482666015625, 4.341129302978516, -23.736112594604492, 1.0118560791015625, 10.397064208984375, 53.599735260009766, 24.555185317993164, 24.81637954711914, 8.383758544921875, 14.987936019897461, 3.0324859619140625, 47.370635986328125, 18.94772720336914, 12.738067626953125, 3.362548828125, -10.941959381103516, 25.418052673339844, 6.152843475341797, 67.20986938476562, 57.2607421875, 2.933805465698242, -1.5713272094726562, 35.26882553100586, 47.08655548095703, 29.793899536132812, 18.7894287109375, 53.21270751953125, -20.05572509765625, 15.144962310791016, 44.912506103515625, 42.74230194091797, 38.53089141845703, 40.36865234375, 7.268566131591797, 25.371715545654297, -5.955747604370117, 49.49208068847656, -0.108856201171875, -6.948249816894531, 3.1171875, 2.6113967895507812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000211.npy"}
|
|
{"epoch": 0.31897203325774753, "step": 212, "batch_size": 64, "mean": 16.295551300048828, "std": 22.912029266357422, "min": -54.17311096191406, "p10": -7.41371726989746, "median": 17.28154754638672, "p90": 48.87076416015625, "max": 57.874507904052734, "pos_frac": 0.796875, "sample": [48.86791229248047, 29.706859588623047, 31.580108642578125, 47.36282730102539, 0.6345710754394531, 4.13470458984375, 57.874507904052734, 25.21333122253418, -5.306129455566406, 39.2762451171875, -4.330013275146484, -54.17311096191406, 40.334983825683594, 52.501373291015625, 7.280181884765625, 39.70893859863281, 35.333457946777344, 25.767518997192383, -19.473251342773438, 26.155637741088867, 15.083305358886719, 1.3677616119384766, 0.8438606262207031, 19.73572540283203, -35.12318420410156, 1.390533447265625, 17.202117919921875, -0.18445777893066406, 48.871986389160156, 50.171295166015625, 26.68375015258789, 28.143070220947266, 28.381080627441406, 17.408538818359375, 17.360977172851562, 7.606163024902344, -6.656276702880859, 25.85865020751953, 0.8539791107177734, 14.38228988647461, 0.28469085693359375, 7.671531677246094, 0.4339714050292969, -3.6753997802734375, 19.52567481994629, 34.64331817626953, 20.123685836791992, 54.06769561767578, -13.821735382080078, -10.254547119140625, -1.6189384460449219, 1.647613525390625, 29.89214324951172, -7.738334655761719, 26.102020263671875, 49.53020477294922, 42.92864227294922, 51.759437561035156, 34.7939453125, 0.5981369018554688, 6.8448944091796875, 10.125518798828125, -19.031692504882812, 0.25096893310546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000212.npy"}
|
|
{"epoch": 0.3204837490551776, "step": 213, "batch_size": 64, "mean": 18.605833053588867, "std": 24.479598999023438, "min": -55.46854019165039, "p10": -6.582703971862792, "median": 13.318428993225098, "p90": 51.77110900878907, "max": 75.93011474609375, "pos_frac": 0.796875, "sample": [39.10592269897461, 10.9619140625, 7.104103088378906, 44.3907470703125, 22.912696838378906, 8.846189498901367, -12.417724609375, -6.369266510009766, -1.426300048828125, 37.51463317871094, 52.21690368652344, 1.9827003479003906, 10.747161865234375, -6.674177169799805, -55.46854019165039, -12.136337280273438, 32.615699768066406, 75.93011474609375, -2.3285140991210938, 26.416122436523438, 41.010982513427734, -29.992603302001953, 48.52410125732422, 61.30017852783203, 38.79435729980469, 7.760263442993164, -3.703611373901367, -22.67306137084961, 4.607648849487305, 12.294626235961914, -8.10702133178711, 14.572998046875, 22.263378143310547, 12.28885269165039, 24.706802368164062, 26.09710693359375, 3.7330188751220703, 37.29374694824219, 47.791015625, 8.097457885742188, 8.724380493164062, 14.342231750488281, 51.16629409790039, 1.7780342102050781, 40.902381896972656, 16.283416748046875, 1.857818603515625, 36.043697357177734, 53.62312316894531, 4.117820739746094, -0.894683837890625, 33.45069122314453, -4.14520263671875, 1.7634429931640625, 5.267219543457031, 22.361595153808594, 2.7401123046875, 54.77325439453125, 16.949012756347656, 9.86328125, 48.712738037109375, 52.955718994140625, 45.522377014160156, 52.03031539916992], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000213.npy"}
|
|
{"epoch": 0.3219954648526077, "step": 214, "batch_size": 64, "mean": 18.587932586669922, "std": 26.090524673461914, "min": -38.55381774902344, "p10": -14.574981689453121, "median": 19.058466911315918, "p90": 52.770352935791024, "max": 69.79420471191406, "pos_frac": 0.6875, "sample": [30.3156681060791, 28.42852020263672, -1.920166015625, 12.058292388916016, 13.402603149414062, 41.36597442626953, 3.3270950317382812, 9.990316390991211, 35.54039764404297, 48.94322967529297, 51.08042907714844, 40.48578643798828, 21.43507957458496, 0.8731536865234375, 36.90385437011719, 47.162109375, 35.20983123779297, -38.55381774902344, 25.643211364746094, 34.61818313598633, -1.1511154174804688, 27.275188446044922, 16.681854248046875, -16.11773681640625, -0.6781902313232422, 50.17912292480469, -21.8677978515625, 34.35694885253906, 56.36335372924805, 43.16009521484375, 22.082977294921875, -9.218368530273438, -6.435647964477539, 5.1927642822265625, 27.437034606933594, 44.63636016845703, -21.224273681640625, -6.0473480224609375, 53.494606018066406, 69.79420471191406, -3.5680923461914062, -18.317588806152344, 46.497703552246094, 36.54792022705078, 58.3779411315918, 61.933013916015625, 11.745746612548828, -0.840606689453125, 11.103431701660156, -8.969001770019531, -10.9752197265625, 59.324195861816406, 7.69972038269043, -3.4721851348876953, 5.11079216003418, -0.6598339080810547, 55.59832763671875, 14.309394836425781, -37.49226379394531, 35.137054443359375, 25.99147605895996, 27.97292709350586, -3.513416290283203, -24.137537002563477], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000214.npy"}
|
|
{"epoch": 0.3235071806500378, "step": 215, "batch_size": 64, "mean": 18.799654006958008, "std": 27.288509368896484, "min": -40.58098602294922, "p10": -17.655781936645504, "median": 12.47576904296875, "p90": 51.560383605957036, "max": 67.31103515625, "pos_frac": 0.703125, "sample": [18.524402618408203, -27.306991577148438, -40.58098602294922, 26.967727661132812, 12.765289306640625, -27.314537048339844, 8.777938842773438, -4.685310363769531, 47.49871063232422, 51.07899856567383, 8.8740234375, -3.0900535583496094, -21.86248016357422, 16.51879119873047, 22.264923095703125, 57.0042724609375, 3.5758743286132812, 46.73536682128906, 8.325042724609375, -1.5010528564453125, 10.949234008789062, 8.011451721191406, -2.032970428466797, 35.14361572265625, 47.15570068359375, 0.9678630828857422, -1.9917984008789062, 46.609649658203125, 46.48180389404297, -11.71162223815918, -7.490901947021484, 49.098907470703125, 5.194744110107422, -13.652969360351562, -6.118499755859375, -25.288055419921875, 37.96623229980469, 33.55475616455078, 40.018463134765625, 58.89312744140625, 6.502418518066406, 13.172073364257812, -3.803985595703125, -2.0081100463867188, 44.440032958984375, 50.54005432128906, 58.79271697998047, 59.18207931518555, 43.04846954345703, 67.31103515625, 28.64151382446289, 51.203163146972656, 44.92554473876953, -22.59619140625, 12.186248779296875, 2.260517120361328, 10.23291015625, 60.59266662597656, 43.080780029296875, -0.6702175140380859, 51.713478088378906, 4.300540924072266, 45.172691345214844, -19.371273040771484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000215.npy"}
|
|
{"epoch": 0.3250188964474679, "step": 216, "batch_size": 64, "mean": 20.00769805908203, "std": 24.869125366210938, "min": -52.045127868652344, "p10": -5.604507446289062, "median": 19.306148529052734, "p90": 52.19568252563477, "max": 87.40036010742188, "pos_frac": 0.765625, "sample": [3.1098556518554688, 52.616790771484375, 32.16131591796875, 41.26725769042969, 30.370664596557617, 9.318584442138672, 19.25678253173828, 7.178825378417969, 38.36927032470703, 51.078636169433594, 41.30281066894531, 26.904891967773438, 4.686912536621094, 14.106460571289062, 19.491981506347656, -4.0770416259765625, 9.40042495727539, 16.005538940429688, -5.866119384765625, 61.67799377441406, 30.824172973632812, 55.65922546386719, 25.316627502441406, 37.84351348876953, 27.090843200683594, 12.734375, 57.606727600097656, 2.612232208251953, 3.827890396118164, 59.54341125488281, 43.972450256347656, -52.045127868652344, 14.058563232421875, 20.574634552001953, 19.355514526367188, 23.644378662109375, -25.464981079101562, 1.2783164978027344, 24.507938385009766, -3.6768054962158203, -0.48638153076171875, 87.40036010742188, 51.213096618652344, -12.127304077148438, 12.592689514160156, -17.377235412597656, 18.810409545898438, 1.680694580078125, -6.222908020019531, 30.552749633789062, 63.45463180541992, -20.242015838623047, -4.99407958984375, -0.6036796569824219, 48.336021423339844, -0.2639636993408203, 18.879716873168945, 37.25902557373047, -4.572029113769531, 46.1986083984375, -1.0441761016845703, 42.408592224121094, 20.283470153808594, 21.730560302734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000216.npy"}
|
|
{"epoch": 0.32653061224489793, "step": 217, "batch_size": 64, "mean": 21.108844757080078, "std": 26.46893882751465, "min": -39.65631103515625, "p10": -7.000574493408203, "median": 15.068052291870117, "p90": 56.8022071838379, "max": 75.40046691894531, "pos_frac": 0.765625, "sample": [37.66949462890625, 57.73065948486328, -8.061012268066406, -3.100799560546875, -0.8938255310058594, -6.398937225341797, 3.0372142791748047, 75.40046691894531, -3.46533203125, 2.90118408203125, 65.91511535644531, 68.09524536132812, 6.329032897949219, -7.258419036865234, 13.747062683105469, 6.9691925048828125, 52.299163818359375, 33.750606536865234, 7.6375274658203125, 53.766151428222656, 13.572423934936523, 34.762672424316406, 6.6634063720703125, 43.82526397705078, 9.359817504882812, 49.821353912353516, -39.65631103515625, -0.8670806884765625, 3.1190757751464844, 16.389041900634766, 53.22499465942383, 29.727088928222656, 18.735641479492188, -11.624717712402344, 34.94309997558594, -17.399822235107422, 5.615348815917969, 35.02606964111328, 0.7797584533691406, -23.407615661621094, -34.612945556640625, 54.258880615234375, 29.306373596191406, -2.6031455993652344, -0.36339569091796875, 5.629417419433594, 10.700347900390625, 12.300247192382812, 27.193138122558594, 70.70082092285156, 26.079944610595703, 46.640296936035156, 11.425172805786133, 1.8808326721191406, 60.64836883544922, 31.534713745117188, 58.07331085205078, 30.889686584472656, 28.334518432617188, 51.301025390625, 35.955406188964844, 54.63581848144531, 24.02843475341797, -1.6505126953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000217.npy"}
|
|
{"epoch": 0.328042328042328, "step": 218, "batch_size": 64, "mean": 13.540608406066895, "std": 30.26529312133789, "min": -69.99144744873047, "p10": -20.55666408538818, "median": 12.16080093383789, "p90": 53.92923355102539, "max": 74.89457702636719, "pos_frac": 0.703125, "sample": [5.984893798828125, 0.8219013214111328, 18.516281127929688, 32.02381134033203, 0.032379150390625, -9.297836303710938, 13.729888916015625, -1.967315673828125, 0.300933837890625, 54.07115173339844, 11.885726928710938, 13.285282135009766, 54.48876190185547, -49.8790283203125, -19.988494873046875, -38.95848083496094, 7.4141845703125, 57.91393280029297, -36.919708251953125, -2.4691429138183594, 34.4305419921875, 19.337753295898438, 4.4242401123046875, 19.461532592773438, 35.09785079956055, 74.89457702636719, 53.59809112548828, 69.03903198242188, 23.462827682495117, 29.4208984375, -2.8225555419921875, 40.494171142578125, 27.95147705078125, -12.206184387207031, -12.57321548461914, -18.328964233398438, 2.9361495971679688, 21.96979331970215, 0.166534423828125, 44.427032470703125, 48.38727569580078, 27.26025390625, -3.2484264373779297, 9.687065124511719, 13.363388061523438, -20.8001651763916, 37.822532653808594, -0.31548500061035156, -31.00183868408203, -13.450370788574219, 45.60725402832031, -42.92677307128906, 22.51409149169922, 0.4195823669433594, 12.435874938964844, 45.34063720703125, 5.009410858154297, 63.82805633544922, -69.99144744873047, -4.923095703125, 26.54174041748047, 11.473007202148438, 50.74107360839844, 66.65453338623047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000218.npy"}
|
|
{"epoch": 0.3295540438397581, "step": 219, "batch_size": 64, "mean": 14.142940521240234, "std": 24.763917922973633, "min": -39.686302185058594, "p10": -17.673844909667967, "median": 9.253498077392578, "p90": 51.952079772949226, "max": 61.89006042480469, "pos_frac": 0.703125, "sample": [7.5040283203125, 57.81621170043945, 39.25153350830078, 6.739046096801758, 20.893638610839844, -1.8104705810546875, 2.7873878479003906, 1.9246253967285156, 13.096611022949219, -18.76317596435547, -4.052648544311523, 60.69483184814453, 30.154998779296875, -23.07567024230957, -23.74359893798828, 21.626434326171875, 25.254493713378906, 3.038726806640625, 21.653728485107422, 2.8445587158203125, 1.8329486846923828, -8.963088989257812, -15.132072448730469, 18.064403533935547, -26.010757446289062, -0.07985687255859375, 5.410453796386719, -1.4988441467285156, 20.227935791015625, 8.454292297363281, 44.407936096191406, 52.98216247558594, 53.59577178955078, 57.20478820800781, 40.54331970214844, 49.548553466796875, -7.750755310058594, 1.803110122680664, -3.7019729614257812, 24.98678970336914, 10.502159118652344, 18.0653076171875, -39.686302185058594, 20.53595733642578, -2.6505661010742188, -1.62176513671875, 35.518707275390625, 10.052703857421875, 24.15097427368164, 61.89006042480469, 44.24504089355469, 42.627891540527344, 12.46066665649414, -3.5846786499023438, -37.3189811706543, 6.055881500244141, 6.928001403808594, 8.286529541015625, 40.83946228027344, -0.15024185180664062, 53.55883026123047, -27.119140625, 44.13309860229492, 17.668182373046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000219.npy"}
|
|
{"epoch": 0.3310657596371882, "step": 220, "batch_size": 64, "mean": 16.774028778076172, "std": 29.86203384399414, "min": -51.920162200927734, "p10": -14.494635772705077, "median": 11.503183364868164, "p90": 57.19535522460938, "max": 79.712158203125, "pos_frac": 0.671875, "sample": [3.6590633392333984, -18.111770629882812, 28.559959411621094, 10.079475402832031, 1.160623550415039, 21.085540771484375, 73.07269287109375, 13.865951538085938, 4.361278533935547, 7.043731689453125, -6.6116485595703125, 68.24691772460938, 4.48090934753418, -40.3889274597168, -13.262046813964844, -6.624900817871094, 57.01763153076172, 1.01373291015625, -2.903980255126953, 0.7140617370605469, 43.43280792236328, 15.294677734375, -2.6386585235595703, -41.35267639160156, -35.791526794433594, -15.715066909790039, -6.8328857421875, -15.02288818359375, -11.1478271484375, 39.096031188964844, 44.115264892578125, 32.72919464111328, 27.667831420898438, 50.869537353515625, 58.1370849609375, 4.139659881591797, 49.71759033203125, 39.006439208984375, 40.16600799560547, -4.250541687011719, 5.568450927734375, 29.51822280883789, -7.957054138183594, 30.44326400756836, 79.712158203125, -2.298858642578125, 63.053855895996094, -0.4568901062011719, 16.85991859436035, 57.271522521972656, 47.92993927001953, -5.820518493652344, 48.97967529296875, -0.7610092163085938, 18.704856872558594, 14.218711853027344, -51.920162200927734, 12.926891326904297, 46.291893005371094, 0.6690044403076172, 74.3760986328125, -5.646396636962891, 49.03789138793945, 34.758018493652344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000220.npy"}
|
|
{"epoch": 0.3325774754346183, "step": 221, "batch_size": 64, "mean": 18.386539459228516, "std": 29.383577346801758, "min": -47.50901794433594, "p10": -13.791241836547847, "median": 11.384347915649414, "p90": 60.91162338256838, "max": 79.395751953125, "pos_frac": 0.765625, "sample": [7.411291122436523, -15.546855926513672, 25.66376495361328, 27.87676239013672, -0.8365077972412109, 55.76390075683594, 4.0416412353515625, 6.8582916259765625, 31.910614013671875, -32.758140563964844, -8.99344253540039, 11.513168334960938, 14.929576873779297, 15.8114013671875, -22.517486572265625, 10.737768173217773, 3.369964599609375, -1.120269775390625, 30.736692428588867, 30.91326904296875, 48.65057373046875, 51.88615417480469, 11.791767120361328, 49.999603271484375, 79.01904296875, 3.38287353515625, 46.21088409423828, 73.59274291992188, -5.2508392333984375, 3.6270217895507812, 30.203018188476562, 63.11779022216797, 2.0330810546875, 24.596298217773438, 74.8431396484375, 11.25552749633789, 40.2049560546875, -20.37909698486328, 31.3426513671875, 33.929351806640625, 39.193565368652344, -9.694808959960938, 3.4278030395507812, -1.4873886108398438, 0.5076961517333984, -0.43976593017578125, 0.2239551544189453, 68.80320739746094, 1.92730712890625, 8.749393463134766, -37.85716247558594, -47.50901794433594, 2.231342315673828, 79.395751953125, 45.545379638671875, -34.82160949707031, 9.253448486328125, 22.770851135253906, 72.14436340332031, 44.55120849609375, -0.35074615478515625, 1.0368194580078125, 44.34379577636719, 14.971214294433594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000221.npy"}
|
|
{"epoch": 0.3340891912320484, "step": 222, "batch_size": 64, "mean": 24.98198699951172, "std": 31.070524215698242, "min": -46.16423034667969, "p10": -8.685395431518554, "median": 27.609929084777832, "p90": 61.568582534790046, "max": 87.4515380859375, "pos_frac": 0.78125, "sample": [-1.7576160430908203, -30.43773651123047, 29.514446258544922, 10.214813232421875, 54.521366119384766, 51.90290832519531, 29.127300262451172, -8.763408660888672, 57.27433776855469, 67.0149917602539, 27.546783447265625, 4.760736465454102, -40.605018615722656, 3.0860137939453125, 54.611167907714844, 4.700736999511719, 10.476066589355469, 8.00076675415039, 10.077384948730469, 46.44277572631836, 38.730010986328125, -0.4864006042480469, 51.99674987792969, 58.062156677246094, -4.378822326660156, 64.4347915649414, -5.686256408691406, 82.51797485351562, -11.307374954223633, 46.361351013183594, 27.67307472229004, 9.82635498046875, 62.269866943359375, 56.11770248413086, 14.398452758789062, 33.91050720214844, 41.34416198730469, -8.503364562988281, -6.038259506225586, -4.2556304931640625, -46.16423034667969, 26.151161193847656, 31.940574645996094, 12.729927062988281, 2.089214324951172, 87.4515380859375, 11.776588439941406, -42.58695983886719, 1.4133987426757812, 49.92168426513672, 53.87566375732422, 0.4090080261230469, 50.796173095703125, 71.60784149169922, 39.919036865234375, 9.079940795898438, 62.08005905151367, 56.112945556640625, 9.705841064453125, 60.06770324707031, 40.6954345703125, 31.20185089111328, 60.37513732910156, -16.4981689453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000222.npy"}
|
|
{"epoch": 0.3356009070294785, "step": 223, "batch_size": 64, "mean": 21.921615600585938, "std": 28.25886344909668, "min": -28.95034408569336, "p10": -10.835313415527342, "median": 20.06884765625, "p90": 58.542368698120136, "max": 82.59382629394531, "pos_frac": 0.71875, "sample": [42.628456115722656, 5.7988128662109375, 49.545860290527344, 7.2662353515625, -3.9202346801757812, 39.890628814697266, -0.4995574951171875, 35.19291687011719, 7.9197540283203125, 34.91407012939453, 2.925107955932617, 71.42564392089844, 43.434104919433594, 49.79851531982422, 5.853435516357422, -17.428470611572266, -0.5641021728515625, 42.600196838378906, 0.19500732421875, -5.47674560546875, 54.042911529541016, 24.40593719482422, -16.69314956665039, 41.72996520996094, -11.807846069335938, 4.316658020019531, -4.14689826965332, 41.99354553222656, -4.147701263427734, -18.619064331054688, 43.86778259277344, 10.449016571044922, -23.087127685546875, 4.283565521240234, -3.2457122802734375, 71.47511291503906, 53.41870880126953, 31.568256378173828, -8.090682983398438, -28.95034408569336, -8.566070556640625, -12.638914108276367, 30.901702880859375, 41.19288635253906, 67.78567504882812, 8.519329071044922, 20.68609619140625, 44.52934265136719, 25.803115844726562, 60.1756706237793, 80.3995590209961, -6.754020690917969, 82.59382629394531, 63.84404754638672, 49.27632141113281, 31.50079345703125, 48.1875, 54.73133087158203, 25.12493133544922, -2.8275146484375, 0.9859161376953125, 19.45159912109375, 1.5759849548339844, 2.2417526245117188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000223.npy"}
|
|
{"epoch": 0.3371126228269085, "step": 224, "batch_size": 64, "mean": 12.311203956604004, "std": 29.42028045654297, "min": -49.78230285644531, "p10": -27.232344818115234, "median": 5.843591690063477, "p90": 55.20755348205567, "max": 80.15573120117188, "pos_frac": 0.671875, "sample": [16.58626937866211, 80.15573120117188, 18.764789581298828, 21.287673950195312, 8.856414794921875, 62.64390182495117, 32.33583068847656, 40.18772888183594, -1.4802818298339844, 3.5756378173828125, 57.50034713745117, 5.7027587890625, 45.364341735839844, 53.827022552490234, -31.414348602294922, -1.0986671447753906, 7.128194808959961, 17.573833465576172, 16.928443908691406, 0.5470123291015625, 5.545936584472656, 27.217010498046875, -1.8028106689453125, -49.78230285644531, -9.375801086425781, 0.032497406005859375, -10.385147094726562, -17.9945068359375, 23.610076904296875, 2.7074737548828125, 45.72667694091797, -46.00938034057617, -31.108362197875977, -16.3009033203125, -3.406890869140625, 0.1890239715576172, -1.4478187561035156, -44.976097106933594, 5.878170013427734, 21.710708618164062, -28.10327911376953, 32.449790954589844, 46.870643615722656, 8.503646850585938, -27.424942016601562, 5.809013366699219, 15.842132568359375, 42.1732292175293, -26.78295135498047, -7.224586486816406, -1.756988525390625, 25.6704158782959, 49.55792236328125, 55.79920959472656, 76.64703369140625, 26.45539093017578, 4.482307434082031, 6.89276123046875, -0.7389812469482422, 63.70188522338867, -4.5341339111328125, 63.21952819824219, 3.4215087890625, 1.9863319396972656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000224.npy"}
|
|
{"epoch": 0.3386243386243386, "step": 225, "batch_size": 64, "mean": 23.437641143798828, "std": 30.97134017944336, "min": -37.390045166015625, "p10": -4.0576225280761715, "median": 18.259641647338867, "p90": 70.18205108642579, "max": 86.06570434570312, "pos_frac": 0.78125, "sample": [1.0402584075927734, 30.653255462646484, 7.0764007568359375, -0.23736572265625, -3.783447265625, 37.54254150390625, -3.7619590759277344, 0.3230705261230469, 38.648353576660156, 10.714508056640625, 8.109321594238281, 61.364898681640625, 9.629364013671875, 72.33256530761719, -19.64535140991211, 2.468189239501953, 22.172134399414062, 6.7332305908203125, 52.67112350463867, 68.87684631347656, 70.727783203125, 24.928770065307617, 80.32930755615234, 37.14653015136719, 86.06570434570312, 23.047595977783203, 0.37103271484375, 52.28648376464844, 68.90867614746094, -3.6805343627929688, 0.5249519348144531, -28.975738525390625, 40.090126037597656, -9.915699005126953, 78.38079071044922, 33.03172302246094, 1.2527828216552734, 20.141204833984375, 46.74810791015625, 17.298847198486328, 2.7918624877929688, 7.3973846435546875, 67.26927947998047, -2.600595474243164, 33.282936096191406, -35.32476806640625, -23.817047119140625, -4.114799499511719, 21.17670440673828, 14.754104614257812, 50.62727355957031, 0.9954814910888672, 5.5944671630859375, 77.67501831054688, 58.269474029541016, -0.9394721984863281, 7.0415802001953125, 19.220436096191406, 73.74118041992188, 37.516845703125, -37.390045166015625, 29.263748168945312, 59.86585235595703, -3.9242095947265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000225.npy"}
|
|
{"epoch": 0.3401360544217687, "step": 226, "batch_size": 64, "mean": 18.736942291259766, "std": 35.193546295166016, "min": -63.594879150390625, "p10": -25.255218505859368, "median": 15.178056716918945, "p90": 64.64050369262695, "max": 78.40231323242188, "pos_frac": 0.71875, "sample": [6.611351013183594, -28.237022399902344, 34.395172119140625, 27.03687858581543, 17.358901977539062, 25.773712158203125, 32.22386169433594, 2.4164466857910156, 41.784324645996094, -15.37563705444336, 54.81596374511719, 12.847475051879883, 49.19731903076172, 59.002811431884766, -44.55657196044922, -1.0073699951171875, 3.593709945678711, -63.594879150390625, 61.328067779541016, 32.50068664550781, 69.56440734863281, -4.9722442626953125, 56.79656982421875, 3.7978973388671875, 35.13323211669922, 0.13048744201660156, 8.647026062011719, 64.69912719726562, 56.28614044189453, 38.297142028808594, -9.148340225219727, 66.37837219238281, 15.434513092041016, 22.058273315429688, 70.0049057006836, 8.400375366210938, 38.40613555908203, -54.55018615722656, -2.9763126373291016, 2.6490135192871094, 0.11297607421875, 9.735336303710938, -18.29767608642578, -52.59368896484375, 71.39639282226562, 78.40231323242188, -40.832847595214844, 14.921600341796875, -47.65333557128906, 35.781639099121094, 13.792224884033203, 3.584016799926758, 19.554134368896484, 16.370542526245117, -2.7401275634765625, 74.49505615234375, 59.698028564453125, 64.50371551513672, -10.125381469726562, -3.668336868286133, 63.74183654785156, -1.061483383178711, -5.880863189697266, 62.776512145996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000226.npy"}
|
|
{"epoch": 0.3416477702191988, "step": 227, "batch_size": 64, "mean": 19.684673309326172, "std": 36.05058288574219, "min": -62.54161071777344, "p10": -25.285150146484373, "median": 9.483898162841797, "p90": 71.44070281982422, "max": 79.82060241699219, "pos_frac": 0.71875, "sample": [47.536102294921875, 13.35361099243164, 1.1621170043945312, 66.37648010253906, 1.618927001953125, 0.2645416259765625, 50.404571533203125, 74.9871826171875, -62.54161071777344, -2.7891292572021484, 79.82060241699219, -28.71466827392578, -2.552042007446289, 73.2757339477539, 15.649162292480469, 10.173484802246094, 3.4603614807128906, -1.7519111633300781, 66.12821960449219, -11.302825927734375, 27.883865356445312, 66.08780670166016, 3.2893028259277344, 5.370893478393555, 79.5472183227539, 31.208641052246094, -1.5840110778808594, 49.07969665527344, -3.8053760528564453, -2.76220703125, 42.924163818359375, 78.541748046875, 34.12236022949219, 10.346466064453125, 5.65545654296875, 49.166778564453125, 79.71710205078125, 3.1259384155273438, 24.02281951904297, 71.6324691772461, 5.688041687011719, 17.215118408203125, -28.889678955078125, 19.37994384765625, 57.54936218261719, 2.577606201171875, 4.5240478515625, 3.5639400482177734, -22.118637084960938, -0.8945960998535156, -48.58103942871094, 43.35395050048828, 21.433761596679688, 49.14958190917969, 2.995351791381836, -45.49755859375, 8.7943115234375, -0.863311767578125, 70.99324798583984, -4.2191314697265625, -55.086952209472656, -26.642227172851562, 66.463623046875, 70.80025482177734], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000227.npy"}
|
|
{"epoch": 0.3431594860166289, "step": 228, "batch_size": 64, "mean": 21.450952529907227, "std": 32.23533630371094, "min": -52.63847732543945, "p10": -15.537391662597653, "median": 18.531646728515625, "p90": 69.020361328125, "max": 86.49937438964844, "pos_frac": 0.75, "sample": [0.1268939971923828, 56.98381805419922, 3.84649658203125, 72.41825866699219, 17.9835205078125, 31.201309204101562, 27.443511962890625, 76.17679595947266, 8.9508056640625, -37.97034454345703, 33.341552734375, 5.2462158203125, 9.708770751953125, -52.63847732543945, 17.793777465820312, 2.761859893798828, 62.3787956237793, -17.989288330078125, 9.039466857910156, 24.94445037841797, 1.0036983489990234, 69.15838623046875, -45.87030029296875, 15.931266784667969, -9.284500122070312, 60.482234954833984, 48.520606994628906, 86.49937438964844, 7.283041000366211, 48.23725128173828, -10.035266876220703, -11.648544311523438, 58.63557815551758, 3.6070175170898438, 39.985572814941406, -3.2581424713134766, 41.50529861450195, -1.8929023742675781, -17.20404052734375, -7.347402572631836, 19.07977294921875, 43.21501922607422, 29.64190673828125, -33.359764099121094, 23.958648681640625, 2.7460670471191406, 74.75565338134766, 80.59153747558594, -22.616424560546875, 14.408309936523438, -2.9509124755859375, 15.234416961669922, 33.6331787109375, 30.91844940185547, -4.603263854980469, 22.138160705566406, 68.69830322265625, 71.36119842529297, 62.84764862060547, 35.530662536621094, -5.864128112792969, 23.847206115722656, 43.04545593261719, 20.547462463378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000228.npy"}
|
|
{"epoch": 0.34467120181405897, "step": 229, "batch_size": 64, "mean": 19.097354888916016, "std": 37.25196075439453, "min": -73.18119812011719, "p10": -13.40556640625, "median": 12.837600708007812, "p90": 65.70505294799804, "max": 87.6446533203125, "pos_frac": 0.703125, "sample": [41.82582092285156, 65.5256576538086, 40.520294189453125, 31.40032196044922, 15.462570190429688, 24.483352661132812, 45.732688903808594, 27.627286911010742, 21.900863647460938, -37.946929931640625, 60.415374755859375, 37.078948974609375, 67.1550521850586, -4.450218200683594, 28.03338623046875, 5.818769454956055, -66.66986083984375, -6.861843109130859, 65.26639556884766, 64.05776977539062, 10.212631225585938, 1.6730594635009766, 21.872352600097656, 30.10753631591797, -45.40925598144531, 5.743038177490234, -13.735664367675781, -0.8949623107910156, 4.26336669921875, -12.635337829589844, -2.08038330078125, 85.27755737304688, -73.18119812011719, 61.53358459472656, -11.866622924804688, 3.999917984008789, 60.301513671875, -48.73159408569336, 3.4318771362304688, 25.95184326171875, 64.74115753173828, 33.02627182006836, -0.09753608703613281, -2.484201431274414, 52.90028762817383, -0.0523681640625, 74.0287857055664, 23.384151458740234, 67.79854583740234, 6.931573867797852, 1.6394119262695312, -0.8946380615234375, 38.05614471435547, 0.0656585693359375, 79.05661010742188, 2.8270111083984375, 0.11174774169921875, -1.1381969451904297, 65.78193664550781, 87.6446533203125, 63.49622344970703, -0.12518310546875, 3.2870941162109375, -69.96331787109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000229.npy"}
|
|
{"epoch": 0.34618291761148906, "step": 230, "batch_size": 64, "mean": 22.495281219482422, "std": 33.36581039428711, "min": -76.09282684326172, "p10": -2.6623029708862287, "median": 11.402950286865234, "p90": 70.41036300659181, "max": 107.74417114257812, "pos_frac": 0.84375, "sample": [-3.460926055908203, 12.008087158203125, 61.54438781738281, 1.930419921875, 16.59001922607422, -41.834716796875, 2.6455631256103516, 1.9558143615722656, 10.797813415527344, 76.73809051513672, 19.40362548828125, 76.20882415771484, 71.76464080810547, 61.2998046875, 23.215133666992188, 35.293968200683594, 29.768287658691406, 66.88475799560547, -27.307640075683594, 4.371002197265625, -76.09282684326172, 2.51129150390625, 12.103118896484375, 41.93368148803711, 37.524635314941406, -0.6648788452148438, 72.98357391357422, 10.348594665527344, 67.25038146972656, 56.301361083984375, 38.684940338134766, 4.336505889892578, -0.7988491058349609, 2.0231685638427734, 5.0383758544921875, 17.22142791748047, 6.618766784667969, 8.741554260253906, 107.74417114257812, 14.767776489257812, 8.529098510742188, 5.003211975097656, 3.663135528564453, 31.58879852294922, 92.89107513427734, 45.03003692626953, -23.304258346557617, 76.03116607666016, 7.995513916015625, 0.36605072021484375, 56.89967346191406, -23.42212677001953, 65.0226821899414, 43.31798553466797, 24.604103088378906, 7.251502990722656, 2.710704803466797, -0.0048465728759765625, 13.517702102661133, 3.002685546875, 8.207460403442383, 59.596099853515625, -5.546104431152344, 8.352924346923828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000230.npy"}
|
|
{"epoch": 0.3476946334089191, "step": 231, "batch_size": 64, "mean": 34.58314514160156, "std": 36.20094680786133, "min": -75.29693603515625, "p10": -4.9154216766357415, "median": 40.60606384277344, "p90": 79.34579849243164, "max": 91.109619140625, "pos_frac": 0.828125, "sample": [67.68043518066406, 16.925979614257812, 55.25014877319336, 79.80377960205078, 2.44171142578125, 28.562294006347656, 5.857509613037109, 76.58212280273438, 15.113075256347656, 9.086061477661133, 53.631385803222656, 3.6061553955078125, 49.018585205078125, -5.167934417724609, -5.5242156982421875, 52.34644317626953, 16.33544921875, 76.10102081298828, 20.694931030273438, -18.13874053955078, 48.778533935546875, -4.326225280761719, 53.61695861816406, 58.94601821899414, 4.802894592285156, 86.49336242675781, -4.20343017578125, -32.598812103271484, 82.48184204101562, 45.62807846069336, 75.80965423583984, -57.90859603881836, 41.934051513671875, 76.41792297363281, 66.54005432128906, 13.318695068359375, 4.179914474487305, 36.773250579833984, 1.710174560546875, -2.8922805786132812, 91.109619140625, 5.9677886962890625, 38.941246032714844, 35.41566467285156, 42.69220733642578, 23.133535385131836, 40.56456756591797, -13.544925689697266, 18.253841400146484, -2.696086883544922, 68.25480651855469, 83.77871704101562, 40.647560119628906, 57.600975036621094, 65.1021728515625, 80.68859100341797, 42.21697998046875, 62.6649169921875, 46.171722412109375, 66.29267883300781, -75.29693603515625, 39.961143493652344, 78.27717590332031, 81.41522216796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000231.npy"}
|
|
{"epoch": 0.3492063492063492, "step": 232, "batch_size": 64, "mean": 23.126924514770508, "std": 41.42884063720703, "min": -96.2864990234375, "p10": -18.46896820068359, "median": 12.087669372558594, "p90": 81.68711166381837, "max": 119.864501953125, "pos_frac": 0.734375, "sample": [1.9565200805664062, 75.55207824707031, 56.14527893066406, 85.09477996826172, 6.679704666137695, 63.84014892578125, 34.337791442871094, 77.47199249267578, -1.1471939086914062, -22.6671142578125, -22.333812713623047, 6.326202392578125, 2.6364822387695312, 19.586223602294922, 27.647350311279297, 74.84310913085938, -0.737823486328125, 3.0452423095703125, 49.13432312011719, 5.370105743408203, -0.008880615234375, 45.083946228027344, 52.00421142578125, 36.7198486328125, 7.8381500244140625, 45.53655242919922, -7.706399917602539, 83.87504577636719, -12.279346466064453, 13.325599670410156, 38.33903503417969, 77.37184143066406, -14.959606170654297, -1.5008049011230469, 24.81634521484375, 3.13543701171875, 0.7497310638427734, -57.26099395751953, 88.43278503417969, 24.259597778320312, 0.797607421875, 84.84814453125, -15.437797546386719, 111.31387329101562, 83.49359130859375, 119.864501953125, -17.219764709472656, -19.00434112548828, -8.068122863769531, 11.434419631958008, 25.318763732910156, 72.58721160888672, 51.387786865234375, 11.825241088867188, 4.272480010986328, -51.559913635253906, 0.7231388092041016, 74.29844665527344, 10.369468688964844, 32.07540512084961, -29.913490295410156, -96.2864990234375, 12.35009765625, 20.099393844604492], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000232.npy"}
|
|
{"epoch": 0.3507180650037793, "step": 233, "batch_size": 64, "mean": 17.855308532714844, "std": 35.79460525512695, "min": -61.10038757324219, "p10": -20.80551643371582, "median": 7.754217147827148, "p90": 66.33196029663087, "max": 93.90162658691406, "pos_frac": 0.671875, "sample": [89.3265151977539, 8.501747131347656, 66.54088592529297, -33.56386947631836, 8.231914520263672, 31.135345458984375, 29.399276733398438, 17.37563705444336, -6.400547027587891, -9.235649108886719, 11.386558532714844, 93.90162658691406, -3.9299888610839844, 0.10095977783203125, -3.3569564819335938, 19.496002197265625, 62.0944938659668, 5.618459701538086, -2.3649368286132812, 2.4695606231689453, 76.63911437988281, 4.7180938720703125, 59.80840301513672, 71.14247131347656, -20.500198364257812, 18.304595947265625, -11.03558349609375, 36.14104461669922, -39.635498046875, -32.924835205078125, 61.717140197753906, 2.9470596313476562, 64.77890014648438, 32.95326614379883, 57.19206237792969, -61.10038757324219, 13.960250854492188, 2.357646942138672, -3.743011474609375, 1.1189422607421875, 7.276519775390625, -51.52848815917969, 34.95916748046875, -0.22259521484375, 35.048301696777344, -9.813407897949219, -6.842071533203125, 37.30128479003906, 60.83027648925781, 54.514251708984375, -20.93636703491211, -16.40489387512207, 1.8722114562988281, 89.60901641845703, 39.51460266113281, 87.75798034667969, 3.8438034057617188, -8.315807342529297, 65.84446716308594, 4.83502197265625, 27.002197265625, -2.7372970581054688, -27.335725784301758, 15.100685119628906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000233.npy"}
|
|
{"epoch": 0.35222978080120937, "step": 234, "batch_size": 64, "mean": 17.907297134399414, "std": 38.75445556640625, "min": -58.424434661865234, "p10": -29.61852760314941, "median": 5.565151214599609, "p90": 72.73220748901367, "max": 87.61054992675781, "pos_frac": 0.671875, "sample": [4.351615905761719, -58.16438293457031, 5.279640197753906, -23.522613525390625, 39.97465896606445, -0.6307182312011719, 1.9973773956298828, -18.854904174804688, 59.067901611328125, -20.88811492919922, 19.873016357421875, 32.2786750793457, 72.3682632446289, 18.347856521606445, 81.01178741455078, 58.08964920043945, -31.370628356933594, -58.424434661865234, 45.16020202636719, -3.295248031616211, 4.820940017700195, 1.6456871032714844, 73.05218505859375, 87.61054992675781, -0.6108989715576172, -7.4712066650390625, 71.40414428710938, -0.5512294769287109, -33.642478942871094, 10.503562927246094, 65.54742431640625, 84.68572998046875, 78.56534576416016, 64.85279846191406, -3.7681140899658203, -58.118125915527344, -3.5158233642578125, 3.535125732421875, 4.289402008056641, -25.530292510986328, 67.93147277832031, 43.992767333984375, 5.306797027587891, 39.56420135498047, 65.53424835205078, 1.5335960388183594, 4.39080810546875, 38.18138122558594, 72.88818359375, -18.93259620666504, -13.975051879882812, 27.97196388244629, -36.17767333984375, 27.741870880126953, -38.11448669433594, 2.2448463439941406, 50.56877136230469, 9.4794921875, 7.822914123535156, 5.823505401611328, 19.268844604492188, 84.72901153564453, 57.141517639160156, -18.803695678710938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000234.npy"}
|
|
{"epoch": 0.35374149659863946, "step": 235, "batch_size": 64, "mean": 30.72690200805664, "std": 35.72421646118164, "min": -58.28933334350586, "p10": -4.0667682647705075, "median": 28.41862201690674, "p90": 77.5781463623047, "max": 95.5806655883789, "pos_frac": 0.84375, "sample": [5.138916015625, 22.044647216796875, 13.664482116699219, 29.428632736206055, 36.83927917480469, 27.408611297607422, 17.59514617919922, 95.5806655883789, 94.10261535644531, 62.49168395996094, 30.909103393554688, 62.39640808105469, 25.689422607421875, 36.070518493652344, 76.78932189941406, 68.21614074707031, 36.894081115722656, 8.666423797607422, 65.4403076171875, 80.32846069335938, 7.4153594970703125, 10.000640869140625, 22.83758544921875, 79.9017333984375, 76.68949127197266, 49.856590270996094, -4.1592559814453125, -6.92828369140625, 76.27938842773438, -58.28933334350586, 44.204833984375, 2.620901107788086, 7.890832901000977, 19.543060302734375, 77.91621398925781, 8.169401168823242, 30.879871368408203, 5.9626922607421875, 36.817291259765625, -42.31352233886719, -16.278837203979492, 53.17414093017578, 32.337066650390625, 9.49856948852539, 61.36198425292969, 5.376972198486328, -2.1065139770507812, 74.64787292480469, 90.86012268066406, 7.891590118408203, 19.66761016845703, -16.698257446289062, 85.17060852050781, 36.056427001953125, 0.07387542724609375, -55.629791259765625, 51.245338439941406, 66.90074157714844, 75.90388488769531, 3.8148193359375, 46.563209533691406, -3.850963592529297, 2.3452491760253906, -2.7942562103271484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000235.npy"}
|
|
{"epoch": 0.35525321239606955, "step": 236, "batch_size": 64, "mean": 22.92660903930664, "std": 41.233516693115234, "min": -92.78024291992188, "p10": -12.10850257873535, "median": 10.89624309539795, "p90": 81.09524383544922, "max": 102.12876892089844, "pos_frac": 0.75, "sample": [71.40071105957031, -4.539146423339844, 3.7946701049804688, 63.46426010131836, 10.849157333374023, 2.6027069091796875, 72.72149658203125, 22.689163208007812, -11.161994934082031, 5.12908935546875, 36.17558288574219, 7.516754150390625, -7.767522811889648, 4.67242431640625, -8.352075576782227, 3.9116287231445312, 59.37205505371094, 69.85014343261719, 33.40234375, 78.11414337158203, 2.8684349060058594, -5.854827880859375, 2.950592041015625, 0.6316184997558594, 9.263374328613281, 11.635009765625, 49.315162658691406, 67.70108032226562, 82.64732360839844, 50.22943115234375, 78.37991333007812, -2.0640792846679688, 9.005661010742188, 12.995845794677734, 61.01441955566406, -29.915786743164062, 6.3764495849609375, 102.12876892089844, 12.3857421875, 14.708791732788086, -59.88177490234375, 25.023578643798828, 69.75572204589844, 81.7755355834961, 40.69911193847656, 40.182647705078125, -2.063751220703125, -14.787879943847656, 83.84495544433594, 2.429882049560547, 1.7992439270019531, -92.78024291992188, -87.56596374511719, 86.39100646972656, 85.74201965332031, 79.50789642333984, 10.943328857421875, -9.953756332397461, -13.240589141845703, -12.514148712158203, 84.17879486083984, -2.6016616821289062, 11.534521102905273, 8.63592529296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000236.npy"}
|
|
{"epoch": 0.35676492819349964, "step": 237, "batch_size": 64, "mean": 27.795513153076172, "std": 42.335655212402344, "min": -87.62893676757812, "p10": -22.179537963867187, "median": 23.37424087524414, "p90": 79.74594345092774, "max": 104.89140319824219, "pos_frac": 0.75, "sample": [70.8566665649414, 70.44481658935547, 6.138580322265625, 88.03881072998047, -1.6702957153320312, -25.66387939453125, 13.812088012695312, 17.527862548828125, 8.315227508544922, 69.11983489990234, 41.876609802246094, 35.389739990234375, -9.287147521972656, -2.2679595947265625, 71.17626953125, 19.81407928466797, -2.569957733154297, 41.574180603027344, -37.4510498046875, 72.13981628417969, -75.81333923339844, 62.451454162597656, 25.452865600585938, 60.36348342895508, 101.10762023925781, -4.183324813842773, 54.0399169921875, 61.71044158935547, -22.781707763671875, 28.85470962524414, 80.03501892089844, 22.085464477539062, 9.307441711425781, -87.62893676757812, 50.895484924316406, 5.094524383544922, 6.001182556152344, -66.73983001708984, 104.25259399414062, 11.782106399536133, 37.68400573730469, 24.251487731933594, 17.847015380859375, 36.297027587890625, 4.862861633300781, -0.14451980590820312, 48.33136749267578, 67.09213256835938, 32.404327392578125, 79.0714340209961, 77.75956726074219, 11.709403991699219, 86.94422149658203, 104.89140319824219, -23.567230224609375, 10.158138275146484, -2.6831798553466797, 73.00411224365234, 4.6888427734375, 36.80035400390625, -10.286205291748047, 22.496994018554688, -20.77447509765625, 86.47230529785156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000237.npy"}
|
|
{"epoch": 0.35827664399092973, "step": 238, "batch_size": 64, "mean": 27.931781768798828, "std": 40.180419921875, "min": -68.59974670410156, "p10": -22.56655464172363, "median": 26.564929962158203, "p90": 81.85365829467774, "max": 97.68910217285156, "pos_frac": 0.734375, "sample": [-24.095401763916016, 7.486000061035156, 75.457763671875, -17.712242126464844, 72.90522003173828, 82.58008575439453, 2.553417205810547, 18.042396545410156, 84.25716400146484, -35.52435302734375, 23.46764373779297, -11.380081176757812, 85.8242416381836, 16.858497619628906, 54.53980255126953, 34.86640930175781, -2.3341903686523438, -13.126623153686523, 49.8084716796875, 83.7524642944336, 63.671287536621094, 75.78770446777344, 65.94683837890625, 57.04281997680664, 3.079906463623047, 75.4599380493164, 25.542518615722656, -26.97045135498047, -32.800167083740234, 61.550743103027344, 62.467201232910156, 56.85801315307617, 2.992563247680664, 80.15866088867188, -2.934490203857422, 11.922430038452148, 46.057769775390625, -3.1605606079101562, 83.8407211303711, 70.01889038085938, 4.600433349609375, -13.767005920410156, 0.8665885925292969, 44.15031433105469, 47.643089294433594, 30.250320434570312, 97.68910217285156, 41.6868896484375, 33.51750183105469, 50.806610107421875, 6.8795166015625, 51.506919860839844, 78.62899780273438, -18.999244689941406, -10.68562126159668, 0.7240715026855469, -68.59974670410156, 7.3858642578125, 92.44068908691406, -14.578201293945312, 27.58734130859375, -33.03276824951172, -37.25468826293945, 3.428140640258789], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000238.npy"}
|
|
{"epoch": 0.35978835978835977, "step": 239, "batch_size": 64, "mean": 26.812570571899414, "std": 42.72753143310547, "min": -72.66820526123047, "p10": -19.99174118041992, "median": 21.30475616455078, "p90": 87.93661041259766, "max": 109.31849670410156, "pos_frac": 0.765625, "sample": [15.213676452636719, 5.929595947265625, 49.783966064453125, -0.23056411743164062, 96.85334777832031, 6.8135223388671875, -6.369647979736328, 1.8002395629882812, -12.429443359375, 22.360008239746094, -15.711616516113281, 32.696380615234375, 1.8480796813964844, 88.93586730957031, 72.64811706542969, 8.383514404296875, -29.09878158569336, -40.005123138427734, 8.3673095703125, 86.62847900390625, 4.6947021484375, 20.495262145996094, 76.09931182861328, 91.5669174194336, 83.60029602050781, 26.252389907836914, 47.581512451171875, 53.29584884643555, 38.70680236816406, 12.237701416015625, 82.10333251953125, 38.88103485107422, -41.388954162597656, -12.003768920898438, 24.8121337890625, 74.08432006835938, -2.5251007080078125, 10.470891952514648, -21.826080322265625, 38.56269836425781, -2.2882728576660156, 17.025894165039062, 19.957054138183594, 12.346725463867188, -66.76055908203125, 46.264976501464844, 106.68543243408203, 105.9844970703125, 109.31849670410156, -60.97966384887695, 0.2375335693359375, 8.758895874023438, -72.66820526123047, 76.72319793701172, 40.81829833984375, 64.36170959472656, 88.49723815917969, -4.614429473876953, 29.657241821289062, 56.071075439453125, 3.0943145751953125, 42.778419494628906, 32.50225067138672, 22.11425018310547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000239.npy"}
|
|
{"epoch": 0.36130007558578986, "step": 240, "batch_size": 64, "mean": 34.92331314086914, "std": 42.41022491455078, "min": -82.73184967041016, "p10": -10.641552352905272, "median": 31.37297248840332, "p90": 91.7740089416504, "max": 108.68539428710938, "pos_frac": 0.796875, "sample": [47.86725997924805, -11.407257080078125, 84.25302124023438, 1.133056640625, 58.3585205078125, 108.68539428710938, 33.8819580078125, 43.76428985595703, -40.41313171386719, -33.9223747253418, 1.3866500854492188, -3.8474292755126953, 64.39680480957031, 25.208768844604492, 62.133235931396484, 92.3918228149414, 66.99118041992188, -19.710229873657227, 90.2015609741211, 33.69420623779297, -8.854907989501953, 74.28943634033203, 33.782230377197266, -32.20623016357422, 31.81195068359375, 50.11387634277344, 23.706146240234375, 85.13096618652344, 106.62521362304688, 21.269676208496094, 81.17528533935547, 108.06816101074219, 6.795158386230469, 12.83050537109375, 2.6916351318359375, 68.67962646484375, 21.445762634277344, 76.05374145507812, 85.36451721191406, 5.983919143676758, 73.61656951904297, 11.469257354736328, -13.05324935913086, 43.751678466796875, 97.32584381103516, -82.73184967041016, -3.9298572540283203, -0.09810638427734375, 39.87089538574219, 0.81988525390625, 30.93399429321289, 90.33244323730469, -1.8138809204101562, 9.996871948242188, 1.8073158264160156, 14.228096008300781, 94.71678161621094, 98.32572937011719, 58.72801208496094, 21.32889747619629, 76.11759185791016, 5.256065368652344, -4.670982360839844, 12.9599609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000240.npy"}
|
|
{"epoch": 0.36281179138321995, "step": 241, "batch_size": 64, "mean": 22.992887496948242, "std": 41.95619583129883, "min": -85.01871490478516, "p10": -21.539677429199212, "median": 16.872994422912598, "p90": 80.76660766601563, "max": 110.4867935180664, "pos_frac": 0.671875, "sample": [-8.6915283203125, -28.95220184326172, 60.80524444580078, -30.91581916809082, 59.76317596435547, -16.615570068359375, 20.284927368164062, 52.56212615966797, 78.37400817871094, 64.78465270996094, -6.5324249267578125, 61.266571044921875, -11.148365020751953, 21.624025344848633, 12.445457458496094, 77.32461547851562, 1.8603477478027344, 3.299520492553711, 2.8091659545898438, 78.18284606933594, 54.52025604248047, 38.02305603027344, -5.6772308349609375, -5.287101745605469, 25.48302459716797, -4.414741516113281, 64.81172180175781, -0.5819759368896484, -11.758346557617188, -23.650009155273438, 86.23257446289062, 102.94174194335938, 100.02572631835938, 19.343429565429688, -1.0104751586914062, -52.46394348144531, -6.3925933837890625, 23.279159545898438, 33.58659362792969, 4.956356048583984, -85.01871490478516, 3.1128463745117188, 0.32735443115234375, 84.40013122558594, 17.79155921936035, 30.193702697753906, -56.95587158203125, 110.4867935180664, 32.267303466796875, -44.03861999511719, 90.48446655273438, 15.954429626464844, 43.00846862792969, 19.24365234375, 81.79200744628906, -6.308650970458984, 13.184371948242188, 75.69625091552734, 63.136680603027344, 49.96099853515625, 2.1169281005859375, 8.542436599731445, -8.468626022338867, -3.863157272338867], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000241.npy"}
|
|
{"epoch": 0.36432350718065004, "step": 242, "batch_size": 64, "mean": 30.96282196044922, "std": 44.47850799560547, "min": -87.84974670410156, "p10": -5.717323684692382, "median": 22.679428100585938, "p90": 92.94622268676758, "max": 135.05349731445312, "pos_frac": 0.765625, "sample": [-62.73706817626953, 21.69775390625, 23.82384490966797, 2.8355712890625, 41.82976531982422, -38.10011672973633, 96.1825180053711, 66.3726806640625, 29.26177978515625, 45.901023864746094, 67.86354064941406, 62.841983795166016, 0.26153564453125, 110.0360336303711, 86.03736877441406, -14.63836669921875, 93.05226135253906, -5.116180419921875, 1.6857070922851562, 12.360713958740234, 17.73265838623047, 30.079551696777344, 92.69879913330078, 4.740545272827148, -1.8314285278320312, 23.661102294921875, 27.78961181640625, -1.6688690185546875, 73.43850708007812, 38.25511169433594, 68.49658203125, 0.9904804229736328, 103.25148010253906, 13.686607360839844, 70.42765808105469, 14.129524230957031, -0.2933979034423828, -0.13180923461914062, 135.05349731445312, 20.191028594970703, -34.246795654296875, 47.85588836669922, 54.275177001953125, 1.1079978942871094, 45.541725158691406, 1.87945556640625, 63.58203887939453, 6.633567810058594, 84.75614929199219, 81.04525756835938, 95.17210388183594, 57.38514709472656, 112.94288635253906, -4.364553451538086, -0.3026313781738281, 52.00944519042969, 61.800811767578125, -0.4591045379638672, 6.322761535644531, -87.84974670410156, 5.086326599121094, -5.974956512451172, -51.058013916015625, 16.330076217651367], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000242.npy"}
|
|
{"epoch": 0.36583522297808013, "step": 243, "batch_size": 64, "mean": 27.232925415039062, "std": 41.75434494018555, "min": -80.11636352539062, "p10": -25.19256477355957, "median": 23.04412078857422, "p90": 86.13301696777344, "max": 100.29815673828125, "pos_frac": 0.75, "sample": [4.119354248046875, 73.63993835449219, 57.247840881347656, -25.272354125976562, 67.1951675415039, 4.062032699584961, -80.11636352539062, 3.60089111328125, 76.6209487915039, -1.7062530517578125, 59.54295349121094, 29.84040069580078, 2.532318115234375, 20.88519287109375, 25.203048706054688, 93.93637084960938, -0.5576553344726562, 3.3796310424804688, 90.88789367675781, -10.124191284179688, 4.1674957275390625, 96.28562927246094, 43.333839416503906, -3.8972339630126953, 66.58142852783203, 41.912254333496094, 6.710548400878906, 36.53965759277344, 80.43379211425781, 39.90666961669922, -64.72932434082031, -39.175235748291016, 53.73169708251953, -6.059663772583008, 83.11447143554688, 45.91889953613281, 74.84712219238281, 84.5520248413086, -26.94317626953125, 18.638992309570312, 100.29815673828125, 34.286537170410156, 31.11285400390625, 96.21730041503906, 6.5167999267578125, -0.298858642578125, 2.4717273712158203, 5.822866439819336, 86.81058502197266, 46.76459503173828, -0.8020782470703125, 37.489990234375, 5.983707427978516, 73.1786117553711, -36.14090347290039, 27.2198486328125, 35.10367965698242, 8.80245590209961, -25.006389617919922, -12.584587097167969, -28.712562561035156, 91.7328109741211, 7.5471038818359375, 18.306121826171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000243.npy"}
|
|
{"epoch": 0.3673469387755102, "step": 244, "batch_size": 64, "mean": 36.194820404052734, "std": 41.71103286743164, "min": -80.79582977294922, "p10": -1.779432868957518, "median": 26.251794815063477, "p90": 94.53964080810549, "max": 102.244140625, "pos_frac": 0.875, "sample": [9.386405944824219, 35.413612365722656, 24.773178100585938, 23.22771453857422, -22.27886962890625, 28.483495712280273, -2.4165077209472656, 91.27246856689453, 34.362213134765625, -26.539154052734375, 24.661312103271484, 76.30332946777344, 3.3046875, 83.41108703613281, 45.10004425048828, 2.73211669921875, 72.93233489990234, 90.99466705322266, -80.79582977294922, 90.04580688476562, 22.668792724609375, 7.57916259765625, -57.59114074707031, 64.4111099243164, 11.27398681640625, 84.77574920654297, 58.99201965332031, 71.92431640625, 96.8492660522461, 15.989740371704102, 98.5914306640625, 28.53919219970703, 0.44252586364746094, -6.450584411621094, -5.804862976074219, 20.485002517700195, -0.2929248809814453, 86.69380187988281, 74.6663818359375, 102.244140625, 2.6191539764404297, 81.69125366210938, 0.8356895446777344, 8.069557189941406, 8.145309448242188, 96.35572814941406, 30.337623596191406, 64.15577697753906, 5.0497894287109375, 26.281536102294922, 2.6990489959716797, 10.618064880371094, 90.61424255371094, 33.55744171142578, 101.88337707519531, 35.96405029296875, 0.9203720092773438, 26.22205352783203, 10.80868148803711, 1.9255142211914062, 79.63566589355469, 25.341596603393555, 95.93985748291016, 96.4359130859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000244.npy"}
|
|
{"epoch": 0.3688586545729403, "step": 245, "batch_size": 64, "mean": 41.92542266845703, "std": 41.74399948120117, "min": -66.67457580566406, "p10": -4.607279968261718, "median": 44.20911407470703, "p90": 91.35424880981445, "max": 107.80394744873047, "pos_frac": 0.859375, "sample": [53.834251403808594, 62.873321533203125, 62.36027145385742, 91.5396728515625, 16.162961959838867, 7.980579376220703, 4.160343170166016, 86.4620132446289, 78.0198745727539, 48.22856903076172, 2.2542457580566406, 23.43145751953125, 87.21189880371094, 44.29410934448242, 40.45111083984375, 89.70354461669922, 0.7152557373046875, 85.04031372070312, -37.90313720703125, 107.80394744873047, 103.9518051147461, 2.7931442260742188, -2.346588134765625, 41.26872253417969, 88.70896911621094, 22.86456298828125, 45.92730712890625, -4.282989501953125, 81.28523254394531, 1.0090293884277344, 86.21644592285156, -52.427879333496094, 1.5317153930664062, 58.84027099609375, 8.629829406738281, 57.38836669921875, -4.7462615966796875, 40.35784912109375, 71.10221862792969, 105.23278045654297, 43.17366027832031, 3.5772247314453125, 90.92159271240234, 13.742362976074219, -4.762117385864258, 52.64668655395508, 97.47703552246094, 90.61560821533203, 0.3108367919921875, -9.959197998046875, 46.556365966796875, 80.28651428222656, -9.69207763671875, 63.54549789428711, 105.11514282226562, 44.12411880493164, -66.67457580566406, 96.52909088134766, 40.56253433227539, 17.673011779785156, 3.097442626953125, 87.61578369140625, 37.48356246948242, 51.331851959228516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000245.npy"}
|
|
{"epoch": 0.37037037037037035, "step": 246, "batch_size": 64, "mean": 32.31889343261719, "std": 52.72543716430664, "min": -92.31454467773438, "p10": -19.372788047790525, "median": 22.070207595825195, "p90": 95.57829971313477, "max": 142.76446533203125, "pos_frac": 0.71875, "sample": [82.45594787597656, 87.4490966796875, 95.9247055053711, 91.16386413574219, 10.928298950195312, 5.5113067626953125, -90.98249816894531, 100.20912170410156, 27.572311401367188, -17.242433547973633, 82.62626647949219, 77.94793701171875, 54.69573974609375, -5.11689567565918, 55.320587158203125, 78.38613891601562, 9.442543029785156, -5.472349166870117, -7.814170837402344, -1.8022918701171875, 17.960296630859375, 57.25560760498047, -2.0377120971679688, 98.34719848632812, 108.45108795166016, -49.79689025878906, 35.023345947265625, 142.76446533203125, -20.285797119140625, 7.150970458984375, 20.380701065063477, 88.53533935546875, 12.0989990234375, 2.6468563079833984, -51.2681884765625, 47.06944274902344, -84.88190460205078, 9.674371719360352, 94.77001953125, 4.3567352294921875, 51.106590270996094, -56.93561553955078, -16.052528381347656, 105.09835052490234, 36.7741584777832, -1.083465576171875, 73.31391906738281, 84.19732666015625, 93.27098083496094, -92.31454467773438, 4.4452362060546875, 21.95468521118164, 71.9923324584961, 110.66959381103516, 18.11709213256836, -5.8179931640625, 86.3243637084961, 83.25945281982422, -3.9107666015625, -9.101287841796875, 41.4715690612793, 22.18572998046875, 74.98301696777344, 5.04266357421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000246.npy"}
|
|
{"epoch": 0.37188208616780044, "step": 247, "batch_size": 64, "mean": 18.50930404663086, "std": 45.33747863769531, "min": -89.03662109375, "p10": -41.57162399291991, "median": 14.588470458984375, "p90": 75.86463623046876, "max": 131.67349243164062, "pos_frac": 0.71875, "sample": [-66.90426635742188, 12.6390380859375, 55.24342346191406, 14.269393920898438, 3.0777931213378906, -7.9412994384765625, 5.4349365234375, 98.87553405761719, 94.90861511230469, -5.2032012939453125, 42.42045593261719, 36.51039123535156, 77.41596221923828, 28.275835037231445, 14.907546997070312, 73.840087890625, 68.34915161132812, 30.179943084716797, -6.018266677856445, -61.2208251953125, 131.67349243164062, 92.82020568847656, -75.7011947631836, -12.057519912719727, 2.4134445190429688, 3.3473052978515625, 54.661434173583984, -0.7754135131835938, -46.70770263671875, 63.26014709472656, 26.224502563476562, 11.346824645996094, 28.989837646484375, 4.6610107421875, 23.123077392578125, 10.855804443359375, 50.365234375, -89.03662109375, -27.4012451171875, 1.977670669555664, -1.3519020080566406, 11.103818893432617, 2.094482421875, 83.92220306396484, 76.7322998046875, 11.890369415283203, 0.5945854187011719, -29.587440490722656, 15.982929229736328, 55.3594970703125, 17.428848266601562, 45.667823791503906, -21.80291748046875, 18.4872989654541, -74.48164367675781, 56.40614318847656, 65.20462036132812, 71.27489471435547, -22.208221435546875, -53.40885925292969, -9.4522705078125, 66.46810913085938, 16.846328735351562, 18.324007034301758], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000247.npy"}
|
|
{"epoch": 0.37339380196523053, "step": 248, "batch_size": 64, "mean": 34.50338363647461, "std": 51.48137664794922, "min": -64.66869354248047, "p10": -28.798050689697263, "median": 18.383980751037598, "p90": 101.99616241455078, "max": 123.5180892944336, "pos_frac": 0.71875, "sample": [6.7973175048828125, 86.49269104003906, -34.31263732910156, -53.579925537109375, 108.16768646240234, 91.30327606201172, 79.67720031738281, 80.29865264892578, -1.2994098663330078, -23.26917839050293, 1.6596565246582031, 98.50624084472656, 28.121627807617188, -47.24737548828125, 97.26095581054688, 46.711761474609375, 112.1802978515625, -18.089073181152344, -60.86841583251953, 24.29291534423828, -25.877540588378906, 8.253189086914062, 11.064796447753906, 15.908525466918945, 94.01728820800781, 83.06233215332031, 93.74317169189453, 13.933860778808594, 50.900821685791016, 102.91563415527344, -35.40198516845703, 91.81986999511719, -5.172039031982422, -10.65029525756836, 101.26615905761719, 88.88125610351562, 15.918533325195312, 32.08564758300781, 37.12743377685547, 19.794227600097656, 72.04000854492188, 82.81055450439453, 16.97373390197754, 114.16554260253906, 111.49598693847656, 46.02055740356445, 3.1096115112304688, -0.4544868469238281, 4.771062850952148, 79.84883880615234, 123.5180892944336, 7.855670928955078, 65.66349792480469, 6.4917144775390625, -64.66869354248047, 6.183565139770508, -24.677764892578125, 102.30902099609375, 83.02017974853516, -0.8390274047851562, -3.5018749237060547, 7.945270538330078, -30.049697875976562, -8.2100830078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000248.npy"}
|
|
{"epoch": 0.3749055177626606, "step": 249, "batch_size": 64, "mean": 13.5355224609375, "std": 48.9523811340332, "min": -116.71940612792969, "p10": -49.44315338134765, "median": 11.000553131103516, "p90": 83.81719665527345, "max": 105.45198822021484, "pos_frac": 0.6875, "sample": [10.909919738769531, -43.44738006591797, 6.072029113769531, 56.68272399902344, 18.68199920654297, -30.309219360351562, -67.06961822509766, 54.917091369628906, 30.099609375, 43.09173583984375, 45.44462585449219, 76.44000244140625, 3.7496337890625, 79.682373046875, -90.39347839355469, -14.889663696289062, 34.074989318847656, -51.09233856201172, -59.856201171875, -45.595054626464844, 5.082498550415039, -1.4384307861328125, -40.110862731933594, 5.138761520385742, 43.678184509277344, 11.272346496582031, 0.18725013732910156, -10.938644409179688, 95.61676025390625, -27.158218383789062, -16.17560577392578, 85.58926391601562, 5.474922180175781, 41.919761657714844, 49.49113082885742, -69.45150756835938, 25.76966094970703, 3.42236328125, 15.087892532348633, -40.037445068359375, 6.185201644897461, 100.03114318847656, 68.65234375, 87.17514038085938, 21.201644897460938, 6.419521331787109, -62.99629211425781, 50.31940841674805, 25.357627868652344, 11.0911865234375, 40.126617431640625, 92.05326843261719, 52.923431396484375, 0.27094268798828125, 15.07206916809082, 19.071701049804688, -24.014734268188477, 71.94171905517578, 105.45198822021484, -116.71940612792969, 89.38058471679688, -35.17039489746094, -2.087738037109375, 4.922637939453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000249.npy"}
|
|
{"epoch": 0.3764172335600907, "step": 250, "batch_size": 64, "mean": 34.82949447631836, "std": 48.37198257446289, "min": -113.2628173828125, "p10": -9.246647071838376, "median": 26.232149124145508, "p90": 98.65013427734375, "max": 133.50717163085938, "pos_frac": 0.765625, "sample": [50.31068420410156, 7.925786972045898, -2.416746139526367, 12.136228561401367, 17.6119384765625, 38.00843811035156, 54.08197021484375, 108.86280822753906, 97.40357971191406, 0.18045806884765625, 83.6690673828125, -20.89092254638672, 72.71505737304688, 18.397171020507812, 70.93328857421875, -0.7325515747070312, -3.06988525390625, -113.2628173828125, 85.36607360839844, 13.435264587402344, 39.188812255859375, -6.639413833618164, 22.26763916015625, -63.63618469238281, 83.42704010009766, 10.846038818359375, 103.72409057617188, 38.40685272216797, 1.9147796630859375, 40.530181884765625, -10.364032745361328, 40.53125, 96.34707641601562, -0.9697074890136719, 39.83341979980469, 28.14249038696289, 0.02825927734375, 81.24693298339844, 114.29891204833984, 24.321807861328125, -66.05286407470703, -2.171802520751953, 32.8067741394043, 2.057647705078125, 116.69200134277344, 78.71295166015625, 59.345977783203125, 77.58201599121094, 106.0955810546875, 85.62748718261719, -13.2303466796875, -18.70288848876953, 99.18437194824219, 5.527545928955078, 8.2762451171875, 8.620628356933594, 133.50717163085938, 81.64203643798828, 18.482669830322266, -0.3318004608154297, -4.129055023193359, 95.86357879638672, 36.55149841308594, 13.019119262695312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000250.npy"}
|
|
{"epoch": 0.3779289493575208, "step": 251, "batch_size": 64, "mean": 21.314550399780273, "std": 48.63502883911133, "min": -95.47749328613281, "p10": -39.75961265563965, "median": 18.21249008178711, "p90": 84.41400375366212, "max": 120.0584945678711, "pos_frac": 0.703125, "sample": [2.846231460571289, 101.70513153076172, 105.3075180053711, -0.33677101135253906, 42.91938781738281, 62.08271026611328, -95.47749328613281, 25.302608489990234, 58.221229553222656, 120.0584945678711, 5.763481140136719, -81.01451873779297, 3.8459625244140625, -31.037437438964844, 4.795297622680664, -40.1214485168457, 29.737171173095703, 116.560791015625, 13.386816024780273, -73.84403991699219, -38.46698760986328, 104.11632537841797, 0.4448871612548828, -40.465667724609375, 18.918212890625, -27.352813720703125, 17.50676727294922, 65.18850708007812, -27.36736297607422, -1.07952880859375, 73.4000244140625, 81.52826690673828, 17.231098175048828, 30.876977920532227, 52.508567810058594, 32.61962127685547, 64.75989532470703, 47.49761962890625, -38.91532897949219, -0.5462055206298828, 50.2617301940918, 4.1741943359375, 84.5735092163086, 13.29556655883789, 13.974929809570312, 49.168365478515625, 23.24005126953125, 17.39752197265625, 34.69142150878906, 47.7520751953125, 84.04182434082031, 4.3592071533203125, 63.7555046081543, -32.847862243652344, -4.175346374511719, -1.6390247344970703, 56.51633071899414, -58.19136047363281, 31.76910972595215, -48.161155700683594, -36.11511993408203, 51.176055908203125, 86.39204406738281, 25.617685317993164], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000251.npy"}
|
|
{"epoch": 0.3794406651549509, "step": 252, "batch_size": 64, "mean": 19.533233642578125, "std": 50.59071350097656, "min": -108.43038940429688, "p10": -52.60012588500975, "median": 14.08959674835205, "p90": 86.99638824462892, "max": 120.93359375, "pos_frac": 0.734375, "sample": [45.14386749267578, -87.04222106933594, -11.892633438110352, -71.5511474609375, 48.20137023925781, -6.4121551513671875, 11.746112823486328, 57.267059326171875, 0.12812042236328125, 97.9388198852539, 88.79143524169922, 66.5469970703125, 23.819435119628906, 1.0869064331054688, 110.67302703857422, -10.510238647460938, 107.85436248779297, -58.334190368652344, 28.726112365722656, -1.0688323974609375, -71.640625, 17.294771194458008, 31.957870483398438, 10.681983947753906, 38.24250793457031, 9.55389404296875, 13.713394165039062, -13.076766967773438, 82.17393493652344, 4.855384826660156, 64.01163482666016, 120.84043884277344, 74.3466796875, 15.8187255859375, 33.125694274902344, 5.4903411865234375, 27.377838134765625, 13.725698471069336, 32.52092742919922, -37.14810562133789, 43.9870491027832, -41.163063049316406, 0.4860687255859375, 66.99971008300781, 1.3635330200195312, 120.93359375, -57.50172424316406, 9.491058349609375, -108.43038940429688, -77.54743957519531, -18.04863739013672, 12.636688232421875, 61.501251220703125, -18.28856086730957, 7.585044860839844, 97.5684585571289, 11.415618896484375, 82.80794525146484, 20.064712524414062, 42.49913787841797, 59.70599365234375, 29.32415771484375, -26.6951847076416, 14.453495025634766], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000252.npy"}
|
|
{"epoch": 0.38095238095238093, "step": 253, "batch_size": 64, "mean": 25.176788330078125, "std": 45.57313537597656, "min": -79.8304672241211, "p10": -35.61824951171875, "median": 18.746013641357422, "p90": 85.34297103881836, "max": 117.08356475830078, "pos_frac": 0.703125, "sample": [68.58839416503906, -0.4149017333984375, 106.27874755859375, 10.243309020996094, -1.788290023803711, 28.965736389160156, 27.641159057617188, 83.94219970703125, 69.97869873046875, 33.50752258300781, 63.144432067871094, -2.9731826782226562, 79.42048645019531, 5.903327941894531, -2.923948287963867, -69.33961486816406, -15.799575805664062, 29.475751876831055, -9.415809631347656, 55.722442626953125, 64.13020324707031, 66.35037231445312, -62.7275276184082, 56.80354309082031, 9.59359359741211, 3.1002044677734375, 83.650390625, 22.505447387695312, 4.461631774902344, 44.27656555175781, 16.481658935546875, 18.89110565185547, 85.45330810546875, -4.251949310302734, 9.614410400390625, 116.05178833007812, 29.936813354492188, -79.8304672241211, 10.591049194335938, 25.458581924438477, 8.411117553710938, -7.998268127441406, 23.6949462890625, -6.034664154052734, 103.99625396728516, -36.06309509277344, -16.43895149230957, 36.83995056152344, 104.60799407958984, 34.59868621826172, 20.918685913085938, 90.69815063476562, 117.08356475830078, 8.497993469238281, 85.08551788330078, -34.58027648925781, 18.600921630859375, -39.64898681640625, 75.58919525146484, -41.31575012207031, -36.602142333984375, -2.350412368774414, 11.844736099243164, 11.181629180908203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000253.npy"}
|
|
{"epoch": 0.382464096749811, "step": 254, "batch_size": 64, "mean": 28.40981674194336, "std": 46.58782958984375, "min": -80.68074798583984, "p10": -17.538407897949217, "median": 12.836301803588867, "p90": 99.01150894165039, "max": 108.3810043334961, "pos_frac": 0.65625, "sample": [-0.7457275390625, 26.343704223632812, 97.87164306640625, 4.5744781494140625, -9.050384521484375, 70.48921203613281, 90.43338012695312, 90.32090759277344, 8.663747787475586, -0.22231483459472656, -4.193412780761719, 11.073528289794922, 50.894004821777344, 68.33195495605469, 86.3344955444336, 89.2412109375, 105.8564453125, 25.314376831054688, -26.15705108642578, 64.1216812133789, -9.796592712402344, 107.71884155273438, -26.02753448486328, 104.40070343017578, -11.585044860839844, -80.68074798583984, -21.666152954101562, 108.3810043334961, 22.853322982788086, -1.0869789123535156, 107.22441101074219, -13.309585571289062, 5.7215118408203125, -1.4979133605957031, -1.7017173767089844, 15.085609436035156, 4.232608795166016, 86.18476867675781, 0.45186424255371094, 52.145774841308594, 14.599075317382812, -13.36209487915039, 17.378711700439453, -27.445968627929688, -13.442342758178711, 107.8470230102539, 5.838172912597656, 99.5000228881836, 37.217254638671875, 26.353010177612305, 17.941635131835938, -17.878334045410156, -16.74524688720703, 85.20799255371094, 79.61097717285156, -7.502784729003906, 41.27281951904297, 3.4975624084472656, 69.99657440185547, 63.336517333984375, 6.432781219482422, -3.372344970703125, 8.507904052734375, -63.10462188720703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000254.npy"}
|
|
{"epoch": 0.3839758125472411, "step": 255, "batch_size": 64, "mean": 34.916255950927734, "std": 50.303829193115234, "min": -91.24158477783203, "p10": -11.84956703186035, "median": 20.444215774536133, "p90": 110.44254760742189, "max": 131.64996337890625, "pos_frac": 0.75, "sample": [103.72700500488281, 51.82353210449219, 131.64996337890625, 62.195770263671875, 80.12155151367188, 45.07861328125, -6.628150939941406, -10.357765197753906, 9.90481185913086, 89.30355834960938, -63.344268798828125, -1.1401424407958984, 13.768505096435547, 90.60073852539062, -12.488910675048828, 7.279609680175781, 49.860206604003906, 11.764411926269531, 13.822608947753906, -91.24158477783203, 105.32638549804688, -1.7868728637695312, 20.6676025390625, 8.277946472167969, 112.62860107421875, 80.56725311279297, -33.373931884765625, 20.220829010009766, 119.25643157958984, -5.725982666015625, 111.5106201171875, 81.35848236083984, -23.697189331054688, 40.708927154541016, 19.64752197265625, 19.764986038208008, 5.917802810668945, 56.097320556640625, 114.59966278076172, 15.801733016967773, -1.4342823028564453, 70.95138549804688, 88.18544006347656, -4.5072174072265625, 21.377111434936523, -2.2268638610839844, 30.313621520996094, 120.16841125488281, -57.86778259277344, 53.243629455566406, 1.9786758422851562, 113.72796630859375, 11.135520935058594, 107.95037841796875, 48.82536315917969, 60.75874328613281, 3.921031951904297, 58.72428512573242, 76.73001098632812, -51.30276870727539, 0.1663970947265625, 1.6974868774414062, 45.50746154785156, -6.851860046386719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000255.npy"}
|
|
{"epoch": 0.3854875283446712, "step": 256, "batch_size": 64, "mean": 31.95915412902832, "std": 55.17235565185547, "min": -118.35631561279297, "p10": -24.436429595947267, "median": 27.95752716064453, "p90": 106.8600296020508, "max": 150.8355712890625, "pos_frac": 0.671875, "sample": [-31.970314025878906, 25.432907104492188, 64.1435546875, 49.65719223022461, 72.53419494628906, -40.41937255859375, -23.40663719177246, 109.23512268066406, 37.300697326660156, -11.758550643920898, -62.92884826660156, 1.3454093933105469, -18.6151123046875, 116.68119812011719, 19.88616943359375, 116.37323760986328, -18.94477081298828, 4.7448883056640625, -118.35631561279297, 76.35506439208984, 5.889064788818359, 67.98748779296875, -28.649763107299805, 18.393836975097656, -16.292648315429688, -89.7387924194336, 52.39011001586914, -4.478233337402344, 17.173973083496094, 72.3258285522461, -4.911773681640625, 43.426918029785156, 47.327720642089844, 101.31814575195312, 90.81480407714844, 61.317039489746094, 99.22091674804688, 98.00386810302734, -9.606185913085938, 111.04255676269531, 29.432159423828125, 43.54790496826172, 128.06378173828125, -5.310546875, 100.65969848632812, 26.55267333984375, 85.59834289550781, 8.698135375976562, 47.72019958496094, 150.8355712890625, -24.424087524414062, 84.69792938232422, 1.59259033203125, 2.2988128662109375, 75.35629272460938, -10.707355499267578, 60.09722900390625, -9.478706359863281, -4.624948501586914, 112.68431854248047, -24.172182083129883, 29.362380981445312, -24.44171905517578, 61.102783203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000256.npy"}
|
|
{"epoch": 0.3869992441421013, "step": 257, "batch_size": 64, "mean": 38.68852996826172, "std": 54.67399978637695, "min": -100.13801574707031, "p10": -9.895859336853027, "median": 22.97077178955078, "p90": 111.32835235595704, "max": 163.20858764648438, "pos_frac": 0.765625, "sample": [-0.77398681640625, 13.104057312011719, -15.431716918945312, 163.20858764648438, 119.48614501953125, 44.14193344116211, 5.058282852172852, 82.60926055908203, 33.920936584472656, 85.83794403076172, 108.45588684082031, 25.271446228027344, 8.022493362426758, -5.71282958984375, 94.30075073242188, 86.45621490478516, 107.11961364746094, 0.0969085693359375, -38.37989807128906, 0.37131500244140625, 18.12920570373535, 57.091461181640625, 35.374267578125, 14.972415924072266, 54.96532440185547, -3.9521484375, -2.574237823486328, 117.62406158447266, 102.69001007080078, -0.3630218505859375, 82.953125, 108.09146118164062, 45.873779296875, 6.4063873291015625, 48.68958282470703, 22.352195739746094, 3.04449462890625, 108.97023010253906, 120.69053649902344, -80.00810241699219, -100.13801574707031, 140.42535400390625, 31.796913146972656, 23.58934783935547, -5.2439117431640625, -9.239664077758789, 11.111167907714844, 0.7691802978515625, 63.814292907714844, 68.1759262084961, 96.04561614990234, 16.241592407226562, 113.2141342163086, 21.514881134033203, -67.17926025390625, 110.87763977050781, -2.0133018493652344, -27.14007568359375, 111.52151489257812, 9.89935302734375, 61.2560920715332, 20.425369262695312, 18.334579467773438, -10.177085876464844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000257.npy"}
|
|
{"epoch": 0.3885109599395314, "step": 258, "batch_size": 64, "mean": 30.312185287475586, "std": 40.47798156738281, "min": -76.96121215820312, "p10": -1.0002527236938432, "median": 26.25485897064209, "p90": 86.37104873657229, "max": 124.74868774414062, "pos_frac": 0.890625, "sample": [53.12725830078125, 6.985813140869141, 39.15576171875, -2.890951156616211, 8.382118225097656, 95.16726684570312, 123.60287475585938, 47.44378662109375, 56.809898376464844, 9.639814376831055, 11.858640670776367, 28.435592651367188, -53.46876525878906, 14.89616584777832, 39.217613220214844, 78.60968017578125, 13.455406188964844, -17.660640716552734, 52.35736083984375, -74.41865539550781, 7.811389923095703, 18.770751953125, -62.796478271484375, 23.665283203125, 17.215316772460938, 45.59386444091797, 49.43792724609375, 56.922027587890625, 17.128192901611328, 20.131729125976562, 31.93524169921875, 6.507999420166016, 38.29473114013672, 75.5091323852539, 26.092679977416992, 13.750968933105469, 7.651054382324219, 16.671707153320312, 48.904510498046875, 124.74868774414062, 24.54364776611328, -15.868896484375, 108.27777099609375, 48.47810363769531, 45.0694580078125, 100.05854797363281, 52.74702453613281, 57.91563034057617, 3.7335433959960938, 89.69734954833984, 6.034645080566406, 26.417037963867188, 49.20423126220703, 37.116477966308594, 14.077934265136719, 5.437639236450195, 36.234100341796875, -76.96121215820312, 3.411376953125, 3.4468936920166016, 46.01439666748047, 62.30414581298828, 92.03947448730469, 5.89771842956543], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000258.npy"}
|
|
{"epoch": 0.3900226757369615, "step": 259, "batch_size": 64, "mean": 46.49407958984375, "std": 55.83539962768555, "min": -72.54446411132812, "p10": -29.856195259094214, "median": 56.438154220581055, "p90": 115.59817276000977, "max": 136.08334350585938, "pos_frac": 0.78125, "sample": [-5.363044738769531, -71.23748779296875, -69.25975036621094, 92.49053955078125, 41.09857177734375, 104.59930419921875, 105.95866394042969, 105.90028381347656, 71.35055541992188, -72.54446411132812, 69.53901672363281, -3.9999923706054688, 46.26044464111328, 119.8377685546875, 75.57362365722656, -5.25445556640625, -45.349853515625, 3.8897705078125, 123.91790771484375, 29.729522705078125, 58.57865524291992, 91.97511291503906, 104.83314514160156, 107.29759216308594, 61.471832275390625, -8.337024688720703, -71.95448303222656, 15.303970336914062, 45.40161895751953, 8.30040168762207, 94.98844146728516, 82.38763427734375, -4.761268615722656, 30.142921447753906, 71.77986907958984, 63.68072509765625, 127.40975189208984, -8.00518798828125, 97.30244445800781, 73.70542907714844, 114.95201110839844, 56.48234176635742, 20.877052307128906, 136.08334350585938, 8.170272827148438, 5.51416015625, -38.968414306640625, -8.594350814819336, -63.16022491455078, 2.3882904052734375, 127.14897155761719, 116.2076416015625, 95.67094421386719, 69.45526123046875, 56.39396667480469, 69.0631103515625, 36.322914123535156, 35.67646026611328, 16.266315460205078, 37.170997619628906, 92.83827209472656, 115.8750991821289, 64.8160400390625, 50.33203887939453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000259.npy"}
|
|
{"epoch": 0.3915343915343915, "step": 260, "batch_size": 64, "mean": 36.835205078125, "std": 57.71482849121094, "min": -97.65505981445312, "p10": -34.38746871948242, "median": 32.88776397705078, "p90": 115.17416381835938, "max": 118.68771362304688, "pos_frac": 0.6875, "sample": [115.48947143554688, -52.33778381347656, 94.80673217773438, -25.189678192138672, 4.804912567138672, 116.84503173828125, 33.360107421875, 27.345787048339844, 116.33940124511719, -54.90753173828125, 110.4971923828125, 79.95313262939453, 96.30410766601562, 69.43319702148438, 102.82425689697266, -11.144935607910156, 116.82869720458984, 72.31979370117188, 24.71278953552246, 118.68771362304688, 89.17430877685547, 6.091064453125, 115.28290557861328, 114.9204330444336, -9.794281005859375, 33.6923942565918, 96.42568969726562, 112.21891021728516, -3.1761131286621094, -6.6222076416015625, 6.930412292480469, 18.019744873046875, 110.72980499267578, 38.511512756347656, -0.8827724456787109, 51.8625602722168, 32.41542053222656, -26.71265411376953, -97.65505981445312, 21.517539978027344, 2.1421337127685547, 71.58845520019531, -1.3276844024658203, -4.0195770263671875, 20.47154998779297, 49.48890686035156, 118.39266967773438, -26.436019897460938, 110.390380859375, -23.3260498046875, -47.92023468017578, 65.2728271484375, 33.53968811035156, 6.4950103759765625, 87.30345153808594, 56.679779052734375, -34.43292236328125, 87.9709243774414, 81.97134399414062, -34.281410217285156, 0.18220901489257812, -83.01119232177734, -35.8730354309082, -3.7299423217773438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000260.npy"}
|
|
{"epoch": 0.3930461073318216, "step": 261, "batch_size": 64, "mean": 31.964664459228516, "std": 50.203704833984375, "min": -69.14152526855469, "p10": -23.56722259521484, "median": 20.057456970214844, "p90": 111.11715545654297, "max": 123.31903076171875, "pos_frac": 0.703125, "sample": [3.258087158203125, 34.76805114746094, 45.89440155029297, 6.506124496459961, -17.549514770507812, 60.4820671081543, -0.9407825469970703, 113.72758483886719, 30.298095703125, -9.812753677368164, -19.882278442382812, -14.552383422851562, -5.703483581542969, 116.14251708984375, 78.4654541015625, -19.285079956054688, -2.2823257446289062, 123.31903076171875, 12.880373001098633, 111.50486755371094, 10.922416687011719, 36.567115783691406, -16.95047378540039, -43.566436767578125, 114.771240234375, 59.602684020996094, 121.23883819580078, 50.4247932434082, -8.965278625488281, 33.0797119140625, 57.97110366821289, 73.8648681640625, 56.530418395996094, 110.21249389648438, 41.97484588623047, 3.044126510620117, 111.65193176269531, -68.7921142578125, -35.876075744628906, 75.41569519042969, 47.15454864501953, 13.709966659545898, 84.95491790771484, 80.4572982788086, 11.143085479736328, -69.14152526855469, 2.199615478515625, -25.146484375, -48.42231750488281, 9.479053497314453, 37.20240020751953, -17.11932373046875, 81.19169616699219, 99.55119323730469, 12.918632507324219, -26.453857421875, 55.34286117553711, 21.733413696289062, 6.827239990234375, 104.62130737304688, 18.381500244140625, 98.7291488647461, 17.82794189453125, -1.7638683319091797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000261.npy"}
|
|
{"epoch": 0.3945578231292517, "step": 262, "batch_size": 64, "mean": 30.042877197265625, "std": 47.99052047729492, "min": -77.59003448486328, "p10": -34.634937286376946, "median": 22.015586853027344, "p90": 96.27099914550783, "max": 118.40303039550781, "pos_frac": 0.703125, "sample": [-42.91535949707031, 55.239715576171875, 20.60987091064453, -1.0209598541259766, 7.3450775146484375, 92.83108520507812, 7.6678314208984375, 3.7846527099609375, 29.48326873779297, 97.34333801269531, -6.174015045166016, 74.21237182617188, -40.83345031738281, 5.463369369506836, 61.642765045166016, -49.50709915161133, 27.531587600708008, 97.61770629882812, 52.798370361328125, 53.70749282836914, -77.59003448486328, -4.298702239990234, 5.315422058105469, -7.06378173828125, 23.421302795410156, 85.63499450683594, 12.3245849609375, 118.40303039550781, -42.3310546875, 8.636518478393555, 98.28234100341797, 56.337303161621094, 42.02056884765625, 93.76887512207031, -10.176704406738281, 109.5091552734375, 3.187349319458008, 20.197628021240234, 117.60708618164062, -2.5910873413085938, -7.007871627807617, -26.84637451171875, -24.647872924804688, 72.43258666992188, 85.43072509765625, 47.703086853027344, 53.159423828125, 7.200410842895508, -44.12379455566406, 33.61764907836914, 59.87884521484375, -2.5646438598632812, 83.23284912109375, -9.529672622680664, 39.71576690673828, 36.191322326660156, 87.43711853027344, 105.03903198242188, 3.718597412109375, 87.13554382324219, 8.943748474121094, -20.29570770263672, 87.47391510009766, -37.97289276123047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000262.npy"}
|
|
{"epoch": 0.3960695389266818, "step": 263, "batch_size": 64, "mean": 28.88762664794922, "std": 49.06256866455078, "min": -94.6946792602539, "p10": -26.43455352783203, "median": 23.29960823059082, "p90": 94.32135696411133, "max": 115.30827331542969, "pos_frac": 0.6875, "sample": [43.09688949584961, 58.50349807739258, -31.655471801757812, -17.238311767578125, -8.046648025512695, -10.075431823730469, 92.98777770996094, -0.6238861083984375, 88.14543151855469, 79.626708984375, 48.36871337890625, 2.0780467987060547, -6.907928466796875, 53.1006965637207, 0.3080291748046875, -56.48924255371094, 89.29351806640625, -10.815338134765625, 14.894775390625, -10.275138854980469, 57.89936828613281, 89.64533233642578, 111.14009094238281, 24.49538803100586, 24.914764404296875, 2.6368637084960938, 94.89289093017578, 58.32884216308594, 96.63538360595703, 75.98945617675781, 111.99192810058594, 17.63983154296875, -27.150985717773438, 7.2025909423828125, -6.286403656005859, 22.10382843017578, -94.6946792602539, 90.7669677734375, 39.80848693847656, 97.18260192871094, 21.706634521484375, -6.903083801269531, -24.76287841796875, 84.47171020507812, -30.6937255859375, -0.32135009765625, 25.746047973632812, 89.7448501586914, 50.78588104248047, 24.755226135253906, 8.991485595703125, 115.30827331542969, -1.7118377685546875, 6.257358551025391, 38.124969482421875, -19.15373420715332, 33.792449951171875, 86.58685302734375, 9.498626708984375, -28.50014305114746, 9.675506591796875, -94.42598724365234, 100.09645080566406, 36.319297790527344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000263.npy"}
|
|
{"epoch": 0.3975812547241119, "step": 264, "batch_size": 64, "mean": 31.955841064453125, "std": 54.0802001953125, "min": -94.72431945800781, "p10": -38.57439842224121, "median": 25.28217315673828, "p90": 109.25175552368165, "max": 132.4748992919922, "pos_frac": 0.734375, "sample": [60.029083251953125, -38.93482971191406, 27.593849182128906, 0.5508251190185547, 1.5479145050048828, -16.043664932250977, 4.8342437744140625, 1.8398513793945312, 86.96562194824219, 16.869491577148438, -56.124046325683594, 4.260349273681641, -94.72431945800781, 77.5290298461914, -30.269268035888672, 40.19053649902344, 65.1333236694336, 63.30574035644531, 7.851753234863281, 119.4288330078125, 99.11289978027344, 64.40953063964844, 96.42491149902344, 32.0211181640625, 50.72138977050781, 121.041259765625, -1.9169139862060547, 21.930145263671875, -9.48431396484375, 11.750808715820312, 8.300132751464844, -5.259376525878906, 103.2554702758789, 4.122470855712891, -10.175827026367188, 34.75538635253906, 113.78069305419922, -13.516990661621094, 78.98651885986328, 110.50555419921875, -47.86442565917969, -17.50061798095703, 32.352134704589844, 132.4748992919922, 62.063194274902344, 90.28425598144531, -81.27983093261719, 58.42845153808594, -38.76148986816406, 5.7970123291015625, -39.01454162597656, 3.1574020385742188, 117.9193344116211, 39.80234146118164, 63.98324203491211, -38.13785171508789, 22.970497131347656, 64.5545654296875, 29.6131591796875, -1.9867324829101562, 6.523101806640625, 129.48184204101562, 91.388427734375, 106.32622528076172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000264.npy"}
|
|
{"epoch": 0.39909297052154197, "step": 265, "batch_size": 64, "mean": 35.01887130737305, "std": 61.63483428955078, "min": -115.25790405273438, "p10": -34.7594024658203, "median": 26.13798999786377, "p90": 115.7394790649414, "max": 126.22660827636719, "pos_frac": 0.75, "sample": [17.18865966796875, 0.6424770355224609, 1.2127227783203125, -38.219482421875, 2.7028427124023438, -8.537086486816406, 16.294349670410156, 24.80858612060547, -83.8048095703125, 112.75912475585938, 38.701480865478516, 4.0064239501953125, 56.20362091064453, 55.648338317871094, -115.25790405273438, -15.192848205566406, 55.72541809082031, 1.9035682678222656, 26.28655433654785, -92.04966735839844, 11.84872817993164, 109.40768432617188, 109.9225845336914, 21.69972038269043, 7.211954116821289, -18.093669891357422, 79.06671142578125, 3.9391098022460938, 32.59113311767578, -0.6428546905517578, 121.56210327148438, 126.22660827636719, -20.85907745361328, 110.39730834960938, 75.14966583251953, 116.87548065185547, 2.8343372344970703, 27.568180084228516, -1.348297119140625, -26.685882568359375, 42.62638854980469, 102.19636535644531, 111.40697479248047, 119.06633758544922, 108.3038101196289, 95.47409057617188, 118.05201721191406, 38.335662841796875, 18.746498107910156, -62.82588195800781, 25.989425659179688, 64.54612731933594, -14.933116912841797, 115.94590759277344, 115.2578125, 107.4106216430664, 119.95672607421875, -77.81913757324219, 69.40425872802734, -7.379383087158203, 74.65494537353516, 4.939476013183594, 88.98580169677734, -86.82778930664062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000265.npy"}
|
|
{"epoch": 0.40060468631897206, "step": 266, "batch_size": 64, "mean": 34.241844177246094, "std": 53.352073669433594, "min": -99.72307586669922, "p10": -21.484901046752924, "median": 23.285987854003906, "p90": 114.5585189819336, "max": 138.19619750976562, "pos_frac": 0.734375, "sample": [-15.042572021484375, 116.33804321289062, 110.8768310546875, 4.116926193237305, 38.47257995605469, -1.8234329223632812, -3.9110336303710938, -41.8555908203125, 4.681901931762695, 53.55321502685547, 31.43701934814453, 6.6466522216796875, 11.382759094238281, 66.47242736816406, 74.03033447265625, 66.04019165039062, 3.20330810546875, 138.19619750976562, -24.245899200439453, -12.112518310546875, 115.15522766113281, 17.070663452148438, 6.965660095214844, 117.28076934814453, 45.316131591796875, 129.25474548339844, -3.069671630859375, 113.16619873046875, 51.141357421875, -27.96729278564453, 105.12128448486328, 39.458465576171875, 116.22089385986328, -5.017951965332031, 53.519203186035156, -7.969642639160156, 29.501312255859375, 78.36273193359375, 79.2220458984375, -3.5055274963378906, 1.8194923400878906, -44.65977478027344, -49.840179443359375, 6.099491119384766, -99.72307586669922, 11.3675537109375, 34.18098449707031, 12.89324951171875, 47.33734893798828, 76.76958465576172, -78.58553314208984, 4.100507736206055, -5.7637939453125, 37.85296630859375, 100.1285629272461, 54.00180435180664, 124.80106353759766, -1.3481063842773438, 64.35750579833984, 7.493865966796875, 94.94903564453125, 4.046106338500977, 107.38619995117188, 6.129329681396484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000266.npy"}
|
|
{"epoch": 0.4021164021164021, "step": 267, "batch_size": 64, "mean": 27.218570709228516, "std": 56.82466125488281, "min": -119.90240478515625, "p10": -30.331139373779298, "median": 24.751827239990234, "p90": 104.78658294677736, "max": 125.91482543945312, "pos_frac": 0.703125, "sample": [87.87294006347656, 2.6042861938476562, 3.741863250732422, -101.20660400390625, 5.8145599365234375, -27.440509796142578, 76.56803894042969, 119.18502807617188, 75.33641052246094, 0.9925308227539062, -6.556196212768555, 49.296669006347656, 7.112926483154297, 125.26158905029297, 4.158512115478516, -60.07079315185547, 34.3892822265625, -63.867557525634766, -10.701255798339844, -108.34515380859375, -29.05898094177246, -29.695167541503906, -8.875404357910156, 71.10179138183594, 67.83937072753906, 102.65558624267578, -0.15058135986328125, 12.32741928100586, 83.1121826171875, -30.60369873046875, 39.754032135009766, 1.0529918670654297, 105.69986724853516, 89.695556640625, -29.613540649414062, 34.55496597290039, 34.415191650390625, -10.028274536132812, 0.540435791015625, 125.91482543945312, 28.631912231445312, 0.1620044708251953, 95.935302734375, -0.9774246215820312, 95.44005584716797, 78.74685668945312, 112.50360870361328, 28.712608337402344, -119.90240478515625, 110.59272003173828, 48.61763000488281, -37.74407958984375, -2.3373546600341797, 31.14389419555664, 20.871742248535156, 10.948238372802734, 71.272705078125, 68.05646514892578, 113.8010482788086, 35.41987609863281, 9.001232147216797, 70.40211486816406, 29.907127380371094, -2.0024681091308594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000267.npy"}
|
|
{"epoch": 0.4036281179138322, "step": 268, "batch_size": 64, "mean": 31.281864166259766, "std": 47.17568588256836, "min": -78.52566528320312, "p10": -12.59381866455078, "median": 14.243436813354492, "p90": 104.92776260375977, "max": 140.10678100585938, "pos_frac": 0.65625, "sample": [11.786468505859375, 122.98796844482422, 0.14188194274902344, -4.0646820068359375, 89.92481994628906, 119.25292205810547, 35.91508483886719, -6.937644958496094, -24.04470443725586, 28.142108917236328, 95.41122436523438, -0.07372283935546875, -0.9238758087158203, -5.9906463623046875, 20.18792724609375, 14.247600555419922, 50.63835144042969, -24.662948608398438, -0.08954048156738281, 101.73162841796875, 72.57400512695312, 86.88633728027344, -3.1452465057373047, 3.2342681884765625, -10.103721618652344, 28.982086181640625, 19.184829711914062, 78.45024108886719, 14.239273071289062, -5.97540283203125, 105.70797729492188, 3.8605918884277344, 19.239559173583984, 10.429367065429688, -14.500198364257812, 103.10726165771484, -5.792045593261719, -15.045467376708984, -43.445465087890625, 11.80746078491211, 63.82817077636719, 9.212686538696289, -1.4977035522460938, 87.41036987304688, 55.060272216796875, 35.54518127441406, 119.3721923828125, -3.944286346435547, 9.138427734375, -78.52566528320312, -3.7719955444335938, -13.661003112792969, 55.78456115722656, -0.5421714782714844, 140.10678100585938, 58.07829284667969, 26.665939331054688, 28.697715759277344, 112.06893157958984, 40.25106430053711, -0.5093593597412109, 121.29425048828125, 52.579524993896484, 6.121250152587891], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000268.npy"}
|
|
{"epoch": 0.4051398337112623, "step": 269, "batch_size": 64, "mean": 39.4373779296875, "std": 53.23208236694336, "min": -103.72265625, "p10": -7.316047477722168, "median": 25.545536041259766, "p90": 113.86678161621094, "max": 140.8677978515625, "pos_frac": 0.765625, "sample": [-5.352813720703125, -28.6204833984375, 12.200958251953125, 112.24652099609375, 44.90582275390625, -6.237255096435547, 28.46917724609375, 108.04927062988281, 12.024833679199219, -7.5008392333984375, -11.196235656738281, 25.78569793701172, 53.25688171386719, 36.973907470703125, 112.81669616699219, 101.32176208496094, -0.4229087829589844, 59.972373962402344, 139.92413330078125, -85.0739517211914, 31.55542755126953, 0.7261962890625, 104.869140625, 25.305374145507812, 17.339706420898438, 114.31681823730469, -27.37290382385254, 140.8677978515625, 67.87239074707031, 110.41779327392578, 27.696977615356445, 8.614627838134766, -6.884866714477539, 61.64085388183594, -5.1011810302734375, 5.823812484741211, 2.902667999267578, 49.84614562988281, -0.1020050048828125, 101.8680191040039, 115.26217651367188, 67.47213745117188, 21.776512145996094, 8.153570175170898, 116.6132583618164, -27.77947235107422, 67.8888168334961, 19.426284790039062, -1.5047569274902344, 7.2214508056640625, 122.2969970703125, 0.0144500732421875, 122.67543029785156, 17.895416259765625, -2.8225784301757812, -103.72265625, 11.207656860351562, 80.64993286132812, 46.549652099609375, 56.34132385253906, 102.30902862548828, 11.526611328125, 105.59794616699219, 23.196521759033203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000269.npy"}
|
|
{"epoch": 0.40665154950869237, "step": 270, "batch_size": 64, "mean": 30.669635772705078, "std": 53.68843078613281, "min": -108.1007080078125, "p10": -16.939935874938964, "median": 22.043216705322266, "p90": 110.03084335327151, "max": 130.53543090820312, "pos_frac": 0.78125, "sample": [62.70033264160156, 116.388427734375, -7.054313659667969, 129.79852294921875, 52.562110900878906, 120.8504638671875, 7.86671257019043, 53.037227630615234, 65.60983276367188, 21.057037353515625, 124.15151977539062, 66.43983459472656, 22.42676544189453, 79.59134674072266, 3.6060791015625, -57.48411560058594, 48.80107116699219, 58.459327697753906, 13.40927505493164, 64.56144714355469, 36.95210266113281, 81.93870544433594, 9.750373840332031, -53.86344528198242, 3.7522735595703125, 7.833194732666016, 99.9650650024414, -93.79953002929688, 47.51377487182617, 17.03460693359375, 21.65966796875, 22.66644287109375, 7.431640625, 52.11278533935547, -6.998985290527344, 5.234825134277344, 115.47937774658203, -2.9727020263671875, 11.318767547607422, -108.1007080078125, 130.53543090820312, 91.02606964111328, 98.63261413574219, -17.622102737426758, 10.914859771728516, 45.8724365234375, -1.6266937255859375, 2.163909912109375, 111.85189819335938, 50.38291931152344, 105.7817153930664, 1.0145988464355469, -27.466232299804688, 41.60301208496094, 0.9318313598632812, -9.481300354003906, -15.348213195800781, -104.50810241699219, 69.78976440429688, 9.38043212890625, 22.995887756347656, -5.17657470703125, 5.169792175292969, 24.351608276367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000270.npy"}
|
|
{"epoch": 0.40816326530612246, "step": 271, "batch_size": 64, "mean": 24.912029266357422, "std": 60.52397918701172, "min": -95.80431365966797, "p10": -34.86426849365234, "median": 4.766811370849609, "p90": 114.91333236694337, "max": 164.08074951171875, "pos_frac": 0.609375, "sample": [32.90863037109375, 59.62892150878906, -12.60107421875, -9.168037414550781, 120.55408477783203, 97.96974182128906, 15.964166641235352, 3.162473678588867, 115.03802490234375, -48.352386474609375, 76.43597412109375, -6.196882247924805, 141.88824462890625, -23.298080444335938, 70.4967269897461, 111.27537536621094, 2.463348388671875, -26.678665161132812, 2.092073440551758, 125.27278137207031, 12.863609313964844, 3.9213829040527344, 92.7822265625, -10.810949325561523, -17.761627197265625, 1.0276374816894531, -27.139556884765625, 72.6122817993164, -2.5114593505859375, -32.07594299316406, 6.551496505737305, 82.55926513671875, 5.538330078125, -95.80431365966797, -2.363800048828125, 101.30132293701172, 20.07970428466797, 3.9952926635742188, 106.46186828613281, 114.62238311767578, 0.5298309326171875, -61.16236114501953, 164.08074951171875, -57.35260772705078, -9.809097290039062, 30.74676513671875, -5.858589172363281, -85.76632690429688, -86.0372314453125, 9.107246398925781, 25.00192642211914, -21.82752227783203, 110.11106872558594, 118.01472473144531, 67.6124267578125, -3.78656005859375, -36.05926513671875, 43.56940460205078, -13.569572448730469, 8.499324798583984, -16.418100357055664, -8.176742553710938, 126.29729461669922, 11.918304443359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000271.npy"}
|
|
{"epoch": 0.40967498110355255, "step": 272, "batch_size": 64, "mean": 48.72712707519531, "std": 47.14921188354492, "min": -89.12973022460938, "p10": -2.910593795776367, "median": 47.502140045166016, "p90": 115.92966918945314, "max": 131.0066375732422, "pos_frac": 0.859375, "sample": [15.003288269042969, -4.170066833496094, -8.605239868164062, 81.32304382324219, 50.74610137939453, 120.55924224853516, 24.19390869140625, 91.77205657958984, 109.22185516357422, 12.645824432373047, 95.9976806640625, 58.1481819152832, 16.41448974609375, -3.8051834106445312, 58.27272033691406, 54.247467041015625, 69.09298706054688, 5.524787902832031, 62.14598083496094, 50.23628616333008, 113.27584075927734, 20.73961639404297, 84.9598388671875, 84.97603607177734, 131.0066375732422, 11.894294738769531, 73.09880065917969, -30.859840393066406, 14.775520324707031, 124.14749908447266, 58.91149139404297, 122.952880859375, 89.77737426757812, 0.5948352813720703, 76.04692077636719, 19.696758270263672, 17.297298431396484, 0.49175262451171875, 35.90266418457031, -89.12973022460938, 96.35497283935547, 123.5225601196289, 128.90628051757812, 45.38414001464844, 2.2702484130859375, 22.76215362548828, 98.74461364746094, 79.5826187133789, 92.72380065917969, 90.45702362060547, 31.97700309753418, 106.63372802734375, -9.271591186523438, -3.0117950439453125, 18.182373046875, 36.747283935546875, 46.819435119628906, 48.184844970703125, -2.674457550048828, -0.6459579467773438, 18.118627548217773, 1.7995109558105469, 117.06702423095703, 8.379838943481445], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000272.npy"}
|
|
{"epoch": 0.41118669690098264, "step": 273, "batch_size": 64, "mean": 34.86532974243164, "std": 66.66400146484375, "min": -128.8633575439453, "p10": -44.391466522216795, "median": 32.80326271057129, "p90": 114.33710250854493, "max": 185.017333984375, "pos_frac": 0.75, "sample": [1.8420753479003906, 57.307586669921875, 115.7755126953125, -42.44835662841797, 96.33697509765625, -32.923805236816406, -77.38290405273438, 95.73011779785156, -85.66781616210938, 42.845458984375, 76.16828918457031, -45.22422790527344, 61.94093322753906, -3.175374984741211, 7.16876220703125, 50.295352935791016, 125.40410614013672, 26.82056427001953, 103.44365692138672, 89.7842788696289, 94.97090911865234, 6.202274322509766, -4.582756042480469, 2.9617652893066406, -128.8633575439453, 110.9808120727539, 37.786224365234375, 1.7327804565429688, -27.787996292114258, 4.698680877685547, 79.69204711914062, 185.017333984375, 105.61843872070312, -113.34075927734375, 20.568939208984375, 78.67127227783203, 76.65913391113281, 50.81143569946289, 2.4574661254882812, 104.89158630371094, -91.0046157836914, 21.67273712158203, 93.8971939086914, -32.343650817871094, 47.5502815246582, 158.4834747314453, 127.41393280029297, 83.4939956665039, 14.452468872070312, 89.03488159179688, 1.6407146453857422, 25.768936157226562, 1.173208236694336, -13.208633422851562, 21.585853576660156, 126.18456268310547, -0.08533859252929688, 39.280113220214844, 27.820301055908203, -20.76409149169922, 69.62348937988281, 121.23841094970703, -92.165771484375, 57.45123291015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000273.npy"}
|
|
{"epoch": 0.4126984126984127, "step": 274, "batch_size": 64, "mean": 38.26170349121094, "std": 62.2528076171875, "min": -121.3869857788086, "p10": -26.130180358886715, "median": 28.919979095458984, "p90": 120.9302230834961, "max": 134.78701782226562, "pos_frac": 0.765625, "sample": [63.06633758544922, -23.68334197998047, -12.228017807006836, 4.633697509765625, -12.809600830078125, -63.16786193847656, 121.34162902832031, 118.83934783935547, 29.058143615722656, 2.781614303588867, 57.49292755126953, 103.08356475830078, 2.9198989868164062, -21.7052001953125, 64.9856185913086, 130.23536682128906, 119.97027587890625, -77.6609878540039, 6.348514556884766, 28.226715087890625, 121.78047180175781, 113.41471862792969, 72.29609680175781, 119.17314910888672, 53.64844512939453, 3.740345001220703, 121.8731918334961, 110.19065856933594, 54.28215789794922, 11.507736206054688, 8.250751495361328, 7.137657165527344, 105.30615997314453, -3.4745330810546875, -21.04264259338379, 60.12024688720703, -105.72588348388672, -27.17882537841797, 106.56534576416016, 9.908248901367188, -41.431114196777344, 33.072784423828125, 24.753170013427734, 25.957656860351562, -8.087472915649414, 28.781814575195312, 25.173797607421875, -121.3869857788086, 40.75444030761719, 43.52238082885742, 125.44652557373047, 109.18045043945312, 3.333446502685547, 53.174041748046875, 12.053756713867188, -17.10483741760254, -69.72396850585938, 7.518718719482422, 125.2662353515625, 119.08655548095703, 65.36344909667969, 53.323081970214844, 134.78701782226562, 112.43193817138672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000274.npy"}
|
|
{"epoch": 0.41421012849584277, "step": 275, "batch_size": 64, "mean": 38.457820892333984, "std": 53.26679229736328, "min": -104.86876678466797, "p10": -8.242071914672852, "median": 30.964183807373047, "p90": 119.28469543457032, "max": 134.9869384765625, "pos_frac": 0.8125, "sample": [34.176849365234375, 16.936729431152344, -27.003860473632812, 37.52751922607422, 116.92652130126953, 23.904441833496094, 44.761322021484375, -104.86876678466797, 122.40515899658203, 102.35684204101562, 5.3337249755859375, 45.37507629394531, 0.6831436157226562, 120.32596588134766, -1.8011398315429688, 19.27310562133789, 102.50459289550781, 50.49878692626953, 112.0072250366211, -37.181488037109375, -95.83265686035156, 124.81584167480469, 17.275909423828125, 8.714988708496094, 47.07286834716797, 0.8509292602539062, -43.428035736083984, 9.680000305175781, 7.092060089111328, 26.99365234375, 29.238250732421875, 134.9869384765625, 32.69011688232422, 65.51152801513672, 123.12873077392578, 37.85151672363281, 48.57350158691406, 13.645200729370117, -4.82806396484375, -3.8199615478515625, 15.251861572265625, 20.196136474609375, 35.59038543701172, 1.5146427154541016, 19.01164436340332, 130.46469116210938, 67.49476623535156, 93.8539047241211, -8.242118835449219, 87.35978698730469, 3.7916488647460938, 4.618553161621094, -7.330514907836914, 14.093719482421875, 120.29534149169922, 110.41748809814453, -8.241962432861328, 67.53812408447266, 53.98412322998047, 101.4773178100586, 50.07525634765625, 58.76713562011719, -34.891700744628906, 99.85521697998047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000275.npy"}
|
|
{"epoch": 0.41572184429327286, "step": 276, "batch_size": 64, "mean": 38.03240966796875, "std": 54.022300720214844, "min": -98.46710968017578, "p10": -15.747384643554685, "median": 27.491747856140137, "p90": 113.7189712524414, "max": 133.05499267578125, "pos_frac": 0.78125, "sample": [107.49020385742188, 17.575210571289062, 8.14964485168457, 62.667869567871094, 4.718149185180664, 29.65843963623047, 133.05499267578125, -58.63787078857422, 78.69425964355469, 7.73774528503418, 13.415401458740234, 5.43415641784668, 65.70491790771484, 91.69828796386719, -13.884185791015625, 121.69776916503906, -5.492076873779297, 4.358467102050781, 25.702713012695312, 3.299968719482422, 41.40971755981445, 121.2520980834961, 104.9085922241211, 128.09373474121094, 13.308441162109375, -53.2181396484375, 95.95398712158203, -26.451217651367188, 58.708831787109375, 100.52224731445312, 8.706348419189453, 13.281173706054688, -44.66944122314453, -98.46710968017578, 99.36312866210938, -16.5458984375, 70.24844360351562, -9.076690673828125, 69.54055786132812, 86.43196105957031, -78.03433227539062, 43.36750793457031, 113.31484985351562, 1.8657608032226562, -10.430709838867188, 24.793365478515625, 104.12227630615234, 50.76274108886719, 113.89216613769531, 18.06854248046875, -8.236274719238281, -12.057601928710938, 14.351097106933594, 27.523120880126953, 21.80602264404297, 131.3193359375, 117.8371353149414, 66.25340270996094, 56.3956298828125, 46.37061309814453, 27.46037483215332, 65.41999816894531, 32.2035026550293, -0.6392097473144531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000276.npy"}
|
|
{"epoch": 0.41723356009070295, "step": 277, "batch_size": 64, "mean": 21.500307083129883, "std": 58.23041534423828, "min": -107.87970733642578, "p10": -54.508506011962886, "median": 14.756660461425781, "p90": 99.15697479248048, "max": 133.05458068847656, "pos_frac": 0.625, "sample": [90.48599243164062, -100.39381408691406, 96.63819885253906, -3.2539215087890625, 93.32145690917969, -65.2803955078125, 15.657901763916016, 15.391281127929688, 86.8000717163086, 14.122039794921875, -51.948997497558594, -13.295539855957031, 49.78826904296875, -2.8541412353515625, -43.26435852050781, 44.20823669433594, 40.26814270019531, -5.472801208496094, 1.1960296630859375, 27.135112762451172, 68.77708435058594, 34.111183166503906, 6.804592132568359, 8.863418579101562, -4.709705352783203, 4.26220703125, -16.976486206054688, -55.605438232421875, 22.254287719726562, 102.90190124511719, -0.12868499755859375, 31.627784729003906, -32.17242431640625, 131.97947692871094, 40.9517936706543, 115.40301513671875, -15.146202087402344, -70.44670104980469, 36.911163330078125, 27.457496643066406, -3.9697418212890625, -64.58229064941406, 93.48316955566406, 69.11647033691406, 29.248703002929688, 10.248924255371094, -13.817710876464844, 96.13533020019531, -107.87970733642578, 133.05458068847656, 66.72918701171875, -31.090002059936523, 114.10098266601562, 37.717681884765625, -3.985595703125, 1.0700454711914062, 100.2364501953125, -100.50379943847656, 80.1458740234375, 122.82882690429688, -42.03034210205078, 7.391826629638672, -0.0177001953125, 56.01995849609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000277.npy"}
|
|
{"epoch": 0.41874527588813304, "step": 278, "batch_size": 64, "mean": 36.89111328125, "std": 59.56267547607422, "min": -139.6379852294922, "p10": -34.13298873901367, "median": 22.90591335296631, "p90": 123.01142654418945, "max": 137.3838348388672, "pos_frac": 0.734375, "sample": [-39.42363739013672, -62.910491943359375, 114.75082397460938, 41.63459777832031, -0.10699462890625, -6.778829574584961, -100.27359008789062, 56.1788330078125, 131.22183227539062, 137.3838348388672, 45.686187744140625, -4.581756591796875, 122.66674041748047, 120.12731170654297, 10.046607971191406, -10.697141647338867, 9.23666763305664, 31.396217346191406, 95.12626647949219, 87.98088073730469, 59.11003875732422, 78.74488830566406, 14.736129760742188, 6.116264343261719, -34.01286315917969, 133.79788208007812, -35.87086486816406, 88.95258331298828, 27.29071044921875, 11.105072021484375, 11.908855438232422, 71.71251678466797, -4.167022705078125, -34.184471130371094, 67.88114929199219, 9.442497253417969, 109.87928771972656, 12.008834838867188, 78.97430419921875, 82.43392181396484, 123.15914916992188, 132.98846435546875, -8.94168472290039, 69.90721130371094, 30.716760635375977, 62.93266296386719, -2.3900527954101562, 5.912866592407227, -36.92969512939453, 6.404380798339844, 42.723388671875, 74.83351135253906, -139.6379852294922, -2.993803024291992, 9.124916076660156, 123.687744140625, 8.626617431640625, 133.34588623046875, 45.64192581176758, 9.414848327636719, 18.521116256713867, -3.0553321838378906, 110.58346557617188, 11.930801391601562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000278.npy"}
|
|
{"epoch": 0.42025699168556313, "step": 279, "batch_size": 64, "mean": 51.26152038574219, "std": 52.51218795776367, "min": -81.87791442871094, "p10": -5.4832149505615195, "median": 48.16475296020508, "p90": 121.01920547485352, "max": 139.66720581054688, "pos_frac": 0.859375, "sample": [-0.9421443939208984, 5.079936981201172, 129.47412109375, 46.40325164794922, -81.87791442871094, 108.21125793457031, 5.9069366455078125, -7.192352294921875, -13.470855712890625, 91.06791687011719, 5.494483947753906, 116.62075805664062, 57.49365997314453, 121.38699340820312, 43.16676712036133, 139.66720581054688, 81.30628967285156, 56.46332931518555, -44.787376403808594, 6.330913543701172, -7.042537689208984, 108.51280212402344, 69.0403823852539, 49.92625427246094, 13.390541076660156, 70.9464111328125, 23.8117618560791, -1.8447952270507812, 21.877525329589844, 97.80703735351562, 9.4073486328125, 94.3664779663086, 135.99313354492188, 34.85303497314453, -11.0478515625, 114.68743896484375, 68.55513000488281, 37.12738037109375, 1.6265945434570312, 129.10745239257812, 120.1610336303711, 8.63271713256836, 120.05667114257812, 12.386688232421875, 29.77880859375, 69.91706085205078, 3.713794708251953, 7.49700927734375, 53.730194091796875, 128.75685119628906, 80.50645446777344, -42.24317932128906, 92.22508239746094, 119.58253479003906, 1.7843570709228516, 15.122650146484375, 25.536102294921875, 21.33599853515625, 87.17898559570312, 98.6448974609375, 98.29016876220703, 64.88815307617188, 126.97406005859375, 9.375679016113281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000279.npy"}
|
|
{"epoch": 0.4217687074829932, "step": 280, "batch_size": 64, "mean": 51.18370819091797, "std": 53.19124984741211, "min": -25.66905975341797, "p10": -5.036739540100098, "median": 31.55238914489746, "p90": 127.3195556640625, "max": 174.88092041015625, "pos_frac": 0.84375, "sample": [102.32502746582031, 126.57014465332031, 2.2836360931396484, 99.26217651367188, -5.249664306640625, 124.53767395019531, 2.1273746490478516, -19.435264587402344, 8.634584426879883, 75.891357421875, 31.48487091064453, 5.8064422607421875, 99.64863586425781, 120.626708984375, 121.74513244628906, 119.03062438964844, 68.86054229736328, 41.02891159057617, 115.57899475097656, 33.62104034423828, 10.1573486328125, 5.096942901611328, -1.4161643981933594, -1.1978683471679688, 127.64073181152344, 81.45243835449219, 5.087726593017578, 82.69905853271484, 96.91854858398438, 174.88092041015625, 31.61990737915039, -9.33026123046875, 20.260177612304688, 92.86341857910156, 7.926280975341797, 44.03379821777344, 137.81741333007812, -20.3382568359375, 128.93582153320312, 138.00057983398438, -25.66905975341797, -8.00936508178711, -4.942741394042969, 2.3124866485595703, 74.52395629882812, 11.567127227783203, 51.06158447265625, -5.077024459838867, 17.998733520507812, 9.568307876586914, 128.7520751953125, 42.49314498901367, 26.637474060058594, 117.83607482910156, 7.394430160522461, 18.609878540039062, 10.552436828613281, 10.924484252929688, 28.7922306060791, 22.818763732910156, 7.741371154785156, 115.52734375, 130.96202087402344, 55.89418029785156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000280.npy"}
|
|
{"epoch": 0.42328042328042326, "step": 281, "batch_size": 64, "mean": 46.05894088745117, "std": 61.26619338989258, "min": -83.69952392578125, "p10": -23.088070678710938, "median": 33.59571075439453, "p90": 124.66129989624024, "max": 138.22618103027344, "pos_frac": 0.75, "sample": [-37.78419876098633, 106.46820068359375, 13.487289428710938, -0.8614044189453125, 113.22822570800781, 84.93618774414062, 38.234413146972656, 97.66316223144531, 55.4910888671875, 128.17617797851562, 34.93517303466797, 127.98969268798828, 89.98625183105469, 76.37894439697266, -6.8408050537109375, 4.58671760559082, 10.095743179321289, 121.67657470703125, 0.33011817932128906, 128.839111328125, 74.21603393554688, 2.657878875732422, -23.278656005859375, 32.256248474121094, 112.45675659179688, 1.6945152282714844, 15.153312683105469, -3.616474151611328, 41.47993469238281, -83.69952392578125, 6.6002655029296875, 14.453136444091797, -3.8887710571289062, 30.904401779174805, 120.7247314453125, -80.06929016113281, 61.498687744140625, 138.22618103027344, 16.393798828125, -60.538089752197266, 128.29360961914062, 119.93910217285156, 127.48807525634766, 67.78343963623047, 125.3528823852539, 27.19811248779297, 117.99391174316406, 15.58029556274414, 122.55391693115234, 71.36434936523438, -3.4001617431640625, 8.023078918457031, 122.08993530273438, -75.92122650146484, -1.4889564514160156, 108.7403564453125, 123.047607421875, -22.64337158203125, 104.51614379882812, 91.63739776611328, -24.711544036865234, -7.290277481079102, 23.08638572692871, -22.102521896362305], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000281.npy"}
|
|
{"epoch": 0.42479213907785335, "step": 282, "batch_size": 64, "mean": 42.963863372802734, "std": 66.67547607421875, "min": -112.83671569824219, "p10": -48.65839729309081, "median": 34.37539005279541, "p90": 128.39869384765626, "max": 170.42123413085938, "pos_frac": 0.6875, "sample": [38.87163543701172, -18.688438415527344, -56.39479064941406, -6.1235504150390625, 12.910507202148438, 110.70661926269531, 95.53305053710938, 6.820947647094727, -4.177509307861328, 138.3661346435547, 75.9499282836914, 119.90911865234375, 53.312232971191406, 116.55633544921875, 125.060791015625, 27.670562744140625, 133.1689453125, 89.00091552734375, -6.217620849609375, 98.89468383789062, 44.72084045410156, 26.410415649414062, -5.981821060180664, 145.50808715820312, 29.8791446685791, -15.97500228881836, -32.12803649902344, 118.29945373535156, 119.62239074707031, 10.543514251708984, -37.20222854614258, -16.551424026489258, -112.83671569824219, 117.99253845214844, 12.614652633666992, -64.3548583984375, 129.8292236328125, 13.090234756469727, -69.41778564453125, 45.08012390136719, 97.54427337646484, 117.08454132080078, 5.8879241943359375, 63.23054122924805, 124.50677490234375, 170.42123413085938, 68.93264770507812, -0.8528537750244141, -5.2770233154296875, 29.36810302734375, -22.149383544921875, -10.482467651367188, -58.40916442871094, 50.177738189697266, -53.56818389892578, 144.89523315429688, 135.80201721191406, 91.90049743652344, -64.63587188720703, 10.92816162109375, 46.33537292480469, 63.993370056152344, 106.63571166992188, 27.144729614257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000282.npy"}
|
|
{"epoch": 0.42630385487528344, "step": 283, "batch_size": 64, "mean": 39.28058624267578, "std": 61.946781158447266, "min": -119.21185302734375, "p10": -28.63833618164062, "median": 25.639095306396484, "p90": 119.67555236816406, "max": 146.69305419921875, "pos_frac": 0.765625, "sample": [141.45213317871094, -87.00764465332031, -30.141128540039062, -40.53950500488281, 97.53118896484375, 83.838623046875, 5.548164367675781, 16.077232360839844, 86.78852844238281, 1.52569580078125, -15.453994750976562, 90.27133178710938, 41.071624755859375, 46.047237396240234, 118.65507507324219, 10.393047332763672, 15.623382568359375, 127.45059967041016, -25.131820678710938, 35.52001953125, 113.15848541259766, 96.66905212402344, 100.65292358398438, 12.063705444335938, 73.80760192871094, 128.22705078125, 95.34796905517578, -0.8445053100585938, 146.69305419921875, 13.936500549316406, 0.9641952514648438, 120.11289978027344, -77.95294952392578, 8.215957641601562, 7.1255950927734375, 76.34881591796875, -80.31714630126953, 34.6126708984375, 61.687835693359375, 104.57955932617188, 130.3323211669922, -20.113176345825195, 112.92079162597656, 110.86152648925781, 16.008312225341797, 16.630138397216797, 25.719642639160156, -119.21185302734375, 6.426359176635742, -44.581085205078125, 81.69822692871094, 3.355060577392578, 110.64453125, -1.7181816101074219, 73.39007568359375, 81.29468536376953, -8.8916015625, -2.678882598876953, 7.787683486938477, 7.6593170166015625, -20.497116088867188, 134.0962371826172, 25.558547973632812, 32.656951904296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000283.npy"}
|
|
{"epoch": 0.42781557067271353, "step": 284, "batch_size": 64, "mean": 42.602237701416016, "std": 57.829139709472656, "min": -119.74340057373047, "p10": -6.430471801757809, "median": 24.3323974609375, "p90": 123.26398315429688, "max": 144.59010314941406, "pos_frac": 0.78125, "sample": [-15.580245971679688, 7.0673980712890625, 45.725372314453125, 26.826324462890625, 140.81243896484375, 85.28813171386719, 4.798025131225586, -19.58026123046875, 144.59010314941406, 8.347217559814453, 0.4579124450683594, 61.835872650146484, -1.4773635864257812, -7.95172119140625, 107.78608703613281, -2.880889892578125, 6.727865219116211, 89.3304672241211, -1.7860565185546875, 123.43098449707031, 8.757568359375, 42.92543029785156, 16.871734619140625, -39.06756591796875, -22.101829528808594, 21.838470458984375, 3.1121597290039062, 28.679977416992188, 56.26596450805664, -1.0992851257324219, 117.45439147949219, 79.38545227050781, 93.1352310180664, 103.43759155273438, 10.902362823486328, 14.155433654785156, 116.35762023925781, 124.98757934570312, 135.04660034179688, 12.363716125488281, 95.24492645263672, 144.22853088378906, 47.03309631347656, 4.643985748291016, 0.9980754852294922, -0.631591796875, 95.65777587890625, 9.451061248779297, 83.49523162841797, 122.87431335449219, 4.168449401855469, 53.58622360229492, 114.55206298828125, -1.6349029541015625, 15.150596618652344, 59.46803283691406, 141.05628967285156, 72.0554428100586, 61.902278900146484, -119.74340057373047, 82.90218353271484, -101.0165023803711, 15.679374694824219, -1.7546424865722656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000284.npy"}
|
|
{"epoch": 0.4293272864701436, "step": 285, "batch_size": 64, "mean": 33.944580078125, "std": 61.261661529541016, "min": -110.18999481201172, "p10": -37.13433761596679, "median": 20.936973571777344, "p90": 126.77146301269532, "max": 137.4065704345703, "pos_frac": 0.671875, "sample": [31.55243682861328, 8.710060119628906, -27.5106201171875, 118.30258178710938, 13.551605224609375, -42.42025375366211, -110.18999481201172, 3.7599639892578125, 45.022762298583984, -26.72995948791504, 40.32672882080078, 124.93101501464844, 120.60066986083984, 15.116355895996094, 130.51243591308594, -44.58259582519531, 61.18030548095703, 127.56022644042969, -3.510417938232422, 7.762271881103516, 26.757591247558594, 2.5717239379882812, -65.45689392089844, -42.99959945678711, -2.6948623657226562, -73.48095703125, -8.05975341796875, 28.77105712890625, 43.0001220703125, -34.4193115234375, -20.080543518066406, 137.4065704345703, -12.637626647949219, 118.75359344482422, 87.29150390625, 47.95962142944336, 45.61205291748047, 12.637222290039062, 120.21198272705078, 124.238525390625, 103.88467407226562, -5.241539001464844, -2.6596908569335938, 130.29788208007812, 101.4472885131836, 27.057228088378906, 0.3480873107910156, 134.55657958984375, -14.892303466796875, -2.8053550720214844, 29.31730079650879, 103.25302124023438, 11.78106689453125, 34.756622314453125, -5.4034881591796875, 51.20008850097656, 132.69900512695312, 133.29635620117188, -38.29792022705078, 12.547149658203125, 0.45758056640625, 29.454330444335938, -11.409912109375, 87.48167419433594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000285.npy"}
|
|
{"epoch": 0.4308390022675737, "step": 286, "batch_size": 64, "mean": 55.40478515625, "std": 60.74290084838867, "min": -77.16651916503906, "p10": -2.1728054046630834, "median": 48.38223838806152, "p90": 139.08076629638674, "max": 187.4263916015625, "pos_frac": 0.890625, "sample": [53.54913330078125, 3.8637638092041016, 2.8295764923095703, 55.743743896484375, 11.145782470703125, 118.36227416992188, 9.899375915527344, 2.3956851959228516, 30.00815773010254, -3.2965545654296875, 58.63630676269531, 6.089599609375, 37.37433624267578, 126.48634338378906, 129.95225524902344, 148.74209594726562, 120.00433349609375, 10.227289199829102, -20.954116821289062, 13.906982421875, 98.4803237915039, -44.32691955566406, 41.9145622253418, 133.6464080810547, 1.1010856628417969, -59.38084411621094, 76.52558898925781, 54.452816009521484, -9.040390014648438, 82.638671875, 122.33330535888672, 90.32882690429688, 127.74211120605469, 143.0673065185547, 76.2283706665039, 163.39471435546875, 49.8924674987793, 113.90711975097656, -77.16651916503906, 5.7683563232421875, 141.18983459472656, 187.4263916015625, 35.6810302734375, 3.1447906494140625, 95.7995376586914, 11.306289672851562, 14.992124557495117, 50.050201416015625, 134.15960693359375, 131.166259765625, 142.3072509765625, 46.87200927734375, 28.768352508544922, 17.39894676208496, 0.4492759704589844, 34.59037780761719, 81.19635772705078, 23.266437530517578, -50.81504821777344, 153.06224060058594, 69.08270263671875, 0.5265541076660156, 74.63134765625, 13.179840087890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000286.npy"}
|
|
{"epoch": 0.4323507180650038, "step": 287, "batch_size": 64, "mean": 52.63068389892578, "std": 61.449440002441406, "min": -116.1794662475586, "p10": -4.78665142059326, "median": 27.892574310302734, "p90": 137.53872375488282, "max": 158.15615844726562, "pos_frac": 0.8125, "sample": [25.440261840820312, -5.9973907470703125, 6.6765289306640625, 81.13442993164062, 158.15615844726562, 132.51370239257812, 15.599456787109375, 15.396184921264648, 52.587913513183594, -5.6312408447265625, 66.18972778320312, 12.44533920288086, 50.862701416015625, 42.22877502441406, 144.0522003173828, 144.33963012695312, 150.11444091796875, 66.20852661132812, 128.41653442382812, 94.47055053710938, 38.543617248535156, 28.269073486328125, 124.10795593261719, 0.8053035736083984, 10.685047149658203, 105.6698226928711, -1.8453750610351562, -40.27033233642578, 117.20769500732422, 2.069110870361328, 110.24315643310547, 11.303817749023438, -116.1794662475586, 5.927940368652344, -2.8159427642822266, 121.69572448730469, 19.17626953125, -1.3445854187011719, 19.870765686035156, 121.15727233886719, 109.2136001586914, -27.82386016845703, 137.7057342529297, 94.73430633544922, -12.985282897949219, 6.53692626953125, 137.14903259277344, 3.469982147216797, 117.10457611083984, 122.96288299560547, -0.0702056884765625, 10.199798583984375, 100.99349212646484, 3.4563045501708984, 2.395038604736328, 27.516075134277344, 140.97911071777344, 9.152198791503906, 71.119384765625, -9.938102722167969, -1.0249252319335938, 145.89175415039062, 128.68942260742188, 1.4550857543945312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000287.npy"}
|
|
{"epoch": 0.43386243386243384, "step": 288, "batch_size": 64, "mean": 39.66306686401367, "std": 62.9980354309082, "min": -101.63677978515625, "p10": -34.97567596435547, "median": 32.52634811401367, "p90": 126.11738739013673, "max": 159.24652099609375, "pos_frac": 0.6875, "sample": [38.27478790283203, 3.3530502319335938, -36.96099853515625, -101.63677978515625, 46.156410217285156, 20.08669662475586, 18.42805290222168, -3.2460784912109375, 92.517578125, 54.57051086425781, 19.867244720458984, 83.14453125, 14.692291259765625, 128.06607055664062, -13.42144775390625, 93.93682861328125, -34.56854248046875, 64.28970336914062, -19.876556396484375, 77.85441589355469, -92.59815216064453, 126.33525085449219, 159.24652099609375, 125.60903930664062, -18.46490478515625, 139.52862548828125, 35.704444885253906, 46.66523742675781, 7.971061706542969, 32.304832458496094, 138.41717529296875, -78.97422790527344, 61.610198974609375, 74.66261291503906, 121.28398132324219, 68.24200439453125, 56.29588317871094, -35.15016174316406, 32.74786376953125, -27.687217712402344, -54.80681610107422, -58.6717529296875, 136.1661376953125, -2.9615402221679688, -0.8302688598632812, -13.955581665039062, 108.6264877319336, -15.09014892578125, 123.64016723632812, 124.5780029296875, 112.50882720947266, -1.272064208984375, 98.0240478515625, 13.740371704101562, -3.5780715942382812, -0.6517066955566406, 84.49504089355469, 22.426856994628906, 0.40702247619628906, 136.64395141601562, 17.76426887512207, 61.03802490234375, 29.849166870117188, 101.06819152832031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000288.npy"}
|
|
{"epoch": 0.43537414965986393, "step": 289, "batch_size": 64, "mean": 34.633052825927734, "std": 68.26002502441406, "min": -129.22113037109375, "p10": -40.91339492797851, "median": 14.024116516113281, "p90": 129.92740783691406, "max": 174.94180297851562, "pos_frac": 0.6875, "sample": [46.85755920410156, 15.362548828125, 117.28782653808594, -15.396963119506836, -49.95088195800781, -21.07611083984375, -17.578807830810547, 35.25548553466797, 93.3323745727539, 39.354576110839844, -5.481597900390625, 47.82494354248047, 114.87567901611328, 123.89408874511719, 37.30430603027344, 4.949010848999023, 142.53985595703125, -42.613861083984375, -18.686542510986328, 122.21188354492188, 14.146644592285156, 136.47781372070312, 2.4445648193359375, -21.515396118164062, 119.89482879638672, 3.1044654846191406, 130.23487854003906, 5.168148040771484, -4.432079315185547, 135.44427490234375, -2.1408843994140625, 12.605951309204102, -25.242130279541016, -129.22113037109375, 151.9788818359375, -6.601726531982422, 11.515024185180664, 174.94180297851562, 8.865005493164062, 13.901588439941406, 1.1019573211669922, 17.75267791748047, -44.17657470703125, 21.668256759643555, 57.413787841796875, -36.945640563964844, 50.2829704284668, -4.371318817138672, -65.27278137207031, -108.33181762695312, 101.526123046875, 18.109222412109375, 5.918548583984375, 23.112319946289062, 135.56069946289062, 125.44349670410156, 111.10968780517578, 129.20997619628906, 129.14633178710938, 5.338592529296875, 2.9651832580566406, -64.26395416259766, 110.45845031738281, -8.076793670654297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000289.npy"}
|
|
{"epoch": 0.436885865457294, "step": 290, "batch_size": 64, "mean": 44.82925033569336, "std": 67.79192352294922, "min": -120.85993957519531, "p10": -15.824046897888179, "median": 31.059839248657227, "p90": 136.0748504638672, "max": 154.54559326171875, "pos_frac": 0.765625, "sample": [81.16232299804688, 33.748512268066406, 115.93029022216797, 154.54559326171875, 51.36640930175781, 11.448064804077148, -17.70041847229004, -9.651025772094727, 6.38897705078125, 150.37176513671875, 57.95634460449219, 14.834041595458984, 23.797645568847656, -96.7868881225586, 35.33744812011719, -2.2154006958007812, 4.943763732910156, -18.26488494873047, -5.874103546142578, -10.027580261230469, 50.00543212890625, 14.542808532714844, 136.47833251953125, 140.77713012695312, 14.758201599121094, 13.967643737792969, 26.860706329345703, -4.56890869140625, 7.420736312866211, -90.70933532714844, 134.53509521484375, 143.44497680664062, 123.35460662841797, 39.79891586303711, 29.175888061523438, 141.45986938476562, 135.13339233398438, 84.07942199707031, 50.337196350097656, -67.73126220703125, 116.93913269042969, 126.86859130859375, 16.580196380615234, -109.72502136230469, -1.8627853393554688, -11.445846557617188, -3.3704776763916016, 145.17489624023438, 132.73513793945312, 129.37477111816406, 29.994495391845703, 81.93643188476562, 2.942962646484375, 32.12518310546875, -120.85993957519531, 116.51651000976562, 19.422035217285156, 134.20095825195312, 106.7999267578125, 59.19384002685547, 49.9558219909668, 28.605804443359375, 78.04158020019531, 4.496124267578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000290.npy"}
|
|
{"epoch": 0.4383975812547241, "step": 291, "batch_size": 64, "mean": 50.27552795410156, "std": 65.62101745605469, "min": -125.73468017578125, "p10": -19.69001083374023, "median": 25.974166870117188, "p90": 137.2735137939453, "max": 217.62237548828125, "pos_frac": 0.765625, "sample": [171.41741943359375, 133.71743774414062, 30.930084228515625, -16.9068603515625, 30.889755249023438, 20.849441528320312, 5.766395568847656, -0.2585906982421875, 131.61322021484375, 9.656265258789062, 88.36599731445312, 98.85916900634766, -16.219959259033203, -1.024484634399414, 124.2276611328125, 124.65821075439453, -24.57061004638672, -20.882789611816406, 20.1923828125, 122.87274932861328, -4.277740478515625, 6.536445617675781, 41.516326904296875, 67.60970306396484, 81.88430786132812, 121.70486450195312, 133.3323974609375, -1.2218666076660156, 140.21385192871094, 4.6809234619140625, 74.47309875488281, 145.00106811523438, -0.7216758728027344, 5.0825042724609375, -23.281585693359375, 5.888132095336914, 116.10951232910156, 13.2071533203125, 141.79171752929688, 29.56755828857422, -2.7563018798828125, 138.76824951171875, -125.73468017578125, 11.858261108398438, 139.83465576171875, 53.60308837890625, 49.62701416015625, 127.73944091796875, 97.13074493408203, 217.62237548828125, -23.949527740478516, 17.939315795898438, -36.71860885620117, 22.380775451660156, -44.43689727783203, 5.349071502685547, 20.166595458984375, 133.78579711914062, 12.493743896484375, 88.03887939453125, 84.90155029296875, 76.99283599853516, 11.01446533203125, 8.733146667480469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000291.npy"}
|
|
{"epoch": 0.4399092970521542, "step": 292, "batch_size": 64, "mean": 38.706398010253906, "std": 69.41310119628906, "min": -124.6072998046875, "p10": -39.983372497558584, "median": 33.775352478027344, "p90": 132.1277847290039, "max": 199.01071166992188, "pos_frac": 0.6875, "sample": [-114.93714904785156, -11.826484680175781, 66.5761489868164, 0.7329730987548828, 139.7947235107422, 72.43805694580078, 35.777713775634766, 199.01071166992188, 106.15359497070312, -30.493820190429688, -44.050323486328125, 5.022344589233398, 12.674184799194336, 2.4796981811523438, -63.53871154785156, -0.7705535888671875, 64.82907104492188, 24.763946533203125, 66.707275390625, 24.390655517578125, 40.030609130859375, 111.14393615722656, 99.64596557617188, -27.357646942138672, -7.485321044921875, -3.2625045776367188, -8.06527328491211, 129.45010375976562, -52.85462951660156, 130.31182861328125, 37.873756408691406, 109.66372680664062, 7.860506057739258, -10.065826416015625, -0.4301128387451172, -113.76824188232422, 88.0162353515625, -124.6072998046875, 0.03173828125, 2.5179214477539062, -13.97271728515625, 125.99213409423828, 109.9615478515625, -14.667016983032227, 141.97500610351562, 93.13862609863281, 9.9151611328125, 65.83165740966797, 33.73269271850586, 8.986038208007812, 155.82778930664062, 133.042724609375, -0.27605247497558594, 120.38603210449219, 59.14418411254883, 38.829872131347656, 92.3662109375, 33.81801223754883, 132.9060516357422, 68.6871566772461, 36.3109130859375, -11.644378662109375, -51.92362976074219, 144.45791625976562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000292.npy"}
|
|
{"epoch": 0.4414210128495843, "step": 293, "batch_size": 64, "mean": 55.180824279785156, "std": 75.52891540527344, "min": -138.0938262939453, "p10": -24.991554641723628, "median": 35.914085388183594, "p90": 139.6563491821289, "max": 193.02178955078125, "pos_frac": 0.78125, "sample": [4.5093536376953125, 28.480514526367188, 12.308284759521484, 135.7034912109375, 111.3876724243164, 26.645183563232422, -29.895946502685547, 13.168411254882812, 111.14893341064453, 4.416839599609375, 139.69052124023438, -7.78339958190918, 140.88136291503906, 102.6028823852539, 113.33058166503906, 4.498893737792969, 127.77850341796875, 91.07768249511719, 118.0816650390625, 124.8819351196289, 128.7576141357422, -27.366413116455078, -13.275947570800781, -89.57963562011719, 112.75154876708984, 2.8192291259765625, 108.52357482910156, -55.119815826416016, 13.922386169433594, 170.2889404296875, 0.8964347839355469, 153.58236694335938, 167.7021484375, -107.04017639160156, 118.3106689453125, 6.227283477783203, 133.8231658935547, -138.0938262939453, -7.947559356689453, 0.0256195068359375, 25.211257934570312, 138.8596649169922, -75.53305053710938, -10.967178344726562, -9.279937744140625, 4.379005432128906, 44.75225830078125, 116.10633850097656, 193.02178955078125, 32.08735656738281, 117.91567993164062, 20.617340087890625, 64.05362701416016, 10.938899993896484, 132.2771759033203, -19.450218200683594, 139.5766143798828, 28.9783935546875, 89.68218231201172, 138.2845916748047, 39.740814208984375, -6.036319732666016, 116.68502807617188, 147.55044555664062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000293.npy"}
|
|
{"epoch": 0.4429327286470144, "step": 294, "batch_size": 64, "mean": 36.940494537353516, "std": 58.492923736572266, "min": -112.0958480834961, "p10": -15.741166305541988, "median": 10.733673095703125, "p90": 133.55364532470702, "max": 179.64761352539062, "pos_frac": 0.75, "sample": [133.45953369140625, 35.59120178222656, 42.9163818359375, 29.80169677734375, 123.84646606445312, 10.722831726074219, 139.76333618164062, -28.33270263671875, -31.197853088378906, 4.219093322753906, 94.564453125, -112.0958480834961, -5.494871139526367, 25.6556396484375, 20.86846351623535, 70.8702621459961, 133.59397888183594, 10.744514465332031, 133.78907775878906, 17.455772399902344, -7.1924591064453125, 17.93415069580078, 1.6951236724853516, 65.5371322631836, -8.771163940429688, 142.390869140625, -5.293315887451172, 80.72830200195312, -3.0209426879882812, 179.64761352539062, 109.504150390625, -23.619808197021484, 2.122964859008789, 172.51382446289062, 4.84712028503418, 2.75689697265625, 6.628387451171875, -0.9946174621582031, -32.917205810546875, 32.76124572753906, 10.086402893066406, 6.152587890625, 132.53074645996094, -1.595001220703125, 4.720205307006836, 75.08284759521484, 4.401269912719727, 45.7725715637207, 7.3157958984375, 141.46304321289062, -17.378345489501953, 47.3062629699707, 108.55184936523438, 5.69085693359375, 4.16798210144043, 63.213218688964844, 0.1557159423828125, -0.32306480407714844, 22.470443725585938, 6.402923583984375, 90.64502716064453, -17.435222625732422, 48.71491622924805, -11.92108154296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000294.npy"}
|
|
{"epoch": 0.4444444444444444, "step": 295, "batch_size": 64, "mean": 34.846317291259766, "std": 65.95061492919922, "min": -116.03076171875, "p10": -30.07492141723632, "median": 12.039669036865234, "p90": 135.44107055664062, "max": 174.03799438476562, "pos_frac": 0.703125, "sample": [141.94329833984375, 15.051803588867188, -5.268524169921875, -10.990798950195312, 64.00313568115234, 144.78329467773438, 128.52114868164062, 40.531864166259766, 16.03626251220703, 38.34912109375, 146.990234375, -11.634395599365234, 174.03799438476562, 12.840446472167969, 4.359455108642578, 10.687385559082031, 3.01953125, 8.712608337402344, -25.055618286132812, 103.64002990722656, 136.46279907226562, -100.58512878417969, -3.0593299865722656, 8.973905563354492, 75.17031860351562, 28.851837158203125, 5.840471267700195, -12.908626556396484, 133.05703735351562, -0.8402633666992188, -61.34101867675781, -4.216346740722656, 143.77203369140625, 123.2862548828125, 0.45220947265625, -81.63465881347656, 115.44746398925781, 22.694599151611328, 42.231258392333984, 139.8418426513672, 73.92033386230469, -116.03076171875, 10.05389404296875, -61.589263916015625, 100.36959838867188, 56.74903106689453, -60.05848693847656, -16.216026306152344, -32.226051330566406, 90.32686614990234, -7.783857345581055, 2.9217662811279297, 32.866661071777344, 11.2388916015625, 7.769338607788086, 8.335067749023438, 97.23274230957031, -14.985610961914062, 33.74811553955078, 5.1221160888671875, 92.36441802978516, 97.45858764648438, 112.32042694091797, -5.79852294921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000295.npy"}
|
|
{"epoch": 0.4459561602418745, "step": 296, "batch_size": 64, "mean": 42.631317138671875, "std": 70.9499740600586, "min": -119.09567260742188, "p10": -55.28158836364745, "median": 36.14270782470703, "p90": 137.19476623535155, "max": 170.79660034179688, "pos_frac": 0.796875, "sample": [61.568695068359375, 123.56095886230469, 119.93663024902344, 6.405546188354492, 0.37296295166015625, 35.77679443359375, 3.1755924224853516, 166.40826416015625, -0.3888206481933594, 110.35894775390625, 37.102840423583984, 6.051937103271484, -61.66424560546875, 140.27085876464844, 1.774688720703125, 62.70005798339844, 136.76072692871094, 84.8673095703125, 11.904556274414062, 57.37715148925781, 0.640533447265625, 20.70895767211914, -3.203948974609375, -81.65496826171875, -45.935791015625, -23.911178588867188, 2.401458740234375, 49.21137237548828, 19.080543518066406, 29.704788208007812, 12.136039733886719, -76.65217590332031, 52.984375, 3.554046630859375, 170.79660034179688, 36.50862121582031, 5.377372741699219, -59.286930084228516, 117.6490249633789, -87.93742370605469, 54.48070526123047, 95.22793579101562, 143.48162841796875, -119.09567260742188, 126.52970886230469, 41.07611083984375, 110.53724670410156, 108.66149139404297, 108.47254943847656, 78.609619140625, 132.31446838378906, 11.045791625976562, 137.3807830810547, 13.970708847045898, 168.5279541015625, -15.212066650390625, 6.303611755371094, 146.30874633789062, 2.149810791015625, -41.24928283691406, -92.61526489257812, 103.79098510742188, 95.4722900390625, 65.7416000366211], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000296.npy"}
|
|
{"epoch": 0.4474678760393046, "step": 297, "batch_size": 64, "mean": 47.264305114746094, "std": 60.00839614868164, "min": -105.57952880859375, "p10": -12.15131607055664, "median": 34.68609809875488, "p90": 132.48756103515626, "max": 150.6842803955078, "pos_frac": 0.828125, "sample": [124.92030334472656, -47.601951599121094, 10.719886779785156, 100.41838836669922, 41.824920654296875, 21.175888061523438, 10.978523254394531, 131.35531616210938, -5.4746551513671875, -105.57952880859375, 115.22100830078125, 8.45538330078125, 23.344425201416016, 54.49258041381836, 3.198484420776367, 138.82720947265625, 18.758636474609375, 71.42613220214844, 9.50360107421875, 9.5867919921875, 49.40928649902344, 13.653121948242188, -62.53614807128906, -67.5276107788086, 0.23941802978515625, 132.97280883789062, 35.15491485595703, 8.838699340820312, 111.10712432861328, 7.785575866699219, 130.64971923828125, 141.2443084716797, -7.6889801025390625, -14.049562454223633, 20.646198272705078, 20.776052474975586, 8.955490112304688, -12.606903076171875, 110.7933349609375, 80.49459075927734, 49.92694091796875, 27.530723571777344, 107.05741882324219, 45.947784423828125, 35.73368835449219, 106.52391815185547, 59.44408416748047, 0.6126337051391602, -15.518905639648438, 133.66348266601562, 12.603397369384766, 116.68440246582031, 126.16688537597656, 110.30252075195312, 3.4570350646972656, 70.19903564453125, 146.48983764648438, 60.58650207519531, -7.5070648193359375, 150.6842803955078, -11.088279724121094, 34.217281341552734, 144.14944458007812, 73.18576049804688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000297.npy"}
|
|
{"epoch": 0.4489795918367347, "step": 298, "batch_size": 64, "mean": 35.74412536621094, "std": 70.75401306152344, "min": -132.3207244873047, "p10": -55.62729721069336, "median": 15.262542724609375, "p90": 132.79737396240236, "max": 157.43380737304688, "pos_frac": 0.765625, "sample": [69.23043060302734, -92.50331115722656, 154.5932159423828, 16.98040771484375, 142.34588623046875, 9.962814331054688, 109.92266845703125, 7.08135986328125, -58.685211181640625, 109.07344055175781, 143.05162048339844, 125.82389068603516, -57.23011779785156, 14.912628173828125, 5.023948669433594, 62.33070755004883, 105.14501953125, -51.88738250732422, 113.52334594726562, 93.87403869628906, 4.246006011962891, 1.0694847106933594, 125.9725570678711, 46.781097412109375, 15.612457275390625, 11.508651733398438, -97.96028900146484, 10.674886703491211, 81.8735580444336, 131.8133087158203, 2.573495864868164, 142.98312377929688, -10.418472290039062, 8.873466491699219, 29.920719146728516, 103.74017333984375, 10.63184928894043, 133.2191162109375, 4.514001846313477, 0.8138504028320312, -3.1572189331054688, -0.7808704376220703, 2.626861572265625, 17.706741333007812, 18.820396423339844, 157.43380737304688, 129.47283935546875, -7.947364807128906, -19.665149688720703, 7.875, 55.99473190307617, 17.151596069335938, -48.41191101074219, -68.1953125, 0.5797882080078125, 124.0371322631836, -89.66012573242188, 91.21778869628906, -17.690704345703125, -132.3207244873047, 133.58033752441406, 17.13882827758789, 1.0363540649414062, 119.7686538696289], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000298.npy"}
|
|
{"epoch": 0.4504913076341648, "step": 299, "batch_size": 64, "mean": 31.84062385559082, "std": 74.0186767578125, "min": -127.55663299560547, "p10": -68.17895889282227, "median": 16.35663604736328, "p90": 125.11120147705078, "max": 176.8916015625, "pos_frac": 0.671875, "sample": [123.2457275390625, 100.53633117675781, -13.370939254760742, -16.75598907470703, 13.915658950805664, 113.32780456542969, 29.971725463867188, 74.66375732421875, 176.8916015625, -0.2712116241455078, -61.91291046142578, 88.45115661621094, -127.55663299560547, 4.025062561035156, -88.02046203613281, -29.7174072265625, 20.157928466796875, 129.205810546875, 58.24162292480469, -66.74007415771484, 134.64393615722656, -32.15513229370117, 39.07533645629883, 79.97052001953125, -75.10528564453125, -109.21212005615234, 6.672416687011719, 11.239490509033203, 147.5533905029297, -2.2996139526367188, 117.62457275390625, -0.11902618408203125, 59.15801239013672, -110.50239562988281, 14.300323486328125, 5.035400390625, 42.11614990234375, 121.95480346679688, 11.006050109863281, 6.567054748535156, 52.1165771484375, -39.567100524902344, 123.1091079711914, 118.15589141845703, 154.56153869628906, -63.37580871582031, 13.887075424194336, 105.20853424072266, 95.34878540039062, 89.64962768554688, -0.030088424682617188, 25.0875244140625, 94.30062103271484, -103.80685424804688, 7.919891357421875, 107.70661926269531, 7.455718994140625, 125.91069030761719, -1.2614002227783203, 18.412948608398438, -1.3694496154785156, 39.823707580566406, -68.79562377929688, 141.53895568847656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000299.npy"}
|
|
{"epoch": 0.4520030234315949, "step": 300, "batch_size": 64, "mean": 39.158287048339844, "std": 70.29944610595703, "min": -108.84934997558594, "p10": -49.174880981445305, "median": 31.4169340133667, "p90": 139.6410842895508, "max": 164.82810974121094, "pos_frac": 0.703125, "sample": [31.318700790405273, 117.55523681640625, 11.667922973632812, 62.02021789550781, -2.617664337158203, 16.525527954101562, 112.09244537353516, 1.708648681640625, -0.04951286315917969, -99.6884994506836, 146.44692993164062, 18.618675231933594, 108.28582000732422, 156.82835388183594, 143.3840789794922, 67.93440246582031, 142.34951782226562, -51.93016052246094, -14.091102600097656, 99.0713882446289, 1.5127124786376953, 109.41675567626953, 31.515167236328125, -29.91582489013672, -96.59089660644531, 78.38375854492188, 121.28730773925781, -16.763893127441406, 40.090782165527344, 24.53866195678711, 3.313243865966797, -108.84934997558594, 108.85627746582031, -1.4086112976074219, -90.6820068359375, 65.20736694335938, 31.52242088317871, 150.54273986816406, 8.5277099609375, 136.84237670898438, 82.64352416992188, 39.722721099853516, 7.1641845703125, 164.82810974121094, 102.48197937011719, -42.74589538574219, -0.7002220153808594, 117.93984985351562, -7.811431884765625, 69.91572570800781, 74.83920288085938, 10.368461608886719, 33.814605712890625, 48.720428466796875, -8.423149108886719, -70.47706604003906, 17.129623413085938, 132.66616821289062, 66.48393249511719, -84.4600830078125, 8.916038513183594, -1.3516311645507812, 140.8405303955078, -31.152664184570312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000300.npy"}
|
|
{"epoch": 0.45351473922902497, "step": 301, "batch_size": 64, "mean": 34.58222198486328, "std": 67.7806167602539, "min": -136.20767211914062, "p10": -36.749315643310545, "median": 28.6845064163208, "p90": 131.65882415771483, "max": 192.64767456054688, "pos_frac": 0.734375, "sample": [115.87348175048828, 14.411231994628906, 54.67046356201172, 39.006046295166016, 25.0723876953125, -33.232147216796875, 97.7933578491211, 142.8904571533203, 103.234130859375, 3.6474151611328125, 32.79967498779297, 29.092126846313477, 20.717361450195312, -8.959178924560547, -119.80829620361328, 24.858673095703125, -4.855499267578125, 97.928466796875, 98.2686996459961, 5.734653472900391, -62.007301330566406, 62.43514633178711, 131.7407684326172, -5.564704895019531, 59.822532653808594, 3.1731109619140625, -5.025373458862305, 131.10678100585938, 68.46974182128906, -36.723472595214844, -30.40323257446289, 32.516761779785156, 62.83195114135742, 138.60012817382812, -71.52206420898438, 0.81231689453125, -36.76039123535156, 192.64767456054688, -50.6842041015625, 18.074657440185547, -25.842321395874023, 49.002403259277344, 30.9017333984375, -98.10272216796875, 4.877582550048828, 97.4989013671875, 108.560302734375, 69.98115539550781, 28.276885986328125, 131.46762084960938, 17.713212966918945, 66.950439453125, 81.35855102539062, -34.17007827758789, -32.61940002441406, 4.779865264892578, 132.43682861328125, 132.61700439453125, 31.50347900390625, 12.063667297363281, 46.29182434082031, -136.20767211914062, 145.39453125, 5.844123840332031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000301.npy"}
|
|
{"epoch": 0.455026455026455, "step": 302, "batch_size": 64, "mean": 28.039810180664062, "std": 65.32776641845703, "min": -141.3878936767578, "p10": -45.01562881469726, "median": 18.089385986328125, "p90": 118.04474945068363, "max": 177.47088623046875, "pos_frac": 0.703125, "sample": [80.21537780761719, 156.10829162597656, -0.5420379638671875, 177.47088623046875, 57.08935546875, 12.011957168579102, 111.01504516601562, -18.86493682861328, 146.88534545898438, 17.96331787109375, 36.045379638671875, 45.842201232910156, 3.949800491333008, 15.66952896118164, 89.68045043945312, 125.06461334228516, 2.9048004150390625, 14.298805236816406, 1.9505538940429688, 20.417831420898438, 26.749797821044922, -99.91815948486328, -103.55265808105469, 38.02702331542969, 15.941268920898438, -12.615381240844727, 61.5703125, 57.87537384033203, 12.204784393310547, -7.3172454833984375, -21.382774353027344, 69.91419982910156, 58.12321472167969, 5.995965957641602, -49.23253631591797, 124.42391967773438, 144.55831909179688, -35.176177978515625, 7.2691802978515625, 42.270320892333984, 75.99503326416016, 92.96820831298828, 1.2024116516113281, 50.4898681640625, 59.614990234375, 59.83988952636719, -127.04393005371094, -1.7403793334960938, -29.8372802734375, 64.0152816772461, 97.80130004882812, 67.39222717285156, -7.198356628417969, -2.0601654052734375, 26.157283782958984, -24.550182342529297, -53.70831298828125, 121.05747985839844, -16.891357421875, -73.41523742675781, -141.3878936767578, 18.2154541015625, 98.14424133300781, 8.581911087036133], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000302.npy"}
|
|
{"epoch": 0.4565381708238851, "step": 303, "batch_size": 64, "mean": 49.310699462890625, "std": 74.50186920166016, "min": -114.60566711425781, "p10": -63.49328842163085, "median": 63.64324188232422, "p90": 136.83401031494142, "max": 188.13116455078125, "pos_frac": 0.6875, "sample": [-12.981979370117188, 69.24832153320312, 103.63079833984375, 17.046354293823242, 140.22659301757812, 119.03877258300781, 112.15769958496094, -69.5398178100586, 125.0977554321289, 67.42843627929688, 134.0575408935547, -46.58942413330078, 46.242000579833984, -5.168718338012695, -86.01155090332031, -88.32192993164062, -67.89521789550781, 128.95120239257812, -114.60566711425781, -30.80743408203125, -0.238006591796875, 20.781003952026367, 188.13116455078125, 81.64088439941406, -53.242645263671875, 113.44740295410156, 100.67089080810547, -67.88642120361328, -3.9680137634277344, 24.927337646484375, 71.42974090576172, -5.2698516845703125, 9.226219177246094, 54.73387145996094, 3.6733245849609375, -10.704586029052734, 138.02392578125, 9.004615783691406, 108.4583740234375, 106.46469116210938, 25.89947509765625, 123.60777282714844, 68.181884765625, 145.27789306640625, -4.233421325683594, 125.33979797363281, 146.78517150878906, 39.26640319824219, 59.85804748535156, 132.46640014648438, -21.754989624023438, -21.404008865356445, -72.08193969726562, 71.4493408203125, 142.7943878173828, 110.36555480957031, 19.586809158325195, 89.07260131835938, 73.6192855834961, 127.31675720214844, 133.29457092285156, -36.511932373046875, 97.20867919921875, 149.97264099121094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000303.npy"}
|
|
{"epoch": 0.4580498866213152, "step": 304, "batch_size": 64, "mean": 50.627525329589844, "std": 57.52477264404297, "min": -127.8292236328125, "p10": -6.737057876586912, "median": 43.05390167236328, "p90": 133.7212371826172, "max": 167.11209106445312, "pos_frac": 0.828125, "sample": [134.61032104492188, 0.496856689453125, 84.08544921875, 142.6609649658203, -3.2028350830078125, 71.975341796875, 39.6512451171875, 72.07508087158203, -25.842939376831055, 33.739295959472656, 5.997955322265625, 109.42276000976562, 111.11077880859375, 46.45655822753906, 105.17153930664062, 72.85887145996094, -4.054462432861328, 31.88839340209961, 95.22528076171875, -127.8292236328125, 14.929031372070312, 134.605224609375, 33.624237060546875, 38.10514831542969, -7.886741638183594, 12.782669067382812, 10.692995071411133, 88.01632690429688, 35.468719482421875, 131.65859985351562, 59.04131317138672, 5.194517135620117, 167.11209106445312, 50.91791534423828, 21.829666137695312, -63.42646789550781, 89.06464385986328, 93.15943145751953, 28.60965347290039, 65.20904541015625, 0.6770153045654297, -0.4734058380126953, 157.78713989257812, 52.94737243652344, 1.2462272644042969, 11.08807373046875, 54.13386535644531, 70.0790786743164, 122.8831558227539, 141.6578369140625, -27.9791259765625, -3.2272109985351562, 107.28375244140625, 1.3366966247558594, 106.9474868774414, -13.40060806274414, 28.73601531982422, 137.11184692382812, 89.04289245605469, 24.497344970703125, 32.80686950683594, -22.371349334716797, 51.54278564453125, 110.60249328613281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000304.npy"}
|
|
{"epoch": 0.4595616024187453, "step": 305, "batch_size": 64, "mean": 35.80479431152344, "std": 72.27374267578125, "min": -148.09791564941406, "p10": -67.03749694824218, "median": 30.09612274169922, "p90": 128.92710876464844, "max": 155.10467529296875, "pos_frac": 0.734375, "sample": [127.10810852050781, 15.269706726074219, 17.630111694335938, -73.61246490478516, -12.547073364257812, -0.7415733337402344, 155.10467529296875, -73.25342559814453, 40.48771667480469, 149.0767059326172, 36.18169021606445, 150.85760498046875, -47.550689697265625, 115.12252807617188, -113.66899108886719, 3.872629165649414, 56.841758728027344, 4.235431671142578, -98.82278442382812, 64.265625, 27.72225570678711, 17.164878845214844, 15.138542175292969, 57.49507141113281, 26.839881896972656, 84.35792541503906, 77.33009338378906, 3.2333831787109375, 58.484771728515625, -148.09791564941406, -4.2471923828125, 55.33555603027344, 122.73713684082031, 115.52059173583984, -19.5648193359375, 24.881010055541992, 155.0712890625, -34.666473388671875, 4.123239517211914, 65.3668212890625, 15.701576232910156, 13.365219116210938, 71.1197509765625, 145.3343963623047, 115.51689910888672, -7.411708831787109, 22.881187438964844, -63.528717041015625, 137.35137939453125, 69.95079040527344, 110.9105453491211, 36.068504333496094, 32.46998977661133, 103.09902954101562, 63.642677307128906, 123.0557861328125, -57.890228271484375, 67.55424499511719, -98.78587341308594, 129.70668029785156, -22.2800350189209, 20.823442459106445, 111.30903625488281, -68.541259765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000305.npy"}
|
|
{"epoch": 0.46107331821617537, "step": 306, "batch_size": 64, "mean": 38.970123291015625, "std": 56.64619064331055, "min": -120.60955810546875, "p10": -12.960375595092767, "median": 26.2243709564209, "p90": 117.26269760131837, "max": 174.29574584960938, "pos_frac": 0.78125, "sample": [133.22219848632812, 84.4017562866211, 16.373212814331055, 140.24899291992188, 7.923004150390625, 110.29976654052734, 19.31591796875, 22.05466079711914, 33.971221923828125, 75.34664154052734, 126.8793716430664, 105.70890808105469, -57.46820831298828, 28.035511016845703, -15.30208969116211, -120.60955810546875, 116.83049011230469, 45.3131103515625, 54.528018951416016, 2.396167755126953, -4.645957946777344, 99.57575225830078, 117.44792938232422, 12.403844833374023, 7.760114669799805, 16.58208465576172, -24.82550621032715, 93.66059875488281, 7.240753173828125, 43.12237548828125, -36.163909912109375, 108.1446533203125, -7.496376037597656, 8.424654006958008, 75.5762710571289, 45.783287048339844, 134.59188842773438, 170.35911560058594, 13.9432373046875, 5.1986083984375, 49.57048797607422, 8.136629104614258, 21.770084381103516, 47.92023468017578, -59.21240997314453, 12.361671447753906, -2.8847713470458984, 54.576904296875, 8.961181640625, 36.21722412109375, 78.2275619506836, 51.250640869140625, -0.3058624267578125, -1.9444580078125, 41.326629638671875, 6.271533966064453, 24.413230895996094, -5.592529296875, 44.07958221435547, 174.29574584960938, -41.075599670410156, 32.23724365234375, 98.39442443847656, -1.0598526000976562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000306.npy"}
|
|
{"epoch": 0.46258503401360546, "step": 307, "batch_size": 64, "mean": 17.738990783691406, "std": 72.2691421508789, "min": -135.2657928466797, "p10": -61.59884529113769, "median": 6.316167831420898, "p90": 122.37134246826179, "max": 147.55323791503906, "pos_frac": 0.578125, "sample": [104.25952911376953, -46.72594451904297, 10.204757690429688, -37.59479522705078, -135.2657928466797, 83.77786254882812, 147.55323791503906, -34.088829040527344, 146.36798095703125, 102.64048767089844, -42.9979248046875, 29.765457153320312, 12.087810516357422, 137.52099609375, -127.60198974609375, 45.51716613769531, 48.13627624511719, 2.1875457763671875, 97.69558715820312, -47.046875, 52.919525146484375, -96.40766906738281, -14.405487060546875, -73.08760070800781, 26.39837646484375, -76.0860824584961, -43.11207580566406, 48.4783935546875, 73.36244201660156, -0.9925651550292969, -29.815513610839844, 14.482145309448242, 1.1052742004394531, -60.7595329284668, -58.695960998535156, 78.14596557617188, -53.78581237792969, -6.872093200683594, 105.95634460449219, -20.617446899414062, 2.4275779724121094, -21.18946075439453, 0.4023284912109375, 142.5428924560547, 129.40634155273438, -52.154876708984375, 36.052982330322266, 131.04229736328125, 104.87833404541016, 39.735504150390625, -105.10446166992188, 37.26563262939453, 104.39388275146484, -1.6021556854248047, 67.52963256835938, 58.91368865966797, 1.6984176635742188, 139.44754028320312, -61.64881134033203, -40.055198669433594, -61.48225784301758, 15.479358673095703, 105.8525161743164, -1.1394500732421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000307.npy"}
|
|
{"epoch": 0.46409674981103555, "step": 308, "batch_size": 64, "mean": 36.740577697753906, "std": 65.57447814941406, "min": -142.47515869140625, "p10": -47.41776809692382, "median": 24.473432540893555, "p90": 128.17109603881838, "max": 159.3687744140625, "pos_frac": 0.734375, "sample": [140.88333129882812, 86.58604431152344, 71.51268005371094, -45.173038482666016, 121.83602905273438, 88.39303588867188, -20.572364807128906, 32.294219970703125, 53.20745849609375, -10.466743469238281, 125.2815933227539, 122.54277038574219, 129.60665893554688, 17.875995635986328, 13.192977905273438, -60.87533950805664, 64.88301849365234, 119.66390991210938, 74.29754638671875, 23.927165985107422, 70.28097534179688, -17.624408721923828, 145.3448486328125, 53.959041595458984, 17.12921905517578, 63.77589416503906, -4.260480880737305, 47.71062469482422, 76.29383850097656, -87.31268310546875, 159.3687744140625, 4.494728088378906, 25.926162719726562, -48.37979507446289, 60.63612365722656, 2.1796951293945312, 99.28034210205078, -4.81451416015625, 1.2373199462890625, -142.47515869140625, 11.194389343261719, -1.0968551635742188, 68.61048889160156, 141.01512145996094, 3.3696060180664062, 25.019699096679688, 62.652687072753906, -36.23518371582031, 94.41474914550781, 5.527576446533203, 122.28871154785156, 21.639312744140625, -19.655960083007812, 74.40624237060547, 129.40945434570312, 19.69873809814453, 136.33984375, -65.38420104980469, -55.425437927246094, 7.696317672729492, 11.676549911499023, -87.92518615722656, 11.594863891601562, -1.0818843841552734], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000308.npy"}
|
|
{"epoch": 0.4656084656084656, "step": 309, "batch_size": 64, "mean": 38.080989837646484, "std": 60.57160949707031, "min": -118.65902709960938, "p10": -23.008073425292967, "median": 33.75628662109375, "p90": 112.51822357177736, "max": 165.54844665527344, "pos_frac": 0.734375, "sample": [106.28152465820312, 153.85113525390625, 9.758895874023438, 114.08647155761719, 52.894371032714844, 4.327461242675781, 86.12787628173828, -13.7822265625, 65.85716247558594, 15.410484313964844, 43.049530029296875, -1.393280029296875, -49.55922317504883, -19.850019454956055, 19.862197875976562, -101.60678100585938, 34.19010925292969, 108.85897827148438, -45.17243194580078, -10.471345901489258, 3.9303131103515625, 75.03121948242188, 103.18114471435547, 2.353391647338867, 76.59744262695312, 94.2757568359375, -3.5082015991210938, 33.32246398925781, 16.09540557861328, 2.2126617431640625, 7.162946701049805, 15.462203979492188, -9.362838745117188, 135.28707885742188, 106.14588928222656, 37.786495208740234, 44.270851135253906, -0.6581382751464844, -5.723932266235352, 56.14038848876953, -11.825668334960938, 157.2147216796875, 85.92356872558594, 12.618518829345703, 165.54844665527344, -118.65902709960938, 105.3565902709961, 53.81852340698242, 30.130666732788086, 56.40168762207031, -21.348403930664062, 5.731498718261719, 53.55455017089844, 59.10856628417969, 138.8438720703125, 46.94364547729492, 60.27940368652344, 97.30230712890625, -51.25761413574219, -54.68505859375, 25.84686279296875, -23.7193603515625, 53.369163513183594, 147.9624786376953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000309.npy"}
|
|
{"epoch": 0.4671201814058957, "step": 310, "batch_size": 64, "mean": 22.764583587646484, "std": 61.0108528137207, "min": -87.4352035522461, "p10": -49.63096618652343, "median": 14.644515991210938, "p90": 111.5351875305176, "max": 180.32711791992188, "pos_frac": 0.625, "sample": [106.70317077636719, -4.08905029296875, 17.564373016357422, 53.04627227783203, 28.440032958984375, -61.68939971923828, -21.565292358398438, 85.59033203125, 106.72154998779297, -1.8860530853271484, -41.547393798828125, -33.57057189941406, 38.37274932861328, 136.1898651123047, 18.698822021484375, -2.034862518310547, 1.0398941040039062, 71.03530883789062, 154.68092346191406, -28.07599639892578, 6.686733245849609, -44.40403747558594, -87.4352035522461, 180.32711791992188, -19.626976013183594, -37.73668670654297, 55.70500946044922, -24.547348022460938, 24.9178466796875, 113.59817504882812, -59.29502868652344, 56.72163391113281, -8.073837280273438, 100.02401733398438, -51.87107849121094, 40.0123291015625, 0.23636627197265625, 74.55705261230469, 5.0102081298828125, 65.99565124511719, 10.212348937988281, 16.355865478515625, -69.42219543457031, -33.31788635253906, 57.663536071777344, 36.17460632324219, -5.7207489013671875, 60.36357879638672, 1.3564910888671875, -9.809417724609375, 22.41278839111328, 128.59457397460938, 122.08968353271484, -70.85813903808594, 0.5689792633056641, -37.53553009033203, 12.93316650390625, 135.36611938476562, 28.193883895874023, -22.475921630859375, -75.36360168457031, 91.23094940185547, 23.410001754760742, 20.08356475830078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000310.npy"}
|
|
{"epoch": 0.46863189720332576, "step": 311, "batch_size": 64, "mean": 40.06228256225586, "std": 64.4653091430664, "min": -154.79197692871094, "p10": -22.864789962768548, "median": 29.22226333618164, "p90": 122.09019012451175, "max": 283.0703430175781, "pos_frac": 0.78125, "sample": [109.23948669433594, 1.7808685302734375, 14.677925109863281, -3.5801467895507812, -31.139251708984375, -29.01300811767578, 41.31547927856445, 124.81629180908203, 0.0051441192626953125, 88.35667419433594, 10.301704406738281, -154.79197692871094, 4.148731231689453, 81.1749038696289, 100.26220703125, -0.8139934539794922, 104.27420043945312, 49.99370574951172, 140.2568359375, 42.870880126953125, 6.137016296386719, 56.16839599609375, 1.5018024444580078, 42.774627685546875, 115.72928619384766, 8.075115203857422, -26.377559661865234, -5.911960601806641, 15.298660278320312, 43.40020751953125, -36.721954345703125, 156.86923217773438, 172.8447723388672, 42.79712677001953, 9.645622253417969, 53.118255615234375, 6.569038391113281, 76.57524108886719, 29.836639404296875, 61.279327392578125, 283.0703430175781, 22.375106811523438, 0.5184707641601562, 128.93768310546875, -38.76121520996094, 12.43072509765625, 22.3489990234375, -4.269523620605469, 46.94895935058594, 22.98986053466797, 145.2342529296875, 72.06381225585938, 60.422760009765625, -12.524511337280273, 29.085845947265625, 89.41442108154297, 1.3624019622802734, -57.13957214355469, -14.668327331542969, 31.564804077148438, -1.2880630493164062, 29.358680725097656, 101.66563415527344, 69.09906005859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000311.npy"}
|
|
{"epoch": 0.47014361300075586, "step": 312, "batch_size": 64, "mean": 47.89008712768555, "std": 55.385536193847656, "min": -89.50223541259766, "p10": -14.368461608886715, "median": 34.8928108215332, "p90": 125.78597412109379, "max": 156.1853790283203, "pos_frac": 0.859375, "sample": [30.375682830810547, 4.224605560302734, -20.48577880859375, 9.386215209960938, 27.41985321044922, 8.954841613769531, 51.0596809387207, -2.3304519653320312, 28.101104736328125, 24.067420959472656, 145.8800048828125, 91.20909118652344, 104.2175064086914, 87.19493103027344, 2.7614517211914062, 82.45313262939453, 9.059043884277344, 1.5513286590576172, 10.224138259887695, 95.35693359375, 139.4761962890625, 17.207252502441406, 14.674800872802734, 23.38420867919922, 46.752899169921875, 129.3038330078125, 4.287078857421875, -10.8846435546875, 31.595779418945312, 79.2844467163086, 143.1422576904297, -60.6251220703125, 74.61383819580078, 63.16798400878906, -89.50223541259766, 46.93357467651367, 105.07111358642578, 17.425384521484375, 111.19502258300781, 156.1853790283203, 149.19552612304688, 1.3678321838378906, 22.97937774658203, 70.45350646972656, 117.57763671875, 35.68675231933594, 34.09886932373047, 115.75981903076172, 58.263580322265625, -19.23208236694336, -15.861526489257812, 94.1667709350586, 133.09201049804688, 77.74934387207031, 26.469940185546875, 87.98724365234375, 78.73372650146484, 50.106117248535156, -73.24714660644531, 91.75106811523438, 86.9766845703125, -25.370155334472656, 22.52752685546875, 10.363395690917969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000312.npy"}
|
|
{"epoch": 0.47165532879818595, "step": 313, "batch_size": 64, "mean": 24.455005645751953, "std": 60.30954360961914, "min": -102.78276062011719, "p10": -46.61180114746094, "median": 11.454793930053711, "p90": 107.61633529663087, "max": 181.94790649414062, "pos_frac": 0.6875, "sample": [4.109434127807617, 25.261863708496094, 48.53192901611328, 8.877971649169922, 16.269805908203125, 50.37700653076172, 5.041584014892578, 43.03645324707031, 79.44036865234375, -63.78038024902344, 2.2385406494140625, 49.32263946533203, 10.687240600585938, 32.18931579589844, 2.4510269165039062, -11.217689514160156, 3.3201980590820312, -43.336490631103516, -19.063003540039062, -2.114349365234375, 49.299415588378906, 93.4454574584961, 1.789590835571289, 156.2192840576172, -91.97989654541016, -46.50189208984375, -11.779769897460938, 25.16253662109375, 102.262451171875, 40.38872528076172, 147.8176727294922, 108.40277099609375, -72.14138793945312, -4.950922012329102, 125.87861633300781, 28.775314331054688, 2.3504257202148438, 11.283489227294922, -5.7201995849609375, 142.39920043945312, 41.69546890258789, 25.22922134399414, 57.39958190917969, -10.358024597167969, 181.94790649414062, 0.05535888671875, -5.4574127197265625, -18.221092224121094, 29.5545654296875, 11.6260986328125, 105.78131866455078, 40.37150573730469, 37.99742126464844, -1.8402748107910156, -0.6087074279785156, -94.64894104003906, 75.8995590209961, -102.78276062011719, 119.41007232666016, -46.658905029296875, -61.93931198120117, 26.034515380859375, 4.9697265625, 105.61901092529297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000313.npy"}
|
|
{"epoch": 0.47316704459561604, "step": 314, "batch_size": 64, "mean": 48.839603424072266, "std": 62.12349319458008, "min": -65.25445556640625, "p10": -13.607751655578607, "median": 31.517477989196777, "p90": 140.2769744873047, "max": 184.2587890625, "pos_frac": 0.75, "sample": [30.30987548828125, 8.541980743408203, 24.259063720703125, 141.03573608398438, 52.905513763427734, 85.2902603149414, -4.3460235595703125, 115.70040130615234, -62.530845642089844, 31.472715377807617, 6.826812744140625, -6.457096099853516, 169.70687866210938, 50.75428771972656, -6.744438171386719, 184.2587890625, 58.1349983215332, 31.23972511291504, 124.28388214111328, 6.045021057128906, 31.562240600585938, -4.157234191894531, 126.11662292480469, 138.50653076171875, 121.3927001953125, -55.67266845703125, 27.612083435058594, 151.26168823242188, 4.4839324951171875, -7.308319091796875, 99.19770050048828, 54.12432861328125, 23.96725845336914, -4.8558197021484375, 2.829225540161133, 48.476688385009766, 81.936767578125, -30.169540405273438, 22.434999465942383, 91.39949035644531, -16.30750846862793, -1.0089645385742188, 142.43045043945312, 72.55537414550781, 118.9105453491211, 6.15643310546875, 85.40380859375, 123.25089263916016, -6.297676086425781, 147.26467895507812, 52.66407775878906, 132.716796875, 90.61788940429688, 11.182113647460938, 90.46539306640625, 25.792266845703125, -25.053497314453125, 35.97162628173828, 15.317832946777344, -6.74346923828125, 45.78394317626953, -65.25445556640625, -58.45181655883789, 144.54168701171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000314.npy"}
|
|
{"epoch": 0.47467876039304613, "step": 315, "batch_size": 64, "mean": 37.93355178833008, "std": 58.24225616455078, "min": -111.54374694824219, "p10": -36.79367980957031, "median": 41.63639259338379, "p90": 115.74356079101564, "max": 150.920166015625, "pos_frac": 0.71875, "sample": [-16.048553466796875, 85.11874389648438, 110.19799041748047, -4.934480667114258, -0.18834877014160156, 39.762794494628906, -111.54374694824219, -1.2744808197021484, -70.19183349609375, 44.455299377441406, -79.60306549072266, 34.58331298828125, -18.02933120727539, 136.26034545898438, -2.4983596801757812, 52.850685119628906, 31.425308227539062, -26.454795837402344, 20.202709197998047, 1.1263751983642578, 47.55730438232422, 78.10679626464844, 1.671895980834961, 50.94642639160156, 26.801071166992188, 47.489871978759766, 50.25439453125, -28.849807739257812, 43.50999069213867, 37.194610595703125, 44.69703674316406, 60.2604866027832, 106.83561706542969, 16.320480346679688, 26.070106506347656, 96.27790832519531, 128.1884307861328, -40.19819641113281, -41.445892333984375, 57.02626037597656, 108.23391723632812, -6.120819091796875, 111.06265258789062, 128.05398559570312, -18.119979858398438, 74.5430908203125, 59.024620056152344, 150.920166015625, -41.81875991821289, 24.872459411621094, 64.71615600585938, 8.196495056152344, 117.74966430664062, 1.9824562072753906, 87.62922668457031, 11.152427673339844, 109.93943786621094, 61.951171875, 139.03517150878906, -54.47279357910156, 66.2131118774414, -6.452138900756836, 140.99346923828125, 54.53077697753906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000315.npy"}
|
|
{"epoch": 0.47619047619047616, "step": 316, "batch_size": 64, "mean": 51.014610290527344, "std": 60.20137023925781, "min": -59.59406280517578, "p10": -18.70350570678711, "median": 47.30632972717285, "p90": 134.68924255371093, "max": 167.39833068847656, "pos_frac": 0.734375, "sample": [-8.962997436523438, 18.405624389648438, 72.36039733886719, -39.99986267089844, -25.343612670898438, -13.343879699707031, 60.535888671875, 38.97984313964844, 31.87518310546875, 126.2101058959961, 142.51144409179688, 117.37005615234375, 89.0654296875, 115.64987182617188, 30.52313232421875, -46.09754943847656, -59.59406280517578, 7.902595520019531, -10.129518508911133, 105.55825805664062, 14.597810745239258, 134.81707763671875, -1.6970672607421875, 7.713905334472656, 27.917022705078125, 45.68507766723633, 64.99105834960938, 11.280776977539062, -11.68735122680664, 23.67292022705078, 134.20428466796875, 104.87977600097656, 90.69371032714844, 167.39833068847656, -10.180803298950195, 59.3673095703125, -19.105850219726562, 122.05461120605469, 79.29329681396484, 98.47127532958984, 151.86166381835938, 61.544700622558594, 44.5805549621582, -4.461071014404297, 143.0809783935547, 11.495843887329102, 1.1094551086425781, 149.88925170898438, -9.091773986816406, 48.927581787109375, -42.73777770996094, -37.90660095214844, 11.641975402832031, -17.76470184326172, 61.79411315917969, 59.73789978027344, -2.4069480895996094, 134.39096069335938, 111.27845764160156, 112.27799224853516, 64.79558563232422, 62.92474365234375, 140.77133178710938, 109.35735321044922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000316.npy"}
|
|
{"epoch": 0.47770219198790626, "step": 317, "batch_size": 64, "mean": 51.09688186645508, "std": 60.59086227416992, "min": -122.08529663085938, "p10": -15.49648284912109, "median": 47.96299362182617, "p90": 141.83881225585938, "max": 162.643798828125, "pos_frac": 0.8125, "sample": [84.88249969482422, 89.39555358886719, 62.8154182434082, 115.50067138671875, 52.18104553222656, 162.643798828125, -23.453643798828125, 52.445091247558594, -46.09626770019531, 40.51824188232422, 4.301929473876953, 45.420162200927734, -9.696907043457031, 11.481185913085938, 51.308349609375, 31.331584930419922, 96.78886413574219, 34.11711120605469, 89.25569915771484, 151.68760681152344, -45.847137451171875, -4.441490173339844, 37.68671417236328, -66.95133972167969, 32.847747802734375, 44.88920593261719, 114.96125793457031, 80.3258056640625, 81.57721710205078, -122.08529663085938, 139.89962768554688, 16.35577392578125, 68.3025894165039, 149.53958129882812, 142.66989135742188, -52.24665832519531, 71.03675842285156, 5.998279571533203, 79.1403579711914, 146.28720092773438, 31.803165435791016, 51.08552551269531, 154.33309936523438, -5.982902526855469, 24.1937255859375, 6.157260894775391, 14.99853515625, 128.28062438964844, 17.616744995117188, -0.2879180908203125, 66.32923889160156, 26.74264144897461, 48.407691955566406, 144.68441772460938, -12.331016540527344, 61.98126220703125, -16.853111267089844, 59.937469482421875, 138.4315643310547, 111.52641296386719, 1.27532958984375, 120.67976379394531, 32.8985595703125, 47.51829528808594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000317.npy"}
|
|
{"epoch": 0.47921390778533635, "step": 318, "batch_size": 64, "mean": 36.882423400878906, "std": 75.24278259277344, "min": -115.8583755493164, "p10": -57.978688812255854, "median": 38.987335205078125, "p90": 150.29450073242188, "max": 208.8206787109375, "pos_frac": 0.65625, "sample": [40.49473190307617, 122.1384048461914, -9.714797973632812, -96.377685546875, 133.95095825195312, 128.3490753173828, 41.74871826171875, -45.556121826171875, 65.04461669921875, 150.08993530273438, 23.06180191040039, -54.50074005126953, 70.89804077148438, 58.25660705566406, 135.5499725341797, -4.414180755615234, 22.21636962890625, -44.28113555908203, 44.352882385253906, 5.347440719604492, -6.635097503662109, 69.35294342041016, 150.60086059570312, -11.745574951171875, 17.36034393310547, 70.34078979492188, 150.8555145263672, -90.21035766601562, 121.20851135253906, 82.33702087402344, -22.09771728515625, 29.946151733398438, -54.05272674560547, 37.47993850708008, 5.8056640625, 208.8206787109375, 181.7231903076172, -35.03327941894531, 152.4437713623047, 10.219596862792969, -59.46923828125, 46.55976867675781, 72.07605743408203, -115.8583755493164, -3.5479736328125, 63.19001770019531, 15.518409729003906, 157.8106231689453, -1.6670646667480469, 99.08517456054688, 0.6224384307861328, 150.38217163085938, 42.77958679199219, 86.43075561523438, -85.35519409179688, -28.260238647460938, 67.70698547363281, -78.62117004394531, 51.98112487792969, -3.3561172485351562, 84.33346557617188, -71.9587173461914, -26.48922348022461, 41.20679473876953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000318.npy"}
|
|
{"epoch": 0.48072562358276644, "step": 319, "batch_size": 64, "mean": 49.97394561767578, "std": 69.18146514892578, "min": -87.4578857421875, "p10": -32.581853485107416, "median": 51.397705078125, "p90": 148.5510238647461, "max": 212.68121337890625, "pos_frac": 0.734375, "sample": [60.090126037597656, 155.04904174804688, 2.0537643432617188, 153.92779541015625, 18.077482223510742, 130.39874267578125, 1.5246658325195312, 50.85638427734375, 51.93902587890625, 58.216522216796875, 139.47891235351562, 130.22573852539062, -5.022274017333984, -0.28388214111328125, 19.833602905273438, 63.487579345703125, 13.092018127441406, 168.0342254638672, 5.4988555908203125, 81.29779815673828, 4.734882354736328, -21.582839965820312, 60.916290283203125, 70.28617858886719, 188.36167907714844, -9.439399719238281, 143.54029846191406, 72.98066711425781, 149.15036010742188, 141.33863830566406, -35.72776794433594, -57.73316955566406, 54.90293884277344, 212.68121337890625, -8.523689270019531, 72.23943328857422, -37.864898681640625, 100.05872344970703, 66.73711395263672, -62.480472564697266, 147.15257263183594, 47.80821228027344, -25.24138641357422, -1.811187744140625, 11.682426452636719, 13.936456680297852, -85.66194152832031, -9.289085388183594, -46.95641326904297, 106.24357604980469, -12.084518432617188, 50.468719482421875, 34.38551330566406, -24.958770751953125, 43.10492706298828, 109.03369140625, 66.07765197753906, 68.04768371582031, 72.00065612792969, 10.059915542602539, 97.55474853515625, -87.4578857421875, 151.2078857421875, 60.67649841308594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000319.npy"}
|
|
{"epoch": 0.48223733938019653, "step": 320, "batch_size": 64, "mean": 42.0404052734375, "std": 69.72310638427734, "min": -137.82240295410156, "p10": -25.353012084960934, "median": 13.836936950683594, "p90": 131.72347717285157, "max": 248.60540771484375, "pos_frac": 0.671875, "sample": [106.59037780761719, -54.51286697387695, 1.1943016052246094, 32.760101318359375, 14.69952392578125, 45.98151397705078, 139.7594757080078, 3.6863784790039062, 99.3821792602539, -3.304811477661133, -54.54963684082031, 129.5576171875, -0.04646492004394531, -0.8395423889160156, -44.74195098876953, 129.15570068359375, 71.48242950439453, 4.719856262207031, -18.51610565185547, 107.51274108886719, -26.217309951782227, 0.9600486755371094, -137.82240295410156, 1.2511234283447266, 12.974349975585938, 248.60540771484375, 158.72381591796875, 124.42572784423828, 122.61283874511719, -11.164093017578125, 47.83805847167969, -17.92298698425293, 12.635566711425781, 15.79779052734375, -0.9020557403564453, 132.65170288085938, 8.995964050292969, -26.999252319335938, -6.374713897705078, -16.094966888427734, 55.068424224853516, 159.0986785888672, 7.8334503173828125, 68.46473693847656, 12.2974853515625, 69.27275085449219, 161.34121704101562, 62.500770568847656, -3.1890869140625, -23.33631706237793, 3.268360137939453, -54.43267059326172, 166.28909301757812, -6.2793426513671875, -3.800060272216797, 50.30445098876953, 20.0452880859375, 108.88607788085938, 115.80532836914062, 82.68975067138672, 81.3987808227539, -4.113983154296875, 102.41410827636719, 104.81340026855469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000320.npy"}
|
|
{"epoch": 0.4837490551776266, "step": 321, "batch_size": 64, "mean": 45.44639587402344, "std": 68.41667175292969, "min": -108.30992126464844, "p10": -49.290906524658205, "median": 41.124467849731445, "p90": 136.62226104736328, "max": 152.1738739013672, "pos_frac": 0.765625, "sample": [110.78596496582031, 94.98861694335938, 3.1538467407226562, 108.50981903076172, 115.08848571777344, -34.192115783691406, 152.1738739013672, 88.80657196044922, 139.73878479003906, 44.40211486816406, 149.70040893554688, 40.3001594543457, 55.2816276550293, 77.17884826660156, -108.30992126464844, 10.328311920166016, 24.574317932128906, 79.64466094970703, 41.94877624511719, 130.53717041015625, 31.331436157226562, 7.631134033203125, -68.8581314086914, -0.6615447998046875, 62.773521423339844, -85.18846130371094, -80.77098846435547, -88.13752746582031, 68.3579330444336, 31.655746459960938, 34.241455078125, 66.75027465820312, 119.48741149902344, 74.07077026367188, 1.0632858276367188, 92.767578125, 78.35438537597656, -1.7593746185302734, 29.09780502319336, 144.72955322265625, 3.269550323486328, -2.414836883544922, 36.525264739990234, 105.17076873779297, -2.2746734619140625, -100.25874328613281, 129.85745239257812, 150.1943359375, 136.91246032714844, 68.74211120605469, 8.714607238769531, 2.981271743774414, 39.73191833496094, 134.28585815429688, 135.94512939453125, -17.944007873535156, 30.432861328125, 79.04783630371094, 149.36904907226562, -49.64661407470703, 16.51807403564453, -1.150390625, 61.444236755371094, -48.46092224121094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000321.npy"}
|
|
{"epoch": 0.4852607709750567, "step": 322, "batch_size": 64, "mean": 55.794349670410156, "std": 62.57339096069336, "min": -104.43124389648438, "p10": -11.645841217041013, "median": 46.84053039550781, "p90": 141.57916717529298, "max": 157.50262451171875, "pos_frac": 0.84375, "sample": [-21.32661247253418, 87.18598175048828, 145.12005615234375, 17.183380126953125, 111.45591735839844, 15.85784912109375, 157.50262451171875, 113.5786361694336, 134.6995391845703, 37.817718505859375, 51.083106994628906, 58.667205810546875, 136.88873291015625, 150.7797393798828, 75.15826416015625, 1.793121337890625, 28.81134033203125, 48.28587341308594, 6.002704620361328, 20.38153076171875, 98.32288360595703, -0.20275115966796875, -88.05207061767578, 139.77996826171875, 126.45390319824219, 13.002593994140625, 37.871185302734375, 36.58592987060547, 0.9958953857421875, -9.41046142578125, -46.199676513671875, 111.711181640625, 51.21574401855469, 137.91802978515625, 110.63784790039062, 73.12332916259766, 12.236747741699219, 149.44979858398438, 8.82830810546875, -9.8011474609375, 86.4361572265625, 49.24591064453125, 1.7894363403320312, 141.671630859375, 36.73876953125, 139.3134307861328, 145.4398193359375, 81.62239074707031, -23.61297607421875, -104.43124389648438, 43.04861068725586, 15.358261108398438, 30.033740997314453, 50.2303466796875, 141.36341857910156, 26.8155517578125, 43.65142059326172, 45.39518737792969, 80.2552490234375, -48.2484016418457, 149.90530395507812, -12.436424255371094, 94.64387512207031, 25.215028762817383], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000322.npy"}
|
|
{"epoch": 0.48677248677248675, "step": 323, "batch_size": 64, "mean": 37.715667724609375, "std": 76.87120819091797, "min": -131.70709228515625, "p10": -48.928752136230464, "median": 14.44375228881836, "p90": 148.4111129760742, "max": 190.63336181640625, "pos_frac": 0.671875, "sample": [-2.7220840454101562, -91.3580322265625, 71.37771606445312, 41.50679016113281, -16.648353576660156, 132.3573760986328, 28.536117553710938, 10.563343048095703, 24.81255340576172, 148.68394470214844, 36.648162841796875, -32.39012145996094, 147.2730712890625, 5.083045959472656, 124.28563690185547, -35.9838981628418, -54.7220458984375, 103.7032470703125, 101.04906463623047, 7.51812744140625, 74.77684020996094, 147.77450561523438, 16.653884887695312, 13.141609191894531, 162.94735717773438, 3.9212112426757812, -50.75230407714844, 67.92582702636719, 0.5849094390869141, -36.20751953125, 190.63336181640625, -44.673797607421875, 9.8726806640625, -71.65000915527344, 155.32997131347656, 15.286048889160156, -80.370361328125, 144.0039825439453, 84.50119018554688, -0.9648723602294922, 10.632301330566406, 66.68236541748047, -9.841201782226562, -7.329582214355469, 166.2613067626953, 169.18922424316406, 145.55599975585938, -131.70709228515625, 166.819091796875, 1.2237319946289062, -124.9831314086914, -3.2010574340820312, 117.78398132324219, 18.07152557373047, -18.616943359375, 74.8292236328125, 100.52764892578125, 85.31916809082031, 13.601455688476562, -6.383056640625, 40.45357131958008, -3.539306640625, 2.2928619384765625, -12.147384643554688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000323.npy"}
|
|
{"epoch": 0.48828420256991684, "step": 324, "batch_size": 64, "mean": 51.399356842041016, "std": 63.3670539855957, "min": -121.5915756225586, "p10": -11.520677375793452, "median": 39.59480667114258, "p90": 139.3904983520508, "max": 186.45294189453125, "pos_frac": 0.796875, "sample": [80.19667053222656, 40.81355285644531, 36.50555419921875, 11.746627807617188, -72.57466125488281, 155.19813537597656, 59.783164978027344, 60.640907287597656, -27.885574340820312, -0.27211761474609375, -0.15024566650390625, 122.99064636230469, 66.32464599609375, 54.93495559692383, 107.24858093261719, 23.911128997802734, 140.328369140625, -30.86133575439453, 100.1954574584961, 3.100372314453125, 53.75691223144531, 43.225830078125, 3.052288055419922, 87.37184143066406, 38.376060485839844, 16.19952392578125, -7.171302795410156, 24.23236846923828, 183.79385375976562, 12.67794418334961, 7.509674072265625, -121.5915756225586, 57.028160095214844, 131.69952392578125, 24.801193237304688, 69.1949462890625, 38.022850036621094, -13.384695053100586, 91.60423278808594, 99.9255142211914, 38.31031036376953, -42.62290954589844, 186.45294189453125, 22.571983337402344, 156.97967529296875, 24.978816986083984, 3.3391647338867188, 137.20213317871094, 68.49127197265625, 130.56124877929688, 163.95123291015625, 0.5195960998535156, 3.2677459716796875, 108.430908203125, 141.20834350585938, -5.5086822509765625, 48.570716857910156, -1.2590789794921875, -15.963325500488281, 62.004695892333984, 128.81240844726562, 132.50677490234375, 24.87128448486328, -0.6184940338134766], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000324.npy"}
|
|
{"epoch": 0.4897959183673469, "step": 325, "batch_size": 64, "mean": 46.71160125732422, "std": 76.18929290771484, "min": -193.06565856933594, "p10": -35.92511196136474, "median": 47.76154708862305, "p90": 133.7035110473633, "max": 212.95587158203125, "pos_frac": 0.75, "sample": [35.64639663696289, 127.93865966796875, 2.88397216796875, 120.93036651611328, -38.54822540283203, 83.04710388183594, 90.725341796875, 61.28260040283203, 68.12478637695312, 68.72713470458984, 69.56187438964844, 125.21040344238281, -29.804513931274414, -27.328765869140625, 49.53498840332031, -8.85330581665039, 129.18923950195312, 102.6632080078125, 119.87230682373047, -121.09269714355469, 161.77548217773438, -28.38220977783203, 129.08975219726562, 1.6102352142333984, -0.3358268737792969, 171.02084350585938, 19.255069732666016, 135.63819885253906, -58.01762771606445, 66.68799591064453, -81.99699401855469, 55.448917388916016, 105.95825958251953, 50.799102783203125, -77.43529510498047, 52.53826904296875, -67.82453918457031, 111.30355834960938, 47.120887756347656, 212.95587158203125, 62.54145812988281, 21.865217208862305, 14.25103759765625, 203.6915283203125, -193.06565856933594, 18.80506134033203, 40.915740966796875, 17.06389617919922, 106.57508850097656, 97.7571029663086, 152.2724151611328, -7.401954650878906, 41.23895263671875, -12.39373779296875, 25.54022979736328, -19.32990264892578, 46.61925506591797, 56.059383392333984, 168.5554962158203, 48.40220642089844, 17.583450317382812, 42.552223205566406, 22.498130798339844, -19.975006103515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000325.npy"}
|
|
{"epoch": 0.491307634164777, "step": 326, "batch_size": 64, "mean": 20.587383270263672, "std": 71.63412475585938, "min": -130.21841430664062, "p10": -79.06906967163086, "median": 12.315673828125, "p90": 116.09791336059571, "max": 159.5070037841797, "pos_frac": 0.578125, "sample": [-15.513786315917969, -8.260921478271484, 6.559940338134766, 24.48385238647461, 50.186798095703125, -106.59552001953125, 159.5070037841797, 131.107177734375, -6.9139404296875, 81.19197845458984, -46.49540710449219, 40.97067642211914, 15.934856414794922, 7.199005126953125, 62.918025970458984, 111.95347595214844, 7.723478317260742, 81.52955627441406, -8.4765625, 40.76695251464844, 124.71919250488281, -3.8346405029296875, 22.522216796875, 75.19410705566406, -79.10718536376953, -37.152130126953125, -108.77201843261719, -7.555450439453125, -27.6409912109375, -6.897682189941406, 153.40621948242188, -130.21841430664062, 54.89134979248047, -8.88669204711914, 51.00560760498047, 14.622909545898438, 4.655548095703125, -30.731842041015625, -60.94499969482422, -33.91656494140625, 109.21568298339844, 144.57919311523438, -78.98013305664062, -8.018630981445312, 117.42643737792969, 30.08001708984375, 28.268436431884766, -104.41852569580078, 96.27410125732422, 145.96002197265625, -4.298702239990234, -9.841440200805664, -82.92002868652344, 112.9980239868164, -11.840835571289062, 10.008438110351562, -102.47401428222656, 90.49951171875, 62.73392868041992, 22.512176513671875, 104.22725677490234, -75.54313659667969, 16.570465087890625, 109.43902587890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000326.npy"}
|
|
{"epoch": 0.4928193499622071, "step": 327, "batch_size": 64, "mean": 37.49703598022461, "std": 75.57402801513672, "min": -135.0878448486328, "p10": -67.27401809692383, "median": 27.33310317993164, "p90": 147.4863250732422, "max": 187.007080078125, "pos_frac": 0.6875, "sample": [52.34337615966797, -64.48595428466797, -34.903175354003906, 152.0792236328125, 19.950698852539062, 42.44952392578125, -88.85459899902344, 74.1219711303711, -95.30403137207031, 80.88441467285156, 156.38681030273438, 29.560367584228516, -87.99146270751953, 125.10148620605469, 29.18230438232422, 148.32789611816406, 135.46603393554688, 13.14388656616211, 51.38963317871094, -68.46890258789062, -0.08901023864746094, 11.057315826416016, 75.78832244873047, 175.84909057617188, 145.5226593017578, 9.673721313476562, 35.2860107421875, 93.38307189941406, 83.78473663330078, 3.1161346435546875, 20.163345336914062, 5.030719757080078, -86.61601257324219, 113.96676635742188, 104.26936340332031, -4.314643859863281, 37.56980895996094, 37.85508728027344, -7.2108917236328125, 2.5143203735351562, 156.2717742919922, 25.483901977539062, -135.0878448486328, -2.328582763671875, 101.9363021850586, 187.007080078125, -33.17486572265625, 124.84208679199219, 150.56021118164062, 20.716835021972656, 87.49024200439453, -7.724847793579102, 24.462753295898438, 10.718555450439453, -45.68168258666992, -25.503137588500977, 125.75196075439453, 114.87572479248047, 40.894805908203125, -3.6964378356933594, -11.575958251953125, -98.65695190429688, 74.64492797851562, -9.396018981933594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000327.npy"}
|
|
{"epoch": 0.4943310657596372, "step": 328, "batch_size": 64, "mean": 45.51738739013672, "std": 66.31256103515625, "min": -118.0896987915039, "p10": -26.58735637664795, "median": 23.636722564697266, "p90": 136.9728759765625, "max": 185.57659912109375, "pos_frac": 0.75, "sample": [105.40608215332031, 4.018503189086914, 7.336919784545898, 25.736831665039062, 135.02943420410156, 6.299413681030273, -6.095363616943359, 105.85070037841797, 88.11515045166016, 19.974952697753906, 21.47490692138672, 131.41339111328125, 136.48707580566406, 16.720043182373047, 58.119781494140625, -55.85844421386719, 145.60995483398438, 20.7405948638916, 37.63478088378906, -7.064044952392578, 34.75834655761719, -2.3892822265625, 3.934877395629883, 29.006877899169922, -27.407379150390625, -36.737388610839844, 172.79327392578125, 21.53661346435547, -71.65776062011719, 40.477012634277344, -118.0896987915039, 130.8994140625, 99.45307922363281, 0.843505859375, 137.4653778076172, -25.72974395751953, 20.767547607421875, -3.5320587158203125, 185.57659912109375, 34.729164123535156, 130.37969970703125, 157.50559997558594, -3.7757110595703125, -26.954904556274414, 9.492477416992188, 123.1267318725586, -4.679044723510742, 16.087448120117188, -2.4726104736328125, 13.719547271728516, 74.0962905883789, -54.42152404785156, 80.00376892089844, 30.268585205078125, 15.56353759765625, 85.87142944335938, 106.24999237060547, -4.659688949584961, 137.1810760498047, 37.81188201904297, 90.05697631835938, 1.0556373596191406, 153.6203155517578, 124.3364486694336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000328.npy"}
|
|
{"epoch": 0.4958427815570673, "step": 329, "batch_size": 64, "mean": 46.87541961669922, "std": 69.58319091796875, "min": -85.9804916381836, "p10": -31.27470989227294, "median": 29.904918670654297, "p90": 151.83830413818362, "max": 195.8583984375, "pos_frac": 0.734375, "sample": [9.672369003295898, 6.595020294189453, 48.10546112060547, 30.906639099121094, -53.735992431640625, 26.447601318359375, 195.8583984375, -16.259246826171875, 100.96551513671875, 22.477561950683594, 116.79287719726562, 67.1728515625, -9.855108261108398, 109.17877197265625, 1.2426738739013672, 23.346168518066406, 35.857505798339844, 136.76687622070312, -18.691680908203125, 32.85884094238281, -2.0240478515625, 38.1373291015625, 157.50021362304688, 61.6065673828125, -85.9804916381836, 154.0101318359375, 160.14451599121094, 130.50433349609375, 41.583740234375, 49.402015686035156, 157.45303344726562, 125.2745361328125, -34.97364807128906, 132.865234375, 1.134054183959961, 10.174575805664062, 135.14413452148438, 1.9842357635498047, 148.80931091308594, 85.55865478515625, 173.76449584960938, 137.73751831054688, 5.451732635498047, -62.525115966796875, -4.576026916503906, 137.14202880859375, 3.224853515625, 29.691200256347656, 8.544540405273438, 28.735065460205078, -14.224285125732422, 52.933929443359375, -1.3223724365234375, -36.311153411865234, 153.13644409179688, 97.16259765625, -22.64385414123535, 82.5542984008789, -14.4075927734375, -68.2752685546875, 30.118637084960938, 3.47332763671875, -0.6938266754150391, -52.676116943359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000329.npy"}
|
|
{"epoch": 0.4973544973544973, "step": 330, "batch_size": 64, "mean": 50.88603591918945, "std": 83.30777740478516, "min": -155.07962036132812, "p10": -68.12362442016601, "median": 51.912479400634766, "p90": 149.21941223144532, "max": 182.55947875976562, "pos_frac": 0.75, "sample": [1.8994979858398438, -24.251670837402344, -16.411611557006836, -133.93568420410156, 51.44872283935547, -155.07962036132812, 128.88238525390625, 27.99591827392578, -32.01720428466797, 139.99679565429688, 138.83164978027344, 65.24534606933594, -93.13359069824219, -2.7642593383789062, 28.038761138916016, 143.78341674804688, 162.66915893554688, -75.18042755126953, 182.55947875976562, 158.2365264892578, 103.01608276367188, 1.0707855224609375, 127.8697509765625, 0.5383739471435547, 143.3718719482422, 129.6853790283203, 0.96295166015625, 12.9552001953125, -65.12445831298828, 10.957015991210938, 70.86629486083984, 131.45355224609375, -11.150848388671875, 60.70827865600586, -69.40898132324219, 148.01348876953125, 101.38785552978516, 153.659912109375, 103.63371276855469, 27.246639251708984, 14.830184936523438, 96.31327819824219, 59.751373291015625, 101.1875228881836, 145.05056762695312, 139.66244506835938, 2.3831615447998047, 53.063167572021484, -10.715911865234375, 14.629196166992188, -3.8724136352539062, 24.766921997070312, 149.73623657226562, 141.64605712890625, -36.50743103027344, 30.186630249023438, 52.37623596191406, -77.08154296875, 161.89846801757812, -99.05848693847656, 124.11018371582031, 136.85438537597656, 2.2369308471679688, 154.73284912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000330.npy"}
|
|
{"epoch": 0.4988662131519274, "step": 331, "batch_size": 64, "mean": 62.81810760498047, "std": 76.81495666503906, "min": -156.16162109375, "p10": -11.008517837524405, "median": 65.0147476196289, "p90": 154.3046829223633, "max": 238.7324981689453, "pos_frac": 0.828125, "sample": [160.0355682373047, 10.874725341796875, 27.953327178955078, 106.43492126464844, 130.3847198486328, 165.00009155273438, 102.20442199707031, 96.68739318847656, 112.87472534179688, 9.393203735351562, -2.387500762939453, 175.9651336669922, 153.39132690429688, 150.09017944335938, 121.27894592285156, 11.224769592285156, 165.6597900390625, 36.48536682128906, 38.20850372314453, 154.6961212158203, 108.9208984375, 4.307769775390625, 12.153671264648438, 7.723968505859375, -14.703239440917969, 145.05303955078125, 101.08051300048828, 238.7324981689453, 23.059188842773438, 109.89422607421875, 87.88632202148438, 36.46864700317383, 150.17527770996094, 77.73194885253906, 5.604667663574219, -0.20937347412109375, 62.26628875732422, -108.12266540527344, -93.48037719726562, 67.7632064819336, -69.88949584960938, 162.1951446533203, 41.32115173339844, -2.121490478515625, 28.079851150512695, -156.16162109375, 72.75348663330078, 1.21514892578125, -16.083656311035156, 36.54026794433594, 27.37000274658203, 98.84870910644531, 103.21701049804688, 29.26397705078125, 58.50981903076172, 8.163169860839844, -1.0051145553588867, 138.33172607421875, 112.57380676269531, -81.48954772949219, 149.12197875976562, 101.9468994140625, 83.10574340820312, 145.7900390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000331.npy"}
|
|
{"epoch": 0.5003779289493575, "step": 332, "batch_size": 64, "mean": 53.15788650512695, "std": 70.09272766113281, "min": -108.43559265136719, "p10": -7.384926605224608, "median": 31.92724609375, "p90": 155.93524627685548, "max": 191.93804931640625, "pos_frac": 0.765625, "sample": [-70.53814697265625, -1.0647659301757812, 0.37845611572265625, -2.420196533203125, 4.716920852661133, 131.67031860351562, 20.924179077148438, 36.621482849121094, 28.550949096679688, 52.264957427978516, -4.583778381347656, 191.93804931640625, 129.7507781982422, 93.99191284179688, 157.1025390625, 14.234634399414062, 16.771221160888672, -0.16249465942382812, 100.0296630859375, 50.46751403808594, 84.00719451904297, 12.522994995117188, -108.43559265136719, 149.09361267089844, 123.3302001953125, 130.68408203125, -39.29350280761719, 22.07196044921875, 6.0876617431640625, -7.733497619628906, 41.85521697998047, 153.21156311035156, 24.927459716796875, -3.515960693359375, -6.57159423828125, 24.7470703125, 5.49302864074707, 176.7357177734375, -82.74950408935547, 10.670976638793945, 103.32656860351562, 125.87991333007812, 100.5296859741211, -5.131608963012695, 41.25962829589844, 4.165718078613281, 103.43638610839844, 122.76753234863281, 128.904541015625, 80.76669311523438, 175.90773010253906, 7.4127655029296875, 69.30853271484375, -2.751190185546875, 37.27332305908203, -20.742172241210938, 158.7659149169922, 9.31717300415039, 22.612045288085938, 158.68133544921875, 187.04229736328125, 118.61940002441406, -28.334415435791016, 35.30354309082031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000332.npy"}
|
|
{"epoch": 0.5018896447467877, "step": 333, "batch_size": 64, "mean": 55.44878387451172, "std": 70.7477035522461, "min": -112.84300994873047, "p10": -20.058519935607904, "median": 45.07669448852539, "p90": 158.51385498046875, "max": 213.30355834960938, "pos_frac": 0.765625, "sample": [12.110511779785156, -22.28266716003418, 13.726699829101562, 87.21876525878906, 20.549057006835938, -0.4517784118652344, 23.360698699951172, 70.91180419921875, 6.114109039306641, -44.45404052734375, 65.65319061279297, 84.39825439453125, 100.32503509521484, 110.54708099365234, 139.46466064453125, 31.17694091796875, 41.881378173828125, 159.57984924316406, 191.37359619140625, 165.56246948242188, -7.3719329833984375, 70.04652404785156, 55.124290466308594, 52.05621337890625, 43.28486633300781, -61.050453186035156, -23.00064468383789, 53.075775146484375, -1.0279083251953125, 46.86852264404297, 160.99363708496094, 150.70498657226562, -22.762481689453125, 81.33954620361328, 108.42573547363281, 32.11288070678711, 182.03863525390625, 5.392753601074219, -3.6572532653808594, 33.538848876953125, -1.9463520050048828, 59.80549621582031, -112.84300994873047, 4.623273849487305, 80.25826263427734, -86.73553466796875, 213.30355834960938, 17.709571838378906, -14.868843078613281, 37.23316192626953, -3.4754714965820312, -6.9014434814453125, 108.7778549194336, 156.0265350341797, 10.050752639770508, 1.4348831176757812, 90.13118743896484, 13.504692077636719, 146.9170379638672, 68.12763214111328, 190.11639404296875, 90.39362335205078, 142.42385864257812, 131.75680541992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000333.npy"}
|
|
{"epoch": 0.5034013605442177, "step": 334, "batch_size": 64, "mean": 39.732994079589844, "std": 79.7843017578125, "min": -136.07305908203125, "p10": -61.91798171997069, "median": 23.24773406982422, "p90": 151.0055892944336, "max": 204.35284423828125, "pos_frac": 0.6875, "sample": [-71.52152252197266, -136.07305908203125, 71.16022491455078, 95.31758117675781, 76.09402465820312, -23.656341552734375, -115.34197998046875, 121.1352767944336, 26.216629028320312, -129.77215576171875, 3.6243743896484375, 204.35284423828125, -9.799190521240234, 90.02742004394531, 4.304931640625, 103.10125732421875, -20.801788330078125, -19.94516944885254, 45.3555908203125, 90.89910888671875, 90.12144470214844, 166.33197021484375, 20.757530212402344, 75.27754211425781, 81.90374755859375, -72.31961059570312, 24.40301513671875, 0.08930397033691406, 129.06008911132812, 7.913349151611328, -3.027772903442383, 0.6626129150390625, 162.44821166992188, -106.976806640625, 98.83855438232422, 151.59385681152344, 57.62066650390625, 22.092453002929688, 30.20074462890625, -6.908729553222656, 8.487312316894531, -32.43474578857422, -11.199493408203125, -10.35833740234375, 158.43435668945312, -0.7496719360351562, -0.9676132202148438, 13.538002014160156, -45.502349853515625, 114.26850891113281, 132.67147827148438, 60.199256896972656, -67.02667236328125, 100.17127990722656, -49.997703552246094, 20.959259033203125, 44.55418395996094, 166.07421875, 0.961151123046875, 21.594545364379883, 149.63296508789062, 97.7366714477539, 193.70773315429688, 143.39712524414062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000334.npy"}
|
|
{"epoch": 0.5049130763416477, "step": 335, "batch_size": 64, "mean": 44.931480407714844, "std": 76.76238250732422, "min": -118.82328796386719, "p10": -36.57210273742676, "median": 32.93596649169922, "p90": 156.59714050292968, "max": 200.80770874023438, "pos_frac": 0.75, "sample": [103.67216491699219, 63.00330352783203, -36.84125518798828, 187.81744384765625, 23.52764892578125, 111.51547241210938, 31.7425537109375, 2.0138797760009766, 200.80770874023438, 106.56394958496094, 165.22775268554688, 146.0228271484375, 34.12937927246094, 61.93598937988281, 161.4525146484375, 152.82351684570312, 9.065727233886719, 27.69706916809082, 34.180137634277344, 158.01002502441406, -47.59980392456055, 4.09326171875, 41.26622772216797, 167.21470642089844, -55.40606689453125, 4.32647705078125, 88.31178283691406, -32.413734436035156, -30.888635635375977, 52.013710021972656, 143.74020385742188, 6.939872741699219, 6.274444580078125, 40.62651824951172, 133.08746337890625, -28.701213836669922, 156.6986083984375, -6.194911956787109, 35.69255828857422, 156.36038208007812, 7.415672302246094, -6.830705642700195, 58.244537353515625, 26.85826873779297, 80.39506530761719, 107.8795166015625, -21.22600555419922, -118.82328796386719, 27.592212677001953, 3.5626087188720703, 52.39286804199219, -35.9440803527832, -10.772804260253906, -118.50279235839844, -118.80816650390625, -65.18476104736328, 151.5441131591797, 99.90707397460938, 1.4807891845703125, 74.895751953125, -7.796470642089844, 8.715950012207031, 12.866889953613281, 85.94277954101562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000335.npy"}
|
|
{"epoch": 0.5064247921390779, "step": 336, "batch_size": 64, "mean": 46.33979034423828, "std": 79.14208984375, "min": -109.5553970336914, "p10": -47.13423690795898, "median": 25.736769676208496, "p90": 158.63580169677735, "max": 250.7657470703125, "pos_frac": 0.75, "sample": [-54.106597900390625, -2.03656005859375, 153.2376708984375, 36.35539245605469, 250.7657470703125, 157.4558868408203, 84.18478393554688, 0.48328399658203125, 159.1414794921875, 0.48047447204589844, 3.5895862579345703, -7.025880813598633, 162.97137451171875, -5.1201324462890625, 143.02210998535156, 14.50149917602539, 8.093215942382812, 87.79167175292969, 176.93881225585938, -101.05328369140625, -43.07085418701172, 49.191162109375, 185.23977661132812, 58.385475158691406, -0.2601165771484375, 127.48039245605469, 46.16603469848633, -109.5553970336914, -37.55455017089844, -57.38098907470703, 174.8247528076172, -71.28288269042969, 59.464874267578125, 25.34466552734375, 6.239307403564453, 9.76136589050293, 0.8726348876953125, -2.7892227172851562, 55.26720428466797, 144.05589294433594, 8.650482177734375, -18.530845642089844, 64.06510925292969, 7.593406677246094, 138.31979370117188, 30.832489013671875, 61.67359924316406, 2.067413330078125, 26.128873825073242, 114.33445739746094, 8.95693588256836, 0.9518280029296875, 70.29058074951172, 150.54864501953125, 237.0491485595703, 60.577667236328125, -55.879547119140625, 5.337371826171875, 85.0733413696289, -22.448654174804688, 37.566314697265625, 89.8587646484375, 21.535293579101562, -48.87568664550781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000336.npy"}
|
|
{"epoch": 0.5079365079365079, "step": 337, "batch_size": 64, "mean": 37.79745864868164, "std": 72.64236450195312, "min": -141.12118530273438, "p10": -37.43363418579101, "median": 29.866249084472656, "p90": 140.8827758789063, "max": 193.23324584960938, "pos_frac": 0.6875, "sample": [9.104591369628906, -90.74910736083984, -141.12118530273438, -26.86737823486328, 42.70152282714844, 129.0233917236328, -12.791353225708008, 129.4150848388672, 35.492095947265625, -5.153465270996094, 164.81521606445312, 13.989738464355469, 43.56694793701172, -125.04203796386719, -56.96356201171875, -16.455162048339844, 32.88043212890625, 112.63552856445312, 37.05182647705078, -16.743423461914062, 1.6979904174804688, 1.374948501586914, -24.508384704589844, -67.76673889160156, 116.84742736816406, 86.53797149658203, -0.8459320068359375, 36.11076354980469, 58.43889617919922, 177.1906280517578, 26.802757263183594, 4.995841979980469, 146.63507080078125, -5.5111236572265625, 120.80191040039062, 78.80520629882812, 148.23348999023438, -9.639022827148438, -0.3125457763671875, 85.52659606933594, 148.9884033203125, 13.611709594726562, 145.79750061035156, 24.84686279296875, 73.75711059570312, -27.200241088867188, 75.09866333007812, 18.535049438476562, 47.6015625, 34.782989501953125, 89.14811706542969, 193.23324584960938, -1.7596054077148438, -22.70953369140625, 26.852066040039062, 111.71455383300781, 8.147867202758789, -41.819374084472656, 83.62745666503906, 90.07965850830078, -102.55064392089844, 21.080917358398438, 128.099365234375, 39.86812973022461], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000337.npy"}
|
|
{"epoch": 0.509448223733938, "step": 338, "batch_size": 64, "mean": 40.37804412841797, "std": 74.4179458618164, "min": -156.9330291748047, "p10": -28.718961334228513, "median": 22.926803588867188, "p90": 147.80108947753908, "max": 176.4697265625, "pos_frac": 0.765625, "sample": [-28.252349853515625, 25.73208236694336, 158.95840454101562, 139.2209930419922, 140.36170959472656, 130.86727905273438, -100.40413665771484, 122.6511459350586, 21.825164794921875, 150.98939514160156, 168.20751953125, 11.919807434082031, 2.71533203125, 43.057830810546875, 4.054779052734375, -85.86431884765625, 10.186241149902344, -109.95478820800781, -9.462387084960938, 2.5709609985351562, -28.91893768310547, 137.83966064453125, 24.13974380493164, 9.857664108276367, 10.485282897949219, 89.99653625488281, -156.9330291748047, 103.70064544677734, 82.4027099609375, 64.50684356689453, -2.1316299438476562, 80.5394287109375, 13.014484405517578, 31.347759246826172, 79.63233184814453, 83.70106506347656, 4.820690155029297, 16.71282958984375, 176.4697265625, 14.838211059570312, 20.97197723388672, 130.7366943359375, -27.496978759765625, 113.913818359375, 3.824657440185547, 24.0284423828125, -47.279457092285156, -26.956146240234375, 45.12401580810547, -20.814579010009766, 38.62040710449219, 162.37400817871094, 2.5900001525878906, 3.8068161010742188, 71.05642700195312, 80.00151062011719, 0.3986968994140625, 154.01736450195312, 128.08914184570312, -79.11422729492188, -27.17069435119629, 44.44232940673828, 155.03167724609375, -1.4038124084472656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000338.npy"}
|
|
{"epoch": 0.5109599395313681, "step": 339, "batch_size": 64, "mean": 44.85552215576172, "std": 69.71337127685547, "min": -135.01565551757812, "p10": -32.690727233886705, "median": 38.551692962646484, "p90": 147.3462112426758, "max": 183.02749633789062, "pos_frac": 0.78125, "sample": [45.461708068847656, 33.412750244140625, 16.668289184570312, 3.9022445678710938, -14.767005920410156, 149.78732299804688, 2.6849212646484375, 53.55078887939453, 12.031002044677734, -82.96981048583984, 62.908233642578125, -13.581741333007812, 10.469772338867188, 34.60945510864258, 52.15329360961914, 13.169960021972656, -88.35107421875, 122.12693786621094, 136.6273193359375, 10.472757339477539, 141.65028381347656, 49.84613037109375, 80.33479309082031, 61.342445373535156, 40.163490295410156, 154.25982666015625, 0.687286376953125, 32.04640579223633, -40.37232208251953, 61.85573196411133, 59.80105209350586, 107.233642578125, 167.81272888183594, 164.16363525390625, 66.28838348388672, 4.513988494873047, 94.63433074951172, -53.08769226074219, 93.86489868164062, 71.15452575683594, 161.4171600341797, 36.93989562988281, -12.65880012512207, -10.318321228027344, 32.8638916015625, -0.919708251953125, 31.693634033203125, 92.385009765625, -109.49588775634766, -41.58744812011719, 14.223329544067383, 6.444709777832031, 113.92205810546875, 129.37493896484375, -135.01565551757812, 183.02749633789062, -7.9849853515625, 44.479736328125, 2.6855010986328125, 110.01781463623047, 92.55934143066406, 43.55300521850586, -0.6076068878173828, 175.19375610351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000339.npy"}
|
|
{"epoch": 0.5124716553287982, "step": 340, "batch_size": 64, "mean": 45.176639556884766, "std": 83.20428466796875, "min": -153.53819274902344, "p10": -60.708130645751936, "median": 26.96573829650879, "p90": 160.8554931640625, "max": 178.07020568847656, "pos_frac": 0.625, "sample": [80.18943786621094, 154.6017303466797, 14.226795196533203, 78.79509735107422, -1.7888336181640625, 167.2375946044922, 2.3347339630126953, -1.963876724243164, -45.302635192871094, 171.32107543945312, -3.6762771606445312, 95.06678771972656, 69.20359802246094, 136.0986328125, 169.92739868164062, 111.63774108886719, 33.67131042480469, -69.71473693847656, 56.13055419921875, 93.37200927734375, -27.51506233215332, -9.117265701293945, -87.57768249511719, 93.2404556274414, -12.512664794921875, 178.07020568847656, -10.706863403320312, 143.801025390625, -19.856178283691406, 159.33334350585938, -123.37120056152344, 138.16311645507812, 5.514129638671875, 29.528884887695312, 151.64620971679688, 52.92023849487305, 140.01104736328125, -0.6639041900634766, -24.61083984375, 153.20693969726562, 115.95780181884766, -34.842987060546875, -1.7185821533203125, 161.50784301757812, 62.067317962646484, -3.978811264038086, -73.56486511230469, -90.49266052246094, 2.5072555541992188, 161.61419677734375, -10.162277221679688, 63.06096267700195, 87.07097625732422, -11.638162612915039, -153.53819274902344, -67.31048583984375, 157.69515991210938, 22.74175262451172, 60.84111785888672, 176.89315795898438, 24.402591705322266, 6.102500915527344, -16.469131469726562, 11.686393737792969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000340.npy"}
|
|
{"epoch": 0.5139833711262283, "step": 341, "batch_size": 64, "mean": 52.252132415771484, "std": 78.69599151611328, "min": -112.87210845947266, "p10": -46.06010742187499, "median": 33.72007942199707, "p90": 161.33065338134767, "max": 185.648193359375, "pos_frac": 0.75, "sample": [-54.1690559387207, 60.37591552734375, 159.746337890625, 72.9901123046875, 126.40177154541016, 75.12677001953125, 132.38937377929688, 185.648193359375, 163.611328125, -33.157958984375, 146.9562225341797, 35.146793365478516, -112.87210845947266, -6.000679016113281, 4.153165817260742, 5.546142578125, 134.81436157226562, 5.622001647949219, 130.10812377929688, 162.214111328125, 123.84278869628906, 160.43844604492188, 162.71009826660156, 76.929931640625, -84.41070556640625, 148.8150177001953, 108.66804504394531, 21.012187957763672, -22.342308044433594, -16.624740600585938, 23.87997817993164, 12.013900756835938, -17.106399536132812, 4.41387939453125, 22.185379028320312, 151.94622802734375, 69.17111206054688, 12.615018844604492, 102.14220428466797, -58.989768981933594, -30.06079864501953, -64.85195922851562, 79.18993377685547, 175.6865234375, 4.607540130615234, 161.71302795410156, 23.275665283203125, 81.35713195800781, 101.9731674194336, 177.87893676757812, 67.86932373046875, 8.155815124511719, 123.45825958251953, -51.589599609375, 2.3513946533203125, 121.38458251953125, 77.32340240478516, 32.293365478515625, -3.0255603790283203, -28.111618041992188, -17.73638153076172, 10.657485961914062, 6.727910995483398, -112.35238647460938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000341.npy"}
|
|
{"epoch": 0.5154950869236583, "step": 342, "batch_size": 64, "mean": 52.22775650024414, "std": 76.41122436523438, "min": -126.61204528808594, "p10": -36.62967300415038, "median": 34.49880409240723, "p90": 162.46493682861328, "max": 225.7092742919922, "pos_frac": 0.734375, "sample": [28.28803253173828, 30.488311767578125, 0.44495391845703125, -8.098762512207031, 3.688100814819336, 161.13958740234375, 34.65605545043945, 132.08560180664062, -47.39094543457031, 1.9691619873046875, -6.056631088256836, 36.00746154785156, -2.773397445678711, 155.04470825195312, -2.4026870727539062, -48.35844421386719, 2.729856491088867, -126.61204528808594, -2.1476917266845703, -32.701011657714844, 26.63681411743164, 159.69627380371094, 111.49177551269531, 163.03294372558594, -38.313385009765625, 61.11540222167969, 27.541305541992188, 85.1571273803711, 126.59381103515625, 59.470638275146484, 48.8066291809082, 56.17525863647461, 35.491424560546875, 134.26315307617188, 45.41923522949219, -7.273763656616211, 128.37322998046875, 14.216659545898438, -45.42169189453125, 154.7174530029297, 20.555450439453125, 164.60122680664062, 72.23016357421875, 83.19522094726562, -43.42619323730469, 170.1590576171875, 1.0484695434570312, -3.2331619262695312, 225.7092742919922, 182.5048370361328, 119.1224136352539, -95.2008056640625, 22.404747009277344, 158.50997924804688, -15.697628021240234, 164.3279571533203, 69.99681091308594, -3.2145309448242188, 4.941963195800781, 71.18869018554688, 77.0157241821289, 34.341552734375, 6.6324462890625, 197.67236328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000342.npy"}
|
|
{"epoch": 0.5170068027210885, "step": 343, "batch_size": 64, "mean": 37.11636734008789, "std": 72.06706237792969, "min": -154.95980834960938, "p10": -20.13066101074218, "median": 23.236492156982422, "p90": 129.4223899841309, "max": 248.20989990234375, "pos_frac": 0.75, "sample": [85.2857666015625, 8.2237548828125, -7.380088806152344, 5.8812408447265625, -6.239387512207031, 71.95938110351562, -14.719696044921875, -0.4109783172607422, 3.81817626953125, -5.393280029296875, -133.42074584960938, 22.362457275390625, 44.91606140136719, 96.48613739013672, 248.20989990234375, -5.799419403076172, 146.936767578125, 186.6005859375, 131.66995239257812, 117.39892578125, 29.840469360351562, 3.976095199584961, 69.02438354492188, 124.1780776977539, 7.107143402099609, 63.99812698364258, 20.364242553710938, -42.73523712158203, 33.05438232421875, -1.9824352264404297, -101.09862518310547, 46.248191833496094, 93.80671691894531, 21.139854431152344, 29.7003173828125, -2.658355712890625, 87.25375366210938, 132.4923095703125, -135.5441131591797, 149.7220001220703, -22.44964599609375, 58.08087158203125, 32.79125213623047, 174.50933837890625, 70.20895385742188, 14.782035827636719, 57.6461181640625, 27.650371551513672, 8.00238037109375, 123.90269470214844, 104.8993148803711, 9.401481628417969, 4.681632995605469, 80.0834732055664, 66.0743179321289, 22.946990966796875, 7.4316558837890625, 21.886993408203125, 52.47967529296875, 23.52599334716797, -154.95980834960938, -1.0359344482421875, -38.08162307739258, 6.716157913208008], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000343.npy"}
|
|
{"epoch": 0.5185185185185185, "step": 344, "batch_size": 64, "mean": 54.39847946166992, "std": 75.69498443603516, "min": -110.30701446533203, "p10": -38.39604110717773, "median": 49.607526779174805, "p90": 151.70134429931642, "max": 262.7179260253906, "pos_frac": 0.734375, "sample": [-68.49859619140625, 138.38546752929688, 28.585243225097656, 49.49702072143555, 161.28793334960938, -1.3422603607177734, -0.9004592895507812, 28.61917495727539, 20.242088317871094, 79.15080261230469, 23.95135498046875, 10.68157958984375, 19.652936935424805, 100.89083862304688, 94.36284637451172, 61.085933685302734, 91.54725646972656, 262.7179260253906, -13.847122192382812, 85.99966430664062, -55.001922607421875, -38.93072509765625, -37.14844512939453, 175.14198303222656, 2.6320762634277344, -14.487617492675781, -48.71067810058594, 1.8495864868164062, 183.12049865722656, 99.88534545898438, -38.94580078125, 173.65603637695312, -10.631462097167969, 111.95773315429688, 49.71803283691406, 155.54612731933594, 148.32301330566406, 6.065071105957031, 107.92488861083984, -110.30701446533203, -0.3898887634277344, 13.04110336303711, -15.904472351074219, 4.72547721862793, -14.099098205566406, -5.491006851196289, 146.19720458984375, 145.59402465820312, 67.59417724609375, 92.18266296386719, 112.11486053466797, 57.828521728515625, 100.49173736572266, 3.4562244415283203, 153.14920043945312, 20.526063919067383, 75.79241943359375, 94.98381042480469, 19.398971557617188, 111.80309295654297, -96.97026062011719, 113.55401611328125, 131.22019958496094, 116.97731018066406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000344.npy"}
|
|
{"epoch": 0.5200302343159486, "step": 345, "batch_size": 64, "mean": 46.11676025390625, "std": 74.4412841796875, "min": -130.9083709716797, "p10": -25.332147979736327, "median": 37.34104537963867, "p90": 147.48099060058595, "max": 201.0017547607422, "pos_frac": 0.765625, "sample": [54.07830047607422, 14.376720428466797, 173.75296020507812, -92.06732177734375, 11.34372329711914, 5.405975341796875, 0.11513519287109375, -57.90179443359375, 48.852577209472656, -20.88396453857422, 143.02215576171875, 99.20404052734375, 120.01838684082031, 108.76802825927734, -79.10096740722656, 119.49805450439453, -88.75408172607422, 11.628044128417969, 35.54730987548828, 128.1426239013672, 201.0017547607422, 6.165105819702148, 123.62391662597656, 56.421051025390625, 113.20093536376953, -14.0924072265625, 11.065940856933594, 36.85417938232422, -0.9105377197265625, 54.66307830810547, 81.22075653076172, 180.91497802734375, 110.37796020507812, 48.223594665527344, 49.52452087402344, -130.9083709716797, 161.35183715820312, 181.78952026367188, 77.68428039550781, -19.025386810302734, 148.07972717285156, 47.68378448486328, -25.97418212890625, 146.0839385986328, 37.827911376953125, -21.162151336669922, 25.17662811279297, -23.834068298339844, 18.800857543945312, 28.092933654785156, 21.44501495361328, 15.463676452636719, -15.083076477050781, 30.5855712890625, 8.232673645019531, 85.7698974609375, 1.1633434295654297, -107.90675354003906, -12.675411224365234, 51.052040100097656, 67.31629943847656, 88.0748291015625, 115.67323303222656, 157.393310546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000345.npy"}
|
|
{"epoch": 0.5215419501133787, "step": 346, "batch_size": 64, "mean": 54.4464111328125, "std": 77.57891845703125, "min": -158.39959716796875, "p10": -45.30438041687011, "median": 45.08579444885254, "p90": 156.3579284667969, "max": 192.86279296875, "pos_frac": 0.78125, "sample": [97.55693817138672, 117.30984497070312, 102.251708984375, 107.05774688720703, 99.34928894042969, 53.46076965332031, 27.322921752929688, 80.07848358154297, 151.0029296875, 170.7132110595703, 157.124755859375, 6.636787414550781, 92.2529296875, 148.93826293945312, -39.32294845581055, 161.1121063232422, -107.05276489257812, 45.283321380615234, -47.86785125732422, 63.5968132019043, 79.53318786621094, 93.60757446289062, 68.77915954589844, 44.888267517089844, 151.07528686523438, 192.86279296875, -158.39959716796875, 21.45794677734375, 10.096721649169922, -86.87905883789062, 9.718536376953125, -65.57454681396484, 145.2584686279297, -1.1627349853515625, 25.403453826904297, 35.85826873779297, 7.765037536621094, -11.222328186035156, 120.17500305175781, 130.6308135986328, 178.78762817382812, 119.49655151367188, -13.410758972167969, 171.0178680419922, 27.028549194335938, 30.46782684326172, 107.865234375, 72.43460083007812, -54.52986145019531, 84.11383056640625, 29.263565063476562, 10.957061767578125, 21.42640495300293, 0.4866523742675781, -72.66727447509766, 14.907546997070312, 20.092979431152344, 110.88917541503906, -14.526832580566406, 180.87295532226562, -7.849483489990234, -0.04400825500488281, 12.243621826171875, 154.56866455078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000346.npy"}
|
|
{"epoch": 0.5230536659108088, "step": 347, "batch_size": 64, "mean": 42.219913482666016, "std": 72.37174987792969, "min": -123.97256469726562, "p10": -36.09007568359374, "median": 25.652297973632812, "p90": 150.3979187011719, "max": 185.278564453125, "pos_frac": 0.6875, "sample": [28.649919509887695, -6.5933685302734375, 29.333694458007812, 77.54475402832031, -99.99894714355469, 25.679275512695312, 96.58840942382812, 79.13760375976562, 105.60159301757812, 142.7044677734375, 6.748077392578125, 140.185546875, 14.30657958984375, -88.54163360595703, -53.56843566894531, 177.52255249023438, 17.475540161132812, 58.6596565246582, -19.243316650390625, 8.720298767089844, 25.625320434570312, 4.211362838745117, 86.01454162597656, -0.552642822265625, -40.70582580566406, 17.09307861328125, 78.77445220947266, 127.6582260131836, 159.9915771484375, -43.28155517578125, 45.046730041503906, -25.04817008972168, -23.69208526611328, 143.27487182617188, -6.051610946655273, 72.51849365234375, -4.588062286376953, 123.04669952392578, 2.4747314453125, 51.286964416503906, 43.1423225402832, 23.43260955810547, -25.319992065429688, 88.74502563476562, 3.741537094116211, 169.930908203125, -46.496864318847656, -7.231975555419922, 185.278564453125, 172.90850830078125, 170.17030334472656, 138.12757873535156, -7.2742767333984375, 41.20362854003906, -123.97256469726562, 66.23229217529297, 53.638511657714844, 3.2706832885742188, -5.785678863525391, 0.40584373474121094, -16.080951690673828, -3.419525146484375, 153.45065307617188, 89.96802520751953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000347.npy"}
|
|
{"epoch": 0.5245653817082389, "step": 348, "batch_size": 64, "mean": 50.82662582397461, "std": 82.22248077392578, "min": -129.86326599121094, "p10": -38.53766708374023, "median": 31.452219009399414, "p90": 173.1926712036133, "max": 246.30755615234375, "pos_frac": 0.6875, "sample": [173.6387939453125, 158.20529174804688, 14.674148559570312, 1.6536636352539062, 175.2186279296875, 53.348663330078125, -38.6343994140625, -9.41412353515625, -129.86326599121094, 2.7267379760742188, 185.0521240234375, -59.53944396972656, 28.852798461914062, 68.69388580322266, 132.44580078125, 143.29519653320312, 28.31707000732422, 182.99888610839844, 43.61837387084961, 182.1737060546875, 11.139781951904297, 121.99259948730469, 149.80239868164062, 6.691795349121094, -0.5324630737304688, 153.1595916748047, -6.1690216064453125, 147.803466796875, -46.200523376464844, -9.049850463867188, 79.73199462890625, -0.29755401611328125, 3.1123523712158203, 246.30755615234375, 35.90998077392578, -78.84358215332031, 34.051639556884766, -77.99891662597656, -29.57671356201172, 83.47600555419922, 179.08154296875, 159.82119750976562, 59.413543701171875, 0.32302093505859375, 67.72560119628906, -19.014633178710938, -40.49388885498047, -38.31195831298828, -4.0199432373046875, 71.20234680175781, -21.52730941772461, 172.15171813964844, -18.465017318725586, 64.89232635498047, 86.42310333251953, 9.746971130371094, 84.31671142578125, 38.91011047363281, -36.619354248046875, 5.229574203491211, -10.284576416015625, 94.45826721191406, 171.48483276367188, 14.486873626708984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000348.npy"}
|
|
{"epoch": 0.5260770975056689, "step": 349, "batch_size": 64, "mean": 52.27601623535156, "std": 76.94021606445312, "min": -99.97285461425781, "p10": -27.32353973388672, "median": 35.07212829589844, "p90": 165.43682250976562, "max": 192.7700958251953, "pos_frac": 0.671875, "sample": [-65.508056640625, 32.556671142578125, 165.7373504638672, 34.732688903808594, 41.84442901611328, -3.5237579345703125, 85.4158706665039, 165.14535522460938, 69.82538604736328, 10.014083862304688, -1.946969985961914, 35.41156768798828, 147.08201599121094, 31.109527587890625, 33.33318328857422, 85.66583251953125, 141.49354553222656, -75.50789642333984, -99.97285461425781, 163.45298767089844, 48.80845642089844, 119.49147033691406, -26.16607666015625, -0.5636100769042969, 146.14488220214844, 25.39019775390625, 6.903038024902344, 64.32508850097656, -15.809829711914062, 135.677734375, 119.27371215820312, 138.68792724609375, -6.3770751953125, 17.428892135620117, -7.221794128417969, 156.26788330078125, 36.77833557128906, 185.5229034423828, -92.76049041748047, 52.21508026123047, 186.79855346679688, 5.795478820800781, 7.62103271484375, 51.14813232421875, 88.09213256835938, 167.14163208007812, 165.56173706054688, -3.0579566955566406, -21.128341674804688, -2.4593372344970703, 94.94194030761719, -27.819595336914062, -2.9614105224609375, 2.103668212890625, 55.25335693359375, 189.55035400390625, -33.18592834472656, -49.72422790527344, -24.76886749267578, 133.09471130371094, -0.5414066314697266, -12.304801940917969, 83.3662338256836, 192.7700958251953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000349.npy"}
|
|
{"epoch": 0.527588813303099, "step": 350, "batch_size": 64, "mean": 64.83696746826172, "std": 74.66466522216797, "min": -98.09709930419922, "p10": -18.40805168151855, "median": 70.4087142944336, "p90": 164.3608642578125, "max": 226.1267852783203, "pos_frac": 0.796875, "sample": [128.8555908203125, -15.138633728027344, 77.99967956542969, -5.959075927734375, 96.7693099975586, 18.390642166137695, 165.0477294921875, 94.98954772949219, 75.02093505859375, 153.8000946044922, 9.6824951171875, -61.59159851074219, 78.17361450195312, 109.02159118652344, 52.51360321044922, 77.36361694335938, 126.2492446899414, 17.391864776611328, -10.299224853515625, -5.825525283813477, 76.10684204101562, 80.66067504882812, -98.09709930419922, 128.53500366210938, 68.94107818603516, 6.0009918212890625, -97.51719665527344, 226.1267852783203, -10.260736465454102, 39.6064338684082, 134.88125610351562, -54.88533020019531, 162.7581787109375, 10.933670043945312, 131.09619140625, 176.33123779296875, 78.85494995117188, 142.29949951171875, 171.10476684570312, 204.84039306640625, 71.87635040283203, 177.30709838867188, 158.20802307128906, -23.452110290527344, 44.23469161987305, 5.532096862792969, 7.065193176269531, 23.318578720092773, -19.80923080444336, 76.78333282470703, 81.95829772949219, 50.26436233520508, 1.4271068572998047, 46.046173095703125, 42.788780212402344, -69.56869506835938, 54.71533203125, 38.1473388671875, -8.454231262207031, 54.225616455078125, 166.52651977539062, 140.6799774169922, 111.76895904541016, 157.203369140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000350.npy"}
|
|
{"epoch": 0.5291005291005291, "step": 351, "batch_size": 64, "mean": 48.71472930908203, "std": 82.54020690917969, "min": -165.2015380859375, "p10": -42.26030731201171, "median": 37.16118049621582, "p90": 152.64814758300784, "max": 169.13955688476562, "pos_frac": 0.734375, "sample": [-3.6180362701416016, 140.51992797851562, 159.11599731445312, 123.9415054321289, -2.3735694885253906, 141.6260986328125, -11.69100570678711, 35.300445556640625, 167.15353393554688, 33.36540222167969, 16.944061279296875, 155.96029663085938, 10.847494125366211, -23.079833984375, 20.31857681274414, -10.181449890136719, 11.706375122070312, 7.706993103027344, 114.06636810302734, 94.91766357421875, 92.42925262451172, -121.39842987060547, 159.6783905029297, 13.792251586914062, 144.9197998046875, 169.13955688476562, 88.17514038085938, 111.68019104003906, -165.2015380859375, 55.743408203125, 141.83636474609375, -81.56421661376953, 99.91029357910156, -4.199920654296875, 119.68748474121094, -36.99994659423828, -32.030128479003906, 128.5777587890625, 37.44288635253906, 36.87947463989258, 91.67230987548828, 164.66049194335938, 28.602523803710938, -44.69542694091797, 71.78880310058594, 25.91808319091797, 80.82474517822266, 62.36445617675781, 4.2586822509765625, 114.2815170288086, -44.514747619628906, 4.073604583740234, 126.18101501464844, 97.23648834228516, 160.1116485595703, 24.866058349609375, -0.37561798095703125, 99.11918640136719, -162.7001190185547, 14.217687606811523, -151.3725128173828, 120.2030029296875, 107.30804443359375, -17.33209228515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000351.npy"}
|
|
{"epoch": 0.5306122448979592, "step": 352, "batch_size": 64, "mean": 55.07990646362305, "std": 77.00191497802734, "min": -122.73631286621094, "p10": -23.971829032897947, "median": 38.93388557434082, "p90": 158.97426910400392, "max": 263.9610900878906, "pos_frac": 0.734375, "sample": [148.25650024414062, 190.81826782226562, 152.21217346191406, 10.430709838867188, 150.36776733398438, -122.73631286621094, -8.017192840576172, -2.1005935668945312, 70.79180908203125, 80.12338256835938, 72.5589370727539, 134.38833618164062, -94.21286010742188, -2.314403533935547, 75.70944213867188, 25.968536376953125, -24.19671630859375, -4.478721618652344, -51.44475555419922, 21.67206573486328, 100.3121337890625, 70.37162780761719, 31.638946533203125, 165.6867218017578, 159.74574279785156, 93.61367797851562, 39.69739532470703, -120.89458465576172, 29.071552276611328, -24.397960662841797, 38.17037582397461, -2.564573287963867, 7.7472076416015625, 76.14764404296875, 52.874114990234375, 105.82955932617188, 30.66359519958496, 160.209228515625, 170.22071838378906, 53.52271270751953, 157.17416381835938, 6.863243103027344, 263.9610900878906, 26.791902542114258, 102.32157135009766, 147.97573852539062, -12.790489196777344, 18.92138671875, -23.447092056274414, 41.21217727661133, -4.3166656494140625, 85.172607421875, 36.477752685546875, 134.241943359375, 156.62615966796875, -2.2678680419921875, -29.43114471435547, 162.98086547851562, -7.634960174560547, 7.8834075927734375, 0.003253936767578125, 114.01525115966797, 45.39717102050781, 35.52043151855469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000352.npy"}
|
|
{"epoch": 0.5321239606953893, "step": 353, "batch_size": 64, "mean": 52.58208465576172, "std": 74.44818115234375, "min": -166.10914611816406, "p10": -12.840472412109374, "median": 30.524492263793945, "p90": 151.8291976928711, "max": 176.68325805664062, "pos_frac": 0.75, "sample": [81.5833969116211, 176.68325805664062, -35.816707611083984, 45.50950241088867, -104.70976257324219, 26.95138168334961, 94.86869049072266, -10.099967956542969, 174.2556610107422, 23.064537048339844, -7.833560943603516, 25.22747039794922, 97.26272583007812, -11.650848388671875, 149.39869689941406, 21.179079055786133, 40.29682922363281, 139.63751220703125, 122.46755981445312, 151.860595703125, 119.87367248535156, 171.3721923828125, 2.0522327423095703, 96.87480163574219, -14.675247192382812, -6.898845672607422, 169.73318481445312, 17.729515075683594, 131.29440307617188, 91.26274108886719, 94.77522277832031, -1.7888298034667969, 80.36444091796875, 0.4791984558105469, -166.10914611816406, 151.7559356689453, -65.95719909667969, -51.88022232055664, 150.45277404785156, 145.0830841064453, 66.97398376464844, 154.29385375976562, 172.85592651367188, 2.3169021606445312, -13.350311279296875, 44.25540542602539, 130.22665405273438, 12.523782730102539, -5.051265716552734, 19.106582641601562, 39.21534729003906, 26.42877197265625, -3.762939453125, 34.09760284423828, -1.1290817260742188, 11.862625122070312, 4.231529235839844, 5.5526275634765625, -6.247642517089844, 21.695777893066406, 119.9149169921875, 6.7946929931640625, 147.72793579101562, 58.78971862792969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000353.npy"}
|
|
{"epoch": 0.5336356764928194, "step": 354, "batch_size": 64, "mean": 59.49475860595703, "std": 80.80826568603516, "min": -114.81748962402344, "p10": -56.381929779052726, "median": 57.98297882080078, "p90": 162.84708251953126, "max": 194.22743225097656, "pos_frac": 0.765625, "sample": [179.273681640625, 39.76907730102539, -36.22692108154297, 133.97618103027344, -65.80628204345703, 67.55062103271484, 22.08538055419922, 133.28530883789062, 140.88975524902344, 12.09292221069336, 129.83946228027344, 163.09469604492188, 96.17109680175781, 73.81690216064453, 15.568687438964844, 1.3286399841308594, 163.84576416015625, -38.87116622924805, 148.50741577148438, 194.22743225097656, 85.07078552246094, 158.91696166992188, 65.45539855957031, 69.07084655761719, -83.6407470703125, -2.7293338775634766, -114.81748962402344, 129.8679962158203, 34.761253356933594, 55.695167541503906, 29.652679443359375, 117.17996215820312, 167.73080444335938, 56.66680908203125, 51.876495361328125, 15.791572570800781, 92.42889404296875, -84.69951629638672, 162.26931762695312, 16.169754028320312, 109.75792694091797, 153.43699645996094, -72.49793243408203, -80.41189575195312, 176.47967529296875, -60.71971893310547, 188.74440002441406, 130.017822265625, 7.276002883911133, -25.48033905029297, 28.239646911621094, 60.68053436279297, 114.25386047363281, 160.05479431152344, 52.6357536315918, 24.910648345947266, -14.837318420410156, 21.901226043701172, -14.426925659179688, 107.862548828125, 59.29914855957031, 151.88082885742188, -46.26042175292969, -22.26864242553711], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000354.npy"}
|
|
{"epoch": 0.5351473922902494, "step": 355, "batch_size": 64, "mean": 60.377410888671875, "std": 86.48314666748047, "min": -146.48037719726562, "p10": -61.97069778442382, "median": 57.95406532287598, "p90": 169.20809478759764, "max": 200.64593505859375, "pos_frac": 0.71875, "sample": [106.7264404296875, 177.87045288085938, -66.45396423339844, 1.7430572509765625, 186.8216552734375, 74.92414855957031, 47.817420959472656, -13.623708724975586, -70.3554916381836, 73.97359466552734, 57.27226257324219, 27.790069580078125, 93.8225326538086, -68.17861938476562, 156.71038818359375, 113.41935729980469, -3.2681007385253906, 59.86222457885742, -78.42225646972656, 135.73159790039062, -70.27339172363281, 70.32562255859375, 25.052780151367188, 169.10939025878906, 142.82510375976562, 45.4993896484375, 117.26898956298828, 37.790218353271484, 121.75009155273438, 200.64593505859375, -51.509742736816406, 17.107303619384766, -10.394359588623047, 100.71954345703125, 11.871322631835938, -146.48037719726562, 108.58293151855469, -10.511627197265625, 58.635868072509766, 158.23045349121094, -17.070274353027344, 18.914215087890625, 136.16384887695312, -10.238418579101562, -3.9476146697998047, 187.92657470703125, 169.25039672851562, 10.80813217163086, 162.14173889160156, -138.58409118652344, 157.84515380859375, 159.47340393066406, 174.2006378173828, -2.870471954345703, 161.62339782714844, -32.69843292236328, -7.161247253417969, 28.345455169677734, 181.47140502929688, 7.0460205078125, 139.52154541015625, 30.073518753051758, 99.77564239501953, 141.71527099609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000355.npy"}
|
|
{"epoch": 0.5366591080876795, "step": 356, "batch_size": 64, "mean": 49.85102081298828, "std": 92.7762451171875, "min": -169.23538208007812, "p10": -71.62035369873047, "median": 47.434736251831055, "p90": 167.37619934082034, "max": 229.5451202392578, "pos_frac": 0.671875, "sample": [158.32241821289062, 175.8282012939453, 59.88853454589844, -83.00442504882812, 149.36448669433594, 18.065155029296875, 1.0441513061523438, 229.5451202392578, 99.49690246582031, 1.9215946197509766, -118.96369934082031, -149.47332763671875, 101.91796875, -101.00281524658203, 162.78524780273438, 176.54635620117188, 13.757438659667969, 24.38723373413086, 152.67689514160156, -1.0561962127685547, 172.25534057617188, -169.23538208007812, 169.34375, 34.782684326171875, 4.4573974609375, 174.8424530029297, -73.83412170410156, -41.85630798339844, 75.70964050292969, 48.119171142578125, 87.26309204101562, -7.020164489746094, -9.190183639526367, 3.538482666015625, -65.1332015991211, 121.41885375976562, 144.31048583984375, -66.45489501953125, 59.93611145019531, 138.0402069091797, -6.895313262939453, 6.0345611572265625, 161.36126708984375, -29.795242309570312, 51.29591369628906, 97.98847198486328, 98.75199890136719, -0.48352813720703125, -56.923789978027344, 83.11521911621094, -2.506763458251953, 104.70281219482422, 157.9774169921875, 173.88751220703125, -8.555763244628906, 118.71952056884766, 46.750301361083984, -23.36731719970703, 161.32437133789062, -28.790023803710938, -80.05876159667969, 157.02523803710938, 41.34722900390625, 94.21920776367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000356.npy"}
|
|
{"epoch": 0.5381708238851096, "step": 357, "batch_size": 64, "mean": 42.56879425048828, "std": 79.78372192382812, "min": -171.39349365234375, "p10": -48.586166763305656, "median": 37.44011688232422, "p90": 153.80740051269532, "max": 198.76121520996094, "pos_frac": 0.71875, "sample": [146.31356811523438, 0.4952411651611328, 144.80526733398438, -21.921646118164062, -171.39349365234375, 154.72491455078125, 39.8681640625, 60.94179153442383, 30.879226684570312, 35.01206970214844, -13.77264404296875, 52.17174530029297, -42.941917419433594, 12.629558563232422, 165.89524841308594, 84.138671875, 148.73797607421875, 85.62327575683594, 84.3367691040039, 4.179607391357422, 107.71260833740234, 55.778900146484375, 64.41789245605469, 18.643465042114258, 25.798110961914062, 91.25153350830078, -5.38648796081543, -7.460197448730469, -63.93450927734375, 61.934566497802734, -56.97526550292969, 8.504110336303711, -26.403217315673828, 96.20821380615234, 57.929176330566406, 167.08938598632812, 157.32337951660156, 46.84584045410156, 105.64382934570312, 165.26910400390625, -19.97557830810547, 65.67111206054688, 6.9085693359375, 5.069450378417969, 170.56149291992188, 33.17607116699219, 2.6196517944335938, -20.0955810546875, -51.005130767822266, 34.4534912109375, 41.43851852416992, 198.76121520996094, 94.58495330810547, 15.608936309814453, 116.82337188720703, 46.78251647949219, -1.5946483612060547, -1.268280029296875, 140.67971801757812, -119.71217346191406, -97.49275207519531, -159.2805633544922, 151.66653442382812, -0.8920078277587891], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000357.npy"}
|
|
{"epoch": 0.5396825396825397, "step": 358, "batch_size": 64, "mean": 59.906036376953125, "std": 83.51438903808594, "min": -127.78465270996094, "p10": -38.18862419128418, "median": 42.3057975769043, "p90": 168.5807632446289, "max": 262.801513671875, "pos_frac": 0.78125, "sample": [126.67216491699219, 71.2938232421875, 102.37278747558594, 169.74658203125, -54.87065124511719, 179.30072021484375, 156.09637451171875, -40.30315399169922, -8.60440444946289, 22.08038330078125, -11.531208038330078, 128.83114624023438, 44.570274353027344, 119.25994873046875, 152.36404418945312, 50.61991882324219, -57.907501220703125, 0.5885467529296875, 116.435302734375, 22.651851654052734, 14.721435546875, 38.7916374206543, -33.410484313964844, 109.31754302978516, 160.25094604492188, 9.954841613769531, 32.51031494140625, 36.75617980957031, -36.15293884277344, 21.486011505126953, -78.15776824951172, 44.560768127441406, -86.896240234375, 6.521886825561523, -36.94207763671875, 54.888397216796875, 120.69482421875, 33.7105712890625, 16.405189514160156, 0.44895362854003906, 171.77601623535156, 216.79473876953125, 262.801513671875, 10.46822738647461, 205.65350341796875, 165.8605194091797, 92.10352325439453, -38.72285842895508, 40.05082702636719, 69.07147216796875, -1.350494384765625, -33.624122619628906, -127.78465270996094, 154.93417358398438, 10.330780029296875, 154.86065673828125, 26.874317169189453, 86.97409057617188, 16.352783203125, 104.9964599609375, 135.34796142578125, 136.2830810546875, 54.365386962890625, 200.44142150878906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000358.npy"}
|
|
{"epoch": 0.5411942554799698, "step": 359, "batch_size": 64, "mean": 29.710281372070312, "std": 75.5799789428711, "min": -120.67880249023438, "p10": -58.332238006591794, "median": 26.32537841796875, "p90": 137.76069946289064, "max": 197.89683532714844, "pos_frac": 0.65625, "sample": [-53.537750244140625, 91.23410034179688, -23.279571533203125, 7.4796600341796875, 61.93517303466797, 37.806358337402344, -25.817794799804688, 30.36273956298828, -52.704559326171875, -79.0609130859375, 197.89683532714844, 59.29498291015625, -4.303436279296875, 34.58184814453125, -2.800811767578125, 78.81146240234375, 157.12054443359375, -59.102317810058594, -5.9344482421875, -117.28945922851562, 26.318668365478516, 10.590740203857422, 31.233306884765625, -56.53538513183594, 20.38739013671875, 143.33425903320312, 70.60897827148438, 22.699100494384766, 52.90887451171875, -110.66508483886719, 186.98809814453125, 187.50506591796875, -43.600738525390625, 138.20924377441406, 45.0791015625, 27.764328002929688, 1.2460098266601562, 109.55318450927734, 37.33583068847656, 3.5740890502929688, -79.77519989013672, 104.18109893798828, 136.71409606933594, 74.7278823852539, 16.60193634033203, 81.5047607421875, -45.958534240722656, -101.83590698242188, -50.12214660644531, 72.53189086914062, 39.8868293762207, -5.204099655151367, -120.67880249023438, -12.135591506958008, 87.2633285522461, -3.7371749877929688, 82.04353332519531, 6.497785568237305, 60.37384033203125, 134.1593475341797, 26.332088470458984, 163.15322875976562, 7.458484649658203, -9.752395629882812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000359.npy"}
|
|
{"epoch": 0.5427059712773998, "step": 360, "batch_size": 64, "mean": 50.43164825439453, "std": 77.4580078125, "min": -119.71013641357422, "p10": -31.207313156127924, "median": 27.683589935302734, "p90": 158.82595825195312, "max": 212.16708374023438, "pos_frac": 0.65625, "sample": [-27.065597534179688, 155.8745880126953, 26.5220947265625, 9.126800537109375, 58.00792694091797, 104.96578216552734, -3.679779052734375, 34.00802993774414, 35.08987808227539, 165.71542358398438, 0.20949745178222656, -64.018798828125, -8.520889282226562, -73.8941650390625, 143.24166870117188, -73.65010070800781, 198.27395629882812, -32.98233413696289, -0.4124488830566406, 143.35572814941406, 33.5600700378418, -3.8135147094726562, -55.13897705078125, 10.731842041015625, -2.4787750244140625, -4.728691101074219, 132.63487243652344, -0.29712677001953125, 100.18025970458984, -10.788772583007812, 42.48230743408203, 16.060630798339844, 101.71517944335938, -36.819488525390625, 28.84508514404297, 159.87698364257812, -13.913726806640625, 96.31529235839844, 160.19564819335938, 24.4390869140625, 195.53860473632812, -14.499481201171875, 150.36325073242188, -12.495687484741211, 212.16708374023438, 19.620872497558594, 156.37356567382812, 16.92107391357422, 80.93603515625, 132.43820190429688, 124.34845733642578, 62.62811279296875, 174.1166534423828, -18.292383193969727, -10.417402267456055, 74.59898376464844, -5.743711471557617, -119.71013641357422, 17.21001625061035, 131.6486053466797, 35.525360107421875, 22.392173767089844, 105.69257354736328, 127.03941345214844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000360.npy"}
|
|
{"epoch": 0.54421768707483, "step": 361, "batch_size": 64, "mean": 60.17532730102539, "std": 76.42255401611328, "min": -111.41645812988281, "p10": -28.907833862304685, "median": 58.73846626281738, "p90": 163.62916107177733, "max": 189.60162353515625, "pos_frac": 0.765625, "sample": [78.13707733154297, 119.62561798095703, 17.546680450439453, -8.773460388183594, 165.3381805419922, 30.09855079650879, -52.01382827758789, -111.41645812988281, 57.334049224853516, 41.03020095825195, 120.67863464355469, -44.969966888427734, -4.183326721191406, 153.230224609375, 109.68807983398438, 99.88573455810547, -25.202625274658203, 6.133571624755859, 60.14288330078125, 10.649688720703125, 113.8701171875, 61.217735290527344, 12.174400329589844, 163.64230346679688, 146.58016967773438, 163.59849548339844, 140.94338989257812, -99.69816589355469, 148.02139282226562, 137.73660278320312, 27.979358673095703, -5.2548828125, 121.93881225585938, 189.60162353515625, 169.54244995117188, 86.926513671875, 157.32298278808594, 107.0728988647461, -60.11517333984375, -8.002906799316406, -1.4655323028564453, 2.3941268920898438, 169.3223419189453, 157.3163299560547, 5.311199188232422, 165.0183868408203, 136.8306884765625, 10.656883239746094, -55.93268585205078, -27.263107299804688, 74.72364807128906, 17.861526489257812, 77.896728515625, 12.52035140991211, 171.78521728515625, 26.604520797729492, 146.58169555664062, 0.16930389404296875, 14.786422729492188, 67.91761016845703, 65.14252471923828, 48.14027404785156, -3.542346954345703, -29.612716674804688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000361.npy"}
|
|
{"epoch": 0.54572940287226, "step": 362, "batch_size": 64, "mean": 63.55701446533203, "std": 96.1629867553711, "min": -163.8782958984375, "p10": -36.570859527587885, "median": 50.05332946777344, "p90": 165.07369842529297, "max": 389.85540771484375, "pos_frac": 0.703125, "sample": [-58.346885681152344, 3.343870162963867, -12.2303466796875, 1.6960315704345703, -9.627647399902344, -163.8782958984375, 36.249839782714844, 71.7408447265625, 389.85540771484375, 157.96241760253906, 25.87334442138672, -23.82516860961914, 22.866477966308594, -17.958084106445312, -3.407562255859375, -5.857120513916016, 94.49043273925781, 77.08523559570312, 98.75810241699219, 175.07293701171875, -2.1313304901123047, 40.85498046875, 12.755613327026367, 112.69024658203125, 142.66873168945312, -0.87835693359375, 146.75064086914062, -3.301971435546875, 4.624385833740234, 126.10041809082031, 246.746826171875, -0.2598094940185547, -27.845367431640625, 161.73757934570312, 35.12561798095703, 212.68304443359375, 156.0919189453125, 24.6551513671875, 83.64046478271484, -25.499359130859375, 1.1312522888183594, 145.62237548828125, 161.3587646484375, 166.5034637451172, 31.849136352539062, 59.251678466796875, 128.75498962402344, 196.43006896972656, 129.04139709472656, 146.92214965820312, -40.31035614013672, 31.642852783203125, 151.44583129882812, -123.80335998535156, 80.51522064208984, 90.56929779052734, 154.8878173828125, -99.77377319335938, -56.419532775878906, 119.45066833496094, 194.30699157714844, -83.05360412597656, 93.14627075195312, 81.10597229003906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000362.npy"}
|
|
{"epoch": 0.54724111866969, "step": 363, "batch_size": 64, "mean": 60.52595520019531, "std": 83.82249450683594, "min": -153.4051055908203, "p10": -14.682393836975093, "median": 39.49409866333008, "p90": 181.4228271484375, "max": 288.1968078613281, "pos_frac": 0.859375, "sample": [95.06781768798828, -120.19497680664062, 170.79052734375, 52.73369598388672, 92.57034301757812, 33.98735809326172, 12.711349487304688, 195.4229278564453, 22.62738037109375, -52.66471481323242, 1.9210662841796875, 76.88168334960938, 288.1968078613281, 222.573974609375, 15.938056945800781, 188.97390747070312, 31.993366241455078, 98.81136322021484, 27.501792907714844, 148.05615234375, 19.379535675048828, 122.94131469726562, 12.69822883605957, 182.2377471923828, 160.79051208496094, 20.59440803527832, 1.543426513671875, 21.190475463867188, -16.755775451660156, 35.157752990722656, -9.844503402709961, -153.4051055908203, 81.7228012084961, -111.3326416015625, 81.21456909179688, 69.24967956542969, 43.8304443359375, 118.25362396240234, 25.75887107849121, -1.510009765625, 10.311283111572266, 11.822578430175781, 32.917327880859375, 179.52134704589844, 2.9291820526123047, 190.73794555664062, 112.78575134277344, 48.85204315185547, 161.49359130859375, 8.749656677246094, 138.6123809814453, 1.5714950561523438, 140.15220642089844, 97.83073425292969, -66.07603454589844, 76.99459838867188, 182.29774475097656, -25.060832977294922, 81.59071350097656, 27.83792495727539, 2.554746627807617, 90.10812377929688, 46.31837463378906, 11.192983627319336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000363.npy"}
|
|
{"epoch": 0.5487528344671202, "step": 364, "batch_size": 64, "mean": 67.76409149169922, "std": 99.25979614257812, "min": -166.92250061035156, "p10": -36.52510375976562, "median": 64.81753540039062, "p90": 192.04533996582032, "max": 268.62762451171875, "pos_frac": 0.734375, "sample": [203.24859619140625, -6.314544677734375, 29.253211975097656, 177.98959350585938, 189.84927368164062, 14.406074523925781, 70.80646514892578, 138.82867431640625, -16.018043518066406, 205.58761596679688, 221.97547912597656, 135.96725463867188, -11.309505462646484, -58.712974548339844, 118.66326904296875, 163.10655212402344, 44.98804473876953, -19.210376739501953, 268.62762451171875, 0.35150146484375, 192.98651123046875, 69.36276245117188, 140.9468536376953, 167.47804260253906, -144.91783142089844, -40.782859802246094, 186.4671630859375, 4.750268936157227, 0.9549655914306641, 138.87498474121094, 168.21697998046875, 68.6978988647461, -7.490631103515625, 81.7865982055664, 21.944900512695312, 1.0802459716796875, 130.3124542236328, 209.70196533203125, -38.580650329589844, 23.467607498168945, 129.4603729248047, 90.61425018310547, -31.72882843017578, -111.27021789550781, 236.5615234375, 10.32980728149414, -4.0893402099609375, 164.66796875, 6.3146514892578125, 141.10345458984375, 10.541458129882812, 149.99501037597656, -127.09039306640625, 60.937171936035156, -166.92250061035156, 14.72930908203125, 80.32015228271484, 127.47380065917969, -2.057851791381836, 10.278717041015625, -3.176990509033203, -29.705162048339844, 164.96475219726562, 167.3090362548828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000364.npy"}
|
|
{"epoch": 0.5502645502645502, "step": 365, "batch_size": 64, "mean": 62.72900390625, "std": 73.85322570800781, "min": -106.23329162597656, "p10": -7.631257438659663, "median": 57.08635139465332, "p90": 161.3269256591797, "max": 298.74945068359375, "pos_frac": 0.796875, "sample": [43.69297790527344, 18.028940200805664, 93.46661376953125, 0.214630126953125, 62.002227783203125, 1.6005897521972656, 153.593994140625, 118.98050689697266, 135.0629425048828, 79.14642333984375, 161.76254272460938, 68.23693084716797, -106.23329162597656, -2.827320098876953, 22.117462158203125, 160.31048583984375, -1.8627128601074219, -1.6514015197753906, -9.507225036621094, 0.7660980224609375, 84.372802734375, 81.40113067626953, 136.66983032226562, 3.324289321899414, -0.9160900115966797, -91.41461181640625, 73.12947845458984, 168.9321746826172, 41.72510528564453, 97.46257781982422, 5.9228363037109375, 4.435548782348633, 165.92703247070312, 68.66455078125, 146.6258087158203, 87.9705810546875, 152.1905517578125, -3.253999710083008, 59.71681594848633, 52.022544860839844, 166.30528259277344, 54.45588684082031, 49.280303955078125, -27.458038330078125, 105.04850769042969, 154.70257568359375, 5.57380485534668, -13.858320236206055, 298.74945068359375, 62.828460693359375, 83.99681854248047, 4.018613815307617, 143.270751953125, 13.715972900390625, -1.3525047302246094, 54.196502685546875, 5.09771728515625, -32.32246017456055, 86.57450866699219, 1.0859375, 171.94764709472656, 167.75015258789062, -9.508651733398438, 138.74688720703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000365.npy"}
|
|
{"epoch": 0.5517762660619804, "step": 366, "batch_size": 64, "mean": 52.58029556274414, "std": 96.54944610595703, "min": -176.3979949951172, "p10": -78.82183685302734, "median": 49.17613410949707, "p90": 165.72653503417968, "max": 238.67430114746094, "pos_frac": 0.703125, "sample": [-0.6569938659667969, 202.6416015625, 96.41112518310547, 79.11148071289062, -10.173942565917969, 153.62286376953125, -76.31190490722656, -37.34290313720703, -73.05572509765625, -8.132034301757812, -65.16168212890625, 146.33935546875, 20.231643676757812, 91.60787963867188, 76.39007568359375, 47.13031005859375, -91.96138000488281, 67.90054321289062, 2.561796188354492, 127.34455108642578, 76.34848022460938, 52.805686950683594, 70.2230224609375, -13.148723602294922, 24.602506637573242, 139.28843688964844, 51.22195816040039, 163.73709106445312, -176.3979949951172, -2.0897769927978516, 197.44268798828125, 5.043739318847656, -2.6742191314697266, 238.67430114746094, -106.61487579345703, 1.3032588958740234, -79.89752197265625, -94.18637084960938, 16.85568618774414, 43.91215133666992, 2.4975929260253906, -35.4212760925293, 3.020143508911133, 42.918701171875, -105.16481018066406, 16.993789672851562, 6.188459396362305, 232.68064880371094, 187.7023468017578, 70.70945739746094, 63.69047546386719, 158.6260528564453, 150.2918243408203, -114.39527893066406, 164.3466339111328, -44.088775634765625, 154.5516357421875, 163.9668731689453, 178.0947265625, 151.45252990722656, 166.0008544921875, 105.81969451904297, 165.08645629882812, 124.62393188476562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000366.npy"}
|
|
{"epoch": 0.5532879818594104, "step": 367, "batch_size": 64, "mean": 52.391990661621094, "std": 80.5484848022461, "min": -171.24725341796875, "p10": -37.06993560791015, "median": 35.91695022583008, "p90": 155.15159149169924, "max": 192.83453369140625, "pos_frac": 0.796875, "sample": [1.7104454040527344, 192.83453369140625, 81.0494155883789, -11.967941284179688, 96.35975646972656, -1.403717041015625, 152.30886840820312, 151.7305908203125, 95.53977966308594, 3.105396270751953, 28.958343505859375, 23.643798828125, 10.383522033691406, 30.78839111328125, 157.30026245117188, 9.711261749267578, 71.15449523925781, -4.456352233886719, -54.024322509765625, 156.90328979492188, 8.966384887695312, 92.76469421386719, -9.346704483032227, -68.56581115722656, 90.4501953125, -163.9475860595703, -38.90528869628906, 93.9353256225586, 11.501972198486328, 26.652618408203125, 111.04116821289062, 145.7301025390625, 127.8575668334961, 109.72018432617188, 112.46910858154297, 3.778820037841797, 69.90263366699219, -69.38516235351562, 14.335676193237305, 155.5795135498047, 37.04753875732422, -171.24725341796875, 11.149072647094727, 183.06251525878906, 111.72469329833984, -32.787445068359375, 82.29414367675781, 19.361614227294922, 138.42234802246094, 23.609844207763672, 32.67781066894531, 10.126091003417969, -121.55115509033203, 147.86212158203125, 72.63551330566406, 168.07830810546875, 34.78636169433594, -20.602109909057617, 112.41371154785156, 160.58302307128906, 17.236801147460938, 78.33592987060547, 87.549560546875, 154.15310668945312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000367.npy"}
|
|
{"epoch": 0.5547996976568406, "step": 368, "batch_size": 64, "mean": 50.212066650390625, "std": 78.47350311279297, "min": -166.54183959960938, "p10": -42.32307205200195, "median": 51.64575386047363, "p90": 158.68585968017578, "max": 178.56578063964844, "pos_frac": 0.734375, "sample": [-166.54183959960938, 137.4682159423828, 10.883171081542969, 25.724878311157227, 74.61080932617188, 108.0179214477539, 153.11355590820312, 78.7069320678711, -27.192588806152344, -18.783798217773438, 168.1355438232422, 4.181476593017578, 24.782012939453125, 42.533660888671875, 168.64013671875, 131.21896362304688, -22.841110229492188, -68.14118957519531, -67.415283203125, 90.75736236572266, 67.79742431640625, 33.50028991699219, 59.71692657470703, -39.385963439941406, 166.1200714111328, 19.065258026123047, -38.87391662597656, 159.21849060058594, 178.56578063964844, 48.74070358276367, -6.951955795288086, 175.68936157226562, 93.29711151123047, 1.4737892150878906, 34.495357513427734, 105.42768859863281, 55.062469482421875, 91.2667236328125, 28.72724151611328, 103.24464416503906, 57.004608154296875, -5.308286666870117, 129.73501586914062, 23.35040283203125, -32.86564636230469, -26.475318908691406, 47.53666305541992, -28.587799072265625, 157.44305419921875, -43.58183288574219, 69.13981628417969, 151.3825225830078, 162.9080810546875, 136.68707275390625, 10.560380935668945, 109.57496643066406, 54.550804138183594, -65.20918273925781, -88.58011627197266, 91.3507080078125, 117.86441040039062, 22.135515213012695, -99.51860046386719, 78.41848754882812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000368.npy"}
|
|
{"epoch": 0.5563114134542706, "step": 369, "batch_size": 64, "mean": 43.130043029785156, "std": 80.4517593383789, "min": -150.8566131591797, "p10": -45.253584480285625, "median": 36.39826011657715, "p90": 166.3796356201172, "max": 238.51123046875, "pos_frac": 0.703125, "sample": [29.69695281982422, -97.03104400634766, 48.303993225097656, 95.0421142578125, -110.25160217285156, 55.68170166015625, 43.48238754272461, -2.5563812255859375, 168.4282684326172, 21.749980926513672, 21.22669219970703, 136.87094116210938, 142.29249572753906, 110.88431549072266, 164.50804138183594, 5.884132385253906, 73.11334228515625, 59.261016845703125, 23.80974578857422, 35.127132415771484, -11.1451416015625, -150.8566131591797, 63.943023681640625, -78.89251708984375, 45.70149230957031, 91.15663146972656, -25.16916275024414, 76.37590026855469, 65.7742919921875, -26.93617057800293, 125.70584106445312, -23.680511474609375, 58.653663635253906, 170.48751831054688, 83.58021545410156, 155.60023498535156, 179.40785217285156, 6.631561279296875, 32.24114990234375, -86.81694793701172, -17.825347900390625, 23.298072814941406, -11.599407196044922, 60.89452362060547, -10.409542083740234, 44.17823028564453, 238.51123046875, 3.9952621459960938, 173.72808837890625, -20.00128173828125, -3.8029403686523438, 37.66938781738281, 0.3355140686035156, 167.18174743652344, 1.9822463989257812, -5.491743087768555, -0.8133945465087891, 175.92800903320312, 16.306312561035156, 46.18475341796875, -53.103904724121094, 60.160369873046875, -104.72164916992188, 160.4514923095703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000369.npy"}
|
|
{"epoch": 0.5578231292517006, "step": 370, "batch_size": 64, "mean": 61.234703063964844, "std": 90.80966186523438, "min": -201.83445739746094, "p10": -31.405409240722648, "median": 61.85676574707031, "p90": 179.33501281738282, "max": 217.95486450195312, "pos_frac": 0.75, "sample": [76.17837524414062, 177.27310180664062, 124.32814025878906, 62.198951721191406, 217.95486450195312, 6.662757873535156, 181.69920349121094, -1.5172882080078125, 168.50445556640625, 14.349628448486328, 27.343643188476562, 80.84001159667969, 1.8263015747070312, 167.90240478515625, 195.3200225830078, 110.694580078125, 74.57133483886719, -110.85103607177734, -22.373458862304688, 71.3009033203125, 61.0625, 109.70283508300781, 96.99392700195312, 85.70027160644531, 175.2373809814453, 89.13446044921875, 82.26405334472656, 5.062992095947266, -10.037681579589844, -3.591522216796875, -2.084075927734375, -11.537681579589844, -8.572257995605469, -84.02140808105469, 27.6947021484375, 72.2543716430664, 204.6221923828125, 43.08583068847656, 164.39370727539062, 158.34385681152344, 195.21792602539062, -201.83445739746094, 19.658281326293945, 87.62745666503906, 12.923025131225586, 180.21868896484375, 116.70567321777344, 135.7764434814453, 150.9163818359375, 185.27252197265625, -167.6877899169922, 2.6726608276367188, 164.6754150390625, -3.0498600006103516, -35.2762451171875, 28.4359130859375, 31.8272705078125, -1.4522552490234375, -67.79547882080078, 3.084585189819336, 40.958534240722656, 157.9403076171875, -59.2241096496582, 61.51457977294922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000370.npy"}
|
|
{"epoch": 0.5593348450491308, "step": 371, "batch_size": 64, "mean": 53.3720588684082, "std": 83.63795471191406, "min": -98.4363784790039, "p10": -58.331488037109374, "median": 40.85672378540039, "p90": 162.42227630615236, "max": 218.11856079101562, "pos_frac": 0.71875, "sample": [158.48724365234375, -86.73228454589844, 189.00222778320312, 33.943641662597656, 76.06997680664062, 8.220428466796875, 133.9048614501953, 19.70322608947754, -22.598678588867188, 12.077985763549805, -16.44012451171875, 128.07379150390625, -79.03813171386719, 181.71888732910156, 0.02739715576171875, 218.11856079101562, -2.961000442504883, 117.79866027832031, 25.096824645996094, 164.1087188720703, 56.52110290527344, -89.61677551269531, -1.8572406768798828, 2.5527801513671875, 32.873443603515625, 146.24276733398438, 137.5234375, 208.48263549804688, 128.3157958984375, 76.54678344726562, 150.6446533203125, 184.22561645507812, -59.88420867919922, -9.929000854492188, 96.486328125, 29.47412109375, -14.873741149902344, -98.4363784790039, -24.770675659179688, 176.60911560058594, -84.23059844970703, 25.76633071899414, 1.6359844207763672, 82.09115600585938, 63.733795166015625, -44.868003845214844, 116.68376922607422, 87.73941802978516, -46.44274139404297, 42.732330322265625, 120.357666015625, -15.07632827758789, 7.0177154541015625, 101.0596923828125, 145.56631469726562, 0.1051788330078125, 139.81277465820312, 38.981117248535156, 75.77485656738281, 67.15715789794922, 145.00881958007812, 86.49134063720703, -54.708473205566406, -72.290283203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000371.npy"}
|
|
{"epoch": 0.5608465608465608, "step": 372, "batch_size": 64, "mean": 31.984268188476562, "std": 99.10271453857422, "min": -169.83364868164062, "p10": -100.53034210205078, "median": 14.17365837097168, "p90": 172.2485809326172, "max": 200.60989379882812, "pos_frac": 0.640625, "sample": [-28.248367309570312, 15.886890411376953, 165.22982788085938, 98.73786926269531, 77.93843078613281, 98.1623764038086, 180.82244873046875, -38.856178283691406, 151.32821655273438, 140.11962890625, 196.51962280273438, -7.00604248046875, 113.50025939941406, 195.53823852539062, 50.04704284667969, 127.99467468261719, 136.9172821044922, -163.06051635742188, 11.183784484863281, 17.30622100830078, 6.701473236083984, -0.9382915496826172, 36.2344970703125, 171.81983947753906, 7.593723297119141, -22.063514709472656, -101.00067138671875, -3.756805419921875, 179.22348022460938, -99.43290710449219, 100.47686004638672, 34.74400329589844, -62.361812591552734, -2.472698211669922, -121.99223327636719, 9.808355331420898, -169.83364868164062, 12.460426330566406, 137.62582397460938, 20.2800350189209, 200.60989379882812, 74.70889282226562, 156.60379028320312, 39.64210510253906, -69.00155639648438, 90.03582763671875, -166.3041534423828, 2.2893600463867188, -0.5366764068603516, 2.8506546020507812, 8.683975219726562, -150.00819396972656, 110.16792297363281, 5.067527770996094, -44.78492736816406, -56.571250915527344, -139.9888153076172, 65.07891082763672, 177.66461181640625, 172.4323272705078, -91.30595397949219, -24.578140258789062, -7.491218566894531, 18.550708770751953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000372.npy"}
|
|
{"epoch": 0.562358276643991, "step": 373, "batch_size": 64, "mean": 44.88999557495117, "std": 97.07161712646484, "min": -175.6559295654297, "p10": -90.12879943847656, "median": 47.73423194885254, "p90": 172.44042358398437, "max": 215.1804656982422, "pos_frac": 0.6875, "sample": [-143.80323791503906, 159.29652404785156, 133.87538146972656, 185.21923828125, 155.009765625, -140.92938232421875, -45.377967834472656, 123.1182861328125, 117.84528350830078, 57.12323760986328, 136.1373291015625, -138.68829345703125, 17.807205200195312, -45.98936462402344, -102.93872833251953, -16.000537872314453, 72.91996002197266, 104.7560806274414, -175.6559295654297, -54.56416320800781, 74.80470275878906, -12.220115661621094, 93.62255096435547, -92.3515396118164, 171.4735565185547, 157.40390014648438, -28.522628784179688, -13.926681518554688, 82.44390106201172, 105.20597076416016, 172.78707885742188, 0.555419921875, 46.84212112426758, 171.63156127929688, -84.9424057006836, 0.3923759460449219, 9.786651611328125, 96.85617065429688, 2.8734512329101562, 154.40594482421875, 114.17753601074219, -76.55126953125, -99.43560791015625, -35.17923355102539, 50.49845886230469, 121.82935333251953, -62.50270080566406, 179.01104736328125, 20.842140197753906, 55.514373779296875, -13.696426391601562, 177.40919494628906, 2.8390655517578125, 205.48318481445312, 48.6263427734375, 37.728084564208984, 75.02609252929688, 75.19064331054688, 21.447471618652344, 215.1804656982422, 44.71504211425781, -1.8563423156738281, 33.99607849121094, 174.38397216796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000373.npy"}
|
|
{"epoch": 0.563869992441421, "step": 374, "batch_size": 64, "mean": 73.99679565429688, "std": 92.26471710205078, "min": -193.73585510253906, "p10": -17.540626525878906, "median": 66.01351165771484, "p90": 186.24233703613282, "max": 290.3999328613281, "pos_frac": 0.8125, "sample": [290.3999328613281, 171.29037475585938, 157.71649169921875, -11.807365417480469, 32.41609573364258, 148.66009521484375, 187.0751190185547, 192.60971069335938, 181.30157470703125, 50.16320037841797, 81.81708526611328, 0.6874237060546875, 107.50920104980469, 237.42132568359375, 99.43204498291016, 30.025421142578125, 151.93572998046875, 4.66351318359375, 33.4200439453125, 67.58683013916016, 27.922569274902344, 142.09359741210938, 48.245235443115234, -133.650146484375, 108.85589599609375, 107.84331512451172, 58.89351272583008, 163.728759765625, -7.30303955078125, 126.21249389648438, 65.37174987792969, 85.39617919921875, -0.3509063720703125, -16.616226196289062, 100.94476318359375, 66.6552734375, -8.539093017578125, -193.73585510253906, 235.78805541992188, 99.93527221679688, 41.35957717895508, -115.5978012084961, 30.321752548217773, 212.2647247314453, 206.6807861328125, 56.867408752441406, -75.07583618164062, -17.936798095703125, 63.38038635253906, 49.952613830566406, 134.99282836914062, 140.13278198242188, 184.29917907714844, 70.40205383300781, 3.3815650939941406, 2.5406932830810547, 61.08043670654297, 112.23464965820312, 158.44357299804688, 163.00738525390625, -54.26560974121094, -36.86082458496094, 50.87696838378906, 1.2970123291015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000374.npy"}
|
|
{"epoch": 0.5653817082388511, "step": 375, "batch_size": 64, "mean": 53.71406555175781, "std": 91.5151596069336, "min": -169.88059997558594, "p10": -67.89316101074218, "median": 38.685386657714844, "p90": 168.38937530517578, "max": 261.0669250488281, "pos_frac": 0.703125, "sample": [-71.31375885009766, -88.17996978759766, 67.60401153564453, 127.38827514648438, 118.83196258544922, 31.257131576538086, 199.59808349609375, -6.104190826416016, 22.17095947265625, 102.55657958984375, -5.281440734863281, 140.14207458496094, -1.23089599609375, 160.80101013183594, 5.257938385009766, -2.9932594299316406, 37.376434326171875, -102.79620361328125, 112.5049819946289, -4.392341613769531, 64.18647766113281, 180.04931640625, 168.54562377929688, -99.99484252929688, 142.82858276367188, 170.35336303710938, 4.596992492675781, -38.072479248046875, 78.93314361572266, 158.37486267089844, 12.511360168457031, 3.3727474212646484, 8.972734451293945, -57.461647033691406, 38.40081787109375, 96.3905258178711, 94.52997589111328, 37.61070251464844, 177.2526397705078, 96.83544921875, 147.16372680664062, 55.85545349121094, 127.4798583984375, -169.88059997558594, -36.29637145996094, 38.96995544433594, 197.7886962890625, 15.251968383789062, 261.0669250488281, 106.70042419433594, 168.02479553222656, 89.29287719726562, -24.84076690673828, 157.9779815673828, -40.73036575317383, 5.145179748535156, 109.23766326904297, -101.59501647949219, -59.911766052246094, 163.6682586669922, -8.165931701660156, 117.91875457763672, 15.381172180175781, -79.21633911132812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000375.npy"}
|
|
{"epoch": 0.5668934240362812, "step": 376, "batch_size": 64, "mean": 64.81566619873047, "std": 98.02803039550781, "min": -154.53067016601562, "p10": -68.99368743896484, "median": 58.46989059448242, "p90": 183.50187835693362, "max": 308.5480041503906, "pos_frac": 0.640625, "sample": [43.20379638671875, -5.69647216796875, -2.6287403106689453, 35.13372802734375, -6.824058532714844, -12.817028045654297, -7.6955108642578125, 134.9705047607422, -12.52935791015625, 58.14079284667969, 102.06594848632812, 102.56629943847656, -154.53067016601562, -1.5469284057617188, -81.52290344238281, 10.2716064453125, 195.5726318359375, 83.13259887695312, -3.2114524841308594, 308.5480041503906, 65.74822998046875, 197.13909912109375, 5.76953125, 159.77359008789062, 215.500732421875, -21.95831298828125, 147.3446044921875, 149.33473205566406, 154.412353515625, 107.24344635009766, 113.95359802246094, 58.798988342285156, 171.30105590820312, 51.17878723144531, 94.1197738647461, 186.25657653808594, 216.94094848632812, -64.76983642578125, 175.40077209472656, -70.80390930175781, 67.29456329345703, -88.60481262207031, 171.0601806640625, 141.67166137695312, 127.38824462890625, 216.55386352539062, -84.89810943603516, 137.11248779296875, -52.55096435546875, 154.57066345214844, -81.17705535888672, 12.90386962890625, 25.544235229492188, 52.40907669067383, 177.07424926757812, -7.104822158813477, 152.60813903808594, -16.98781967163086, 167.09503173828125, 114.98948669433594, -46.59376525878906, -6.543581008911133, -0.7012901306152344, -82.19819641113281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000376.npy"}
|
|
{"epoch": 0.5684051398337112, "step": 377, "batch_size": 64, "mean": 47.00309753417969, "std": 84.81757354736328, "min": -163.08633422851562, "p10": -51.163864135742166, "median": 34.43528938293457, "p90": 161.22923736572267, "max": 210.0860595703125, "pos_frac": 0.71875, "sample": [127.44950866699219, -31.353904724121094, 139.8924560546875, 137.74322509765625, 79.96461486816406, 7.352470397949219, -114.92784881591797, -89.2220458984375, 52.202659606933594, 28.474571228027344, 166.22515869140625, 0.32483673095703125, 122.99999237060547, -2.772003173828125, 154.68321228027344, 139.40670776367188, -114.52156829833984, 80.93399810791016, -59.653846740722656, 2.6959686279296875, 186.29168701171875, -2.1516265869140625, 78.19895935058594, 31.2698974609375, 124.64093017578125, -155.9750518798828, 21.927207946777344, 210.0860595703125, 37.4902458190918, -26.097890853881836, 156.7491455078125, -9.288721084594727, 131.60260009765625, 1.1548995971679688, -13.093765258789062, -65.76349639892578, 49.8211669921875, 31.380332946777344, 174.69393920898438, 23.277450561523438, 0.1313629150390625, 87.87203216552734, 7.904298782348633, 91.79979705810547, 76.54940795898438, 171.39013671875, 81.11723327636719, -11.558155059814453, -20.11767578125, 48.48674011230469, 150.43154907226562, 23.275054931640625, -20.349435806274414, 166.3332061767578, 7.5252532958984375, -11.793649673461914, 163.14927673339844, 73.9372787475586, -163.08633422851562, 10.416801452636719, 103.36997985839844, -10.133338928222656, 73.68168640136719, 93.75361633300781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000377.npy"}
|
|
{"epoch": 0.5699168556311414, "step": 378, "batch_size": 64, "mean": 43.057861328125, "std": 83.45757293701172, "min": -162.2574005126953, "p10": -30.31665935516357, "median": 21.117774963378906, "p90": 164.15951385498047, "max": 211.45880126953125, "pos_frac": 0.6875, "sample": [139.69888305664062, 85.78984832763672, 0.8065662384033203, 21.03643798828125, 91.19896697998047, 164.1031494140625, -56.85331726074219, 20.864093780517578, 64.42179870605469, 148.10708618164062, -162.2574005126953, 2.8845958709716797, 211.45880126953125, -8.026248931884766, 170.70985412597656, 110.68329620361328, 77.1049575805664, 1.6684532165527344, 168.07955932617188, -5.455295562744141, 72.90855407714844, -2.085113525390625, 14.410869598388672, 44.08959197998047, 164.1836700439453, 12.9654541015625, 76.3432388305664, 17.346832275390625, -94.26972961425781, 16.80008316040039, 95.2647705078125, 24.986968994140625, -92.72872924804688, -3.481077194213867, 124.50210571289062, 23.59996795654297, -21.547958374023438, 21.199111938476562, 148.30496215820312, -1.1980304718017578, 194.8572540283203, 77.37211608886719, -6.46844482421875, 139.17672729492188, 24.16253662109375, 140.98863220214844, -15.07504653930664, 166.8368682861328, 83.65251922607422, 12.305097579956055, 3.686809539794922, -10.629974365234375, 5.880863189697266, -133.47317504882812, -31.850505828857422, 72.93252563476562, -26.73768424987793, 101.20736694335938, -0.4353160858154297, 69.94440460205078, -12.949508666992188, -159.21986389160156, -10.737321853637695, 182.65658569335938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000378.npy"}
|
|
{"epoch": 0.5714285714285714, "step": 379, "batch_size": 64, "mean": 61.99034881591797, "std": 90.77055358886719, "min": -138.94259643554688, "p10": -44.94855308532714, "median": 37.792049407958984, "p90": 178.74525299072266, "max": 259.66192626953125, "pos_frac": 0.8125, "sample": [93.555419921875, -6.91461181640625, 155.80911254882812, -9.705429077148438, 19.67156982421875, 62.30766296386719, 22.73842430114746, -138.94259643554688, 74.23802185058594, 9.983505249023438, 28.98682403564453, 58.201385498046875, -73.23835754394531, 132.8937530517578, 13.694572448730469, 259.66192626953125, 26.9639892578125, -48.792903900146484, 155.0638427734375, 194.38511657714844, 255.12579345703125, 113.2286376953125, 169.6055145263672, 150.58981323242188, 80.86721801757812, -70.11907958984375, 115.1153335571289, -95.93693542480469, 34.88349151611328, -62.94793701171875, 236.67599487304688, 39.67047882080078, 2.2218017578125, 53.3984489440918, -129.38607788085938, 197.64532470703125, 162.42376708984375, 18.57733917236328, 23.9647216796875, -2.937023162841797, -35.97840118408203, 7.451316833496094, 79.5304946899414, 180.33779907226562, 175.02931213378906, 17.79513931274414, 169.77125549316406, 245.07455444335938, 4.963207244873047, 5.2396392822265625, 114.83689880371094, 16.982696533203125, 35.51800537109375, 65.97281646728516, 136.90530395507812, 1.5931320190429688, 35.91361999511719, 69.34385681152344, 42.68015670776367, 4.504617691040039, 120.76984405517578, 23.896835327148438, 132.35560607910156, -6.333282470703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000379.npy"}
|
|
{"epoch": 0.5729402872260015, "step": 380, "batch_size": 64, "mean": 73.35437774658203, "std": 91.5849609375, "min": -172.58038330078125, "p10": -31.9120246887207, "median": 78.81344223022461, "p90": 175.66658782958987, "max": 244.46224975585938, "pos_frac": 0.78125, "sample": [49.725040435791016, 156.15631103515625, 94.13088989257812, 45.2486572265625, -74.08514404296875, -123.2807388305664, 168.356201171875, 195.12481689453125, -3.9518394470214844, 158.676025390625, 133.4004364013672, 34.83396530151367, -4.585292816162109, 81.67105102539062, 1.3221931457519531, -33.46992492675781, -5.013721466064453, 66.32470703125, 146.43936157226562, 169.9336395263672, 88.04228210449219, 75.9558334350586, -11.919998168945312, -172.58038330078125, 162.95603942871094, 3.675994873046875, 136.34481811523438, 70.72399139404297, -36.17295837402344, 21.414030075073242, 152.9119873046875, 179.25543212890625, 93.8710708618164, 56.19148635864258, 244.46224975585938, 8.666820526123047, 160.36239624023438, 140.65115356445312, 47.04139709472656, -0.08718109130859375, 155.37939453125, -144.41586303710938, 168.8473358154297, 171.6885528564453, 63.105003356933594, 153.03457641601562, -28.27692413330078, 105.99864196777344, -21.751495361328125, 135.9992218017578, 24.70721435546875, 3.828084945678711, -75.07185363769531, 198.70773315429688, 181.8467254638672, 168.8387451171875, 10.26860237121582, 204.32723999023438, 141.26840209960938, 177.3714599609375, 84.70250701904297, 95.29581451416016, 12.82377815246582, 27.434295654296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000380.npy"}
|
|
{"epoch": 0.5744520030234316, "step": 381, "batch_size": 64, "mean": 28.03717803955078, "std": 93.15133666992188, "min": -221.94873046875, "p10": -63.07189102172852, "median": 13.377239227294922, "p90": 164.19524078369142, "max": 275.6016540527344, "pos_frac": 0.640625, "sample": [30.2589111328125, 12.557998657226562, -0.2008228302001953, -112.36463928222656, -210.3308868408203, 9.828922271728516, -0.01316070556640625, 34.42312240600586, 179.8115234375, 92.84355163574219, 0.34905242919921875, -5.083957672119141, 20.023788452148438, 39.8582649230957, 7.301298141479492, 275.6016540527344, 120.33999633789062, 0.017124176025390625, 86.28080749511719, -4.229156494140625, 20.14015007019043, -63.271080017089844, 7.331838607788086, 225.10899353027344, -103.03849029541016, 74.10370635986328, -61.910430908203125, -62.60711669921875, 108.57427978515625, -3.488431930541992, 77.61444854736328, 156.6211700439453, 14.196479797363281, 22.726181030273438, -19.653812408447266, 180.73358154296875, 14.527462005615234, -25.25057601928711, -144.8915252685547, 17.32525634765625, 166.274169921875, -30.741668701171875, -7.003715515136719, 1.9967708587646484, 59.55213165283203, 176.6692657470703, 133.49977111816406, 73.23334503173828, -2.4251155853271484, 24.018661499023438, 5.3221435546875, 115.79259490966797, 159.3444061279297, -25.564315795898438, 28.706806182861328, 6.4790496826171875, 94.06373596191406, -221.94873046875, -77.0547103881836, 45.65875244140625, -1.07330322265625, -62.035545349121094, -48.927162170410156, 168.37652587890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000381.npy"}
|
|
{"epoch": 0.5759637188208617, "step": 382, "batch_size": 64, "mean": 51.353416442871094, "std": 77.28844451904297, "min": -159.2082977294922, "p10": -23.023704528808594, "median": 39.16351318359375, "p90": 156.25498962402344, "max": 218.78253173828125, "pos_frac": 0.734375, "sample": [-28.493560791015625, 6.116098403930664, 218.78253173828125, 64.28226470947266, 186.26742553710938, -21.0859375, 87.81999206542969, 22.13927459716797, 139.516845703125, 82.27378845214844, 72.32188415527344, -18.610916137695312, -21.099563598632812, 37.43768310546875, 140.2938995361328, 36.081756591796875, -23.524566650390625, -20.07916259765625, -159.2082977294922, 149.785888671875, -2.165597915649414, 42.92558288574219, 16.341659545898438, 5.796058654785156, -21.855026245117188, 0.711883544921875, 88.67276000976562, 57.89739227294922, -79.42340850830078, 175.1446533203125, 101.56364440917969, 20.55545425415039, -72.01083374023438, -66.92334747314453, 169.92742919921875, 6.044685363769531, -43.3707389831543, 4.6159515380859375, 54.047889709472656, 155.8883056640625, 81.36868286132812, 217.43121337890625, -1.9342498779296875, 11.831161499023438, 37.597198486328125, 102.51129913330078, -13.546245574951172, -9.790077209472656, 40.729827880859375, 121.35245513916016, 78.052490234375, 125.80680847167969, -14.771289825439453, 60.19553756713867, 142.62893676757812, 2.259868621826172, 73.56716918945312, 27.140438079833984, 156.41213989257812, 186.36935424804688, 100.483154296875, 112.25877380371094, 73.24812316894531, 10.013984680175781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000382.npy"}
|
|
{"epoch": 0.5774754346182918, "step": 383, "batch_size": 64, "mean": 48.11333465576172, "std": 99.38385009765625, "min": -208.13204956054688, "p10": -80.47786254882811, "median": 38.9200553894043, "p90": 181.05057678222659, "max": 244.88766479492188, "pos_frac": 0.703125, "sample": [-3.345338821411133, 152.44776916503906, 99.53396606445312, 18.54412078857422, 8.185325622558594, -55.53905487060547, 131.15582275390625, 56.438804626464844, 23.38378143310547, -97.39258575439453, 0.2605628967285156, -111.71766662597656, 78.76319885253906, 74.28849029541016, 115.78076934814453, 53.485626220703125, 178.41064453125, -74.60428619384766, 190.79486083984375, 163.3702850341797, 42.64665222167969, -24.5743408203125, 191.5936737060547, -72.5096435546875, 166.4411163330078, 6.803131103515625, 50.93165588378906, -15.344131469726562, 16.15971565246582, 244.88766479492188, 135.732421875, -82.99510955810547, -37.025108337402344, 195.92486572265625, -24.597179412841797, 97.99038696289062, 135.82455444335938, -208.13204956054688, 12.422157287597656, 57.460113525390625, -128.22503662109375, 41.52386474609375, 17.043609619140625, 25.206743240356445, 182.18197631835938, -90.49632263183594, 36.316246032714844, 54.49609375, -58.317657470703125, 234.15203857421875, 32.83690643310547, 188.544921875, 102.0545883178711, 17.769241333007812, -10.442329406738281, 166.92349243164062, 134.5166015625, 26.749664306640625, -20.123451232910156, -119.99134826660156, 169.46778869628906, -51.67303466796875, 169.37193298339844, 67.48111724853516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000383.npy"}
|
|
{"epoch": 0.5789871504157218, "step": 384, "batch_size": 64, "mean": 52.28948211669922, "std": 82.88658905029297, "min": -230.05288696289062, "p10": -42.77564582824707, "median": 53.01929473876953, "p90": 171.85755004882813, "max": 212.12100219726562, "pos_frac": 0.8125, "sample": [-56.82954406738281, 169.57208251953125, 58.84405517578125, 130.94903564453125, 7.502429962158203, 87.17496490478516, 94.07709503173828, 37.08692169189453, 182.52024841308594, -41.63837814331055, 53.88897705078125, 54.80012512207031, 80.09228515625, 13.335517883300781, -15.626167297363281, 39.6494140625, 10.005195617675781, 199.87689208984375, -0.090087890625, 172.8370361328125, -17.28076934814453, 68.41749572753906, 15.594154357910156, -43.26304626464844, -125.2806396484375, 98.27487182617188, 212.12100219726562, 73.10283660888672, -122.29730224609375, 117.85746765136719, 175.4329071044922, 120.70536804199219, 1.6366310119628906, 80.35942077636719, 90.74641418457031, 15.24334716796875, 175.61801147460938, 41.05995559692383, -230.05288696289062, 163.9144287109375, 50.936553955078125, 81.36355590820312, 111.24779510498047, -63.42589569091797, 178.13787841796875, 68.36737060546875, 48.183292388916016, 11.826009750366211, 50.404537200927734, 137.29078674316406, 72.38485717773438, 16.262939453125, 71.63043212890625, 52.14961242675781, 13.131217956542969, 2.108814239501953, -101.85517120361328, 48.1817626953125, 9.370391845703125, -5.538227081298828, 82.46395874023438, 131.71893310546875, 56.28459167480469, 33.963050842285156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000384.npy"}
|
|
{"epoch": 0.5804988662131519, "step": 385, "batch_size": 64, "mean": 68.67587280273438, "std": 84.5517807006836, "min": -85.11714935302734, "p10": -36.835636901855466, "median": 56.229434967041016, "p90": 177.66674194335937, "max": 244.42420959472656, "pos_frac": 0.734375, "sample": [-78.82825469970703, -8.72452163696289, 165.49147033691406, 114.1689453125, 43.96177673339844, 18.24566650390625, 56.69525146484375, 133.03912353515625, 30.282485961914062, 34.32569885253906, -65.96043395996094, 108.49673461914062, 94.51554107666016, 166.74893188476562, 46.75188446044922, 144.4915313720703, 174.738037109375, 138.70677185058594, 6.492279052734375, 49.50508499145508, -34.46223449707031, -35.74403381347656, 21.130599975585938, 177.3763427734375, 179.8498077392578, 227.43577575683594, 45.960025787353516, 75.69038391113281, 55.76361846923828, 134.36119079589844, 119.62843322753906, -85.11714935302734, 71.86940002441406, -5.7751007080078125, 63.224857330322266, 187.63511657714844, 148.72430419921875, 103.94284057617188, 106.34986114501953, 118.5797348022461, -23.996234893798828, -50.91582489013672, 157.04385375976562, 45.32554626464844, 17.397850036621094, 244.42420959472656, 206.1530303955078, -32.52256774902344, -14.86343002319336, -19.075088500976562, -37.303466796875, 24.804706573486328, -39.28706359863281, -19.7974853515625, 118.19278717041016, 89.37438201904297, 14.398200988769531, 169.68138122558594, -43.01201629638672, 193.71888732910156, -1.9947853088378906, 4.249076843261719, 177.79119873046875, 165.90084838867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000385.npy"}
|
|
{"epoch": 0.582010582010582, "step": 386, "batch_size": 64, "mean": 45.03410339355469, "std": 84.08084106445312, "min": -188.5599822998047, "p10": -63.82795562744138, "median": 56.70466995239258, "p90": 141.2563919067383, "max": 221.78240966796875, "pos_frac": 0.734375, "sample": [114.67507934570312, -107.2370376586914, -3.8742942810058594, 53.761016845703125, 66.70704650878906, -18.348175048828125, 0.6646652221679688, 82.36993408203125, 92.33387756347656, 94.53723907470703, 23.063705444335938, 83.3839111328125, -146.5677947998047, 12.963371276855469, 37.90582275390625, 136.67987060546875, 155.18832397460938, 111.0709228515625, 142.48263549804688, 123.90251159667969, 11.245626449584961, 74.11345672607422, 68.09259796142578, -75.08506774902344, -78.84818267822266, 154.07186889648438, 118.14437866210938, -29.67431640625, 111.30225372314453, 90.1756362915039, 46.823280334472656, 118.53482055664062, -9.436607360839844, 49.0953369140625, -28.781330108642578, -34.82196044921875, 77.91683197021484, 16.550323486328125, 88.29193115234375, 183.45175170898438, 20.020263671875, -42.99760437011719, 76.69712829589844, 118.82421875, 125.69031524658203, -40.760990142822266, -72.7552490234375, 33.67523956298828, 1.8452892303466797, 198.50360107421875, 14.263893127441406, -36.30253601074219, -129.81576538085938, 138.39515686035156, 221.78240966796875, 59.64832305908203, 65.27662658691406, 87.47833251953125, 61.87605285644531, 161.70291137695312, 14.704513549804688, -28.986263275146484, 15.151664733886719, -188.5599822998047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000386.npy"}
|
|
{"epoch": 0.5835222978080121, "step": 387, "batch_size": 64, "mean": 65.30567932128906, "std": 77.607421875, "min": -133.75833129882812, "p10": -13.245108413696288, "median": 46.8934211730957, "p90": 174.53201751708986, "max": 272.20989990234375, "pos_frac": 0.796875, "sample": [67.63442993164062, 8.163383483886719, 143.42190551757812, -9.569719314575195, 4.313385009765625, 206.26097106933594, 67.37837219238281, -21.417621612548828, 35.83055114746094, 272.20989990234375, 13.824159622192383, -3.5765933990478516, 41.50786590576172, 52.27897644042969, 177.23634338378906, 4.187278747558594, 31.892318725585938, 164.16737365722656, 170.1800537109375, 32.434234619140625, 165.58877563476562, 176.0390625, 117.89218139648438, -31.834102630615234, 4.767219543457031, 137.41360473632812, -6.311561584472656, 20.01445960998535, 153.91763305664062, 61.475341796875, -1.2382621765136719, -13.875381469726562, 40.13495635986328, 171.0155792236328, -26.708038330078125, 90.62540435791016, 58.457035064697266, 39.39176940917969, 31.644290924072266, 13.128494262695312, -0.5793781280517578, 72.55667114257812, 92.76586151123047, -133.75833129882812, -44.55250549316406, 61.3159065246582, 135.6255340576172, 189.6053466796875, 52.987884521484375, 164.275146484375, -11.774471282958984, 36.23851013183594, 37.888946533203125, 74.53157043457031, 70.96844482421875, 40.47197341918945, -33.66596221923828, 31.500288009643555, 89.46327209472656, 198.1637725830078, 230.72024536132812, 77.0804214477539, 25.876752853393555, 61.96159362792969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000387.npy"}
|
|
{"epoch": 0.5850340136054422, "step": 388, "batch_size": 64, "mean": 40.6041145324707, "std": 96.14115905761719, "min": -161.63604736328125, "p10": -67.84878387451171, "median": 24.287090301513672, "p90": 164.8514633178711, "max": 286.619384765625, "pos_frac": 0.640625, "sample": [184.84573364257812, 81.18925476074219, 108.68313598632812, 201.52711486816406, 55.301063537597656, 148.6824188232422, 37.74873352050781, 40.94303894042969, 31.358421325683594, -25.66614532470703, 42.52467346191406, 155.89019775390625, -18.716964721679688, 112.30144500732422, -9.013656616210938, 286.619384765625, 2.4222068786621094, 247.9613494873047, 15.532394409179688, 140.40371704101562, 151.6954345703125, 1.6607818603515625, 160.5677032470703, 38.606689453125, 163.6092987060547, -161.63604736328125, 70.53329467773438, 63.17713928222656, -1.4789562225341797, 4.855302810668945, -3.8815784454345703, -10.795270919799805, -5.775413513183594, 156.80902099609375, -5.054685592651367, 17.21575927734375, 32.90827560424805, -49.08184814453125, 40.79651641845703, -89.59033203125, 41.29857635498047, 142.75265502929688, -143.1881103515625, 134.55357360839844, 167.32005310058594, 1.17926025390625, 14.318611145019531, -1.1031341552734375, -0.744964599609375, 165.38381958007812, -11.505943298339844, 168.86790466308594, -60.05461120605469, -2.6567230224609375, -151.89007568359375, -60.438446044921875, -71.02464294433594, 73.64472961425781, -15.053882598876953, 3.1328659057617188, 40.25794982910156, -125.19669342041016, -132.4349822998047, 5.566888809204102], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000388.npy"}
|
|
{"epoch": 0.5865457294028723, "step": 389, "batch_size": 64, "mean": 65.24828338623047, "std": 87.1871566772461, "min": -166.44308471679688, "p10": -26.199392318725586, "median": 49.3715705871582, "p90": 175.08346557617188, "max": 259.6009216308594, "pos_frac": 0.703125, "sample": [-53.30342102050781, -19.410017013549805, 103.0494384765625, 73.97256469726562, 130.8256072998047, 25.980209350585938, 135.4544677734375, 143.57867431640625, 210.3380126953125, 114.27294921875, -19.224395751953125, -26.90404510498047, 64.61985778808594, 130.8777313232422, 33.21424865722656, 98.12974548339844, 104.29833984375, 160.38726806640625, 94.29434204101562, 9.686641693115234, 162.01101684570312, 175.20028686523438, -3.8364810943603516, 37.372467041015625, 126.7838134765625, -8.31793212890625, 165.02212524414062, -23.965293884277344, 52.91340637207031, 83.64064025878906, -40.58842468261719, 150.09344482421875, -15.767379760742188, 173.57882690429688, 180.07681274414062, 35.23114013671875, 196.394287109375, 91.97136688232422, 44.344303131103516, -166.44308471679688, 216.49273681640625, -4.900257110595703, 6.391546249389648, -24.55520248413086, 45.829734802246094, -15.437042236328125, 174.81088256835938, -40.59235763549805, -68.37806701660156, 29.012359619140625, 8.267036437988281, 2.578155517578125, 0.365142822265625, -2.9084396362304688, -35.81287384033203, 201.1384735107422, 171.54603576660156, 142.88206481933594, 71.62701416015625, 259.6009216308594, -13.446161270141602, 133.02003479003906, 2.6179141998291016, -14.11307144165039], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000389.npy"}
|
|
{"epoch": 0.5880574452003023, "step": 390, "batch_size": 64, "mean": 50.54768371582031, "std": 79.68131256103516, "min": -164.2787628173828, "p10": -46.082144165039054, "median": 35.02114295959473, "p90": 161.76946716308595, "max": 217.21826171875, "pos_frac": 0.75, "sample": [-57.19708251953125, 85.82894134521484, 76.90520477294922, 27.359161376953125, 142.9454345703125, 33.71318817138672, 149.2838134765625, 117.48011779785156, -57.04242706298828, 27.74591827392578, 1.7211742401123047, -24.684921264648438, 138.91445922851562, 43.57505798339844, -1.2730789184570312, 202.7958984375, 154.9564666748047, -49.91290283203125, 102.08975219726562, -59.230751037597656, 98.53848266601562, 8.446784973144531, 5.3021087646484375, 175.56979370117188, 159.9281005859375, 162.55862426757812, 15.125213623046875, -72.07073974609375, 1.7931404113769531, 113.14424896240234, -37.143707275390625, 7.490546226501465, -20.379514694213867, 56.751258850097656, -9.333599090576172, -36.16606903076172, -164.2787628173828, 120.77975463867188, 165.574462890625, -0.7755165100097656, 151.70599365234375, 13.591463088989258, -3.36309814453125, 62.85643005371094, 0.6140174865722656, 87.84982299804688, 13.512771606445312, 15.162788391113281, 217.21826171875, 39.241981506347656, 165.84725952148438, 117.21440124511719, 11.532112121582031, 47.09751892089844, 5.186241149902344, 111.87698364257812, 0.14282798767089844, -74.74534606933594, 36.329097747802734, -18.47456932067871, 98.77947998046875, 71.11819458007812, 75.5207290649414, 182.40818786621094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000390.npy"}
|
|
{"epoch": 0.5895691609977324, "step": 391, "batch_size": 64, "mean": 60.54807662963867, "std": 96.01428985595703, "min": -166.85215759277344, "p10": -71.8427635192871, "median": 62.42853927612305, "p90": 177.41195068359377, "max": 273.84661865234375, "pos_frac": 0.765625, "sample": [38.306488037109375, -166.85215759277344, 171.49554443359375, 54.58955383300781, 47.607749938964844, 179.08502197265625, 14.69784164428711, 179.21722412109375, 168.55368041992188, 273.84661865234375, 74.06779479980469, -139.70071411132812, 138.02618408203125, -12.514091491699219, 86.40863037109375, 200.22103881835938, -99.36539459228516, 50.981849670410156, -43.48863220214844, 34.42815399169922, -3.2889328002929688, 130.16220092773438, 108.63107299804688, 45.30583953857422, 36.91929626464844, -63.34846496582031, -143.06155395507812, -19.02411651611328, 67.45809936523438, -20.69304656982422, 265.690185546875, 128.82977294921875, 84.06645965576172, 1.1553878784179688, 86.996337890625, 2.2148971557617188, 4.16844367980957, 71.37806701660156, 125.12403869628906, 147.14834594726562, 128.06727600097656, 79.63223266601562, -150.02191162109375, -77.01351928710938, 52.911903381347656, 65.49351501464844, 188.85348510742188, 27.59961700439453, 156.70321655273438, 136.89764404296875, 26.25555419921875, -0.1580657958984375, 173.50811767578125, 138.67724609375, 181.0338134765625, 144.29310607910156, 102.52195739746094, 59.363563537597656, -75.4831771850586, 10.862640380859375, 36.71870422363281, 73.1846923828125, 104.4750747680664, -14.744369506835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000391.npy"}
|
|
{"epoch": 0.5910808767951625, "step": 392, "batch_size": 64, "mean": 66.38069915771484, "std": 97.29986572265625, "min": -178.83673095703125, "p10": -44.12140502929687, "median": 64.12966537475586, "p90": 188.08511505126955, "max": 325.83880615234375, "pos_frac": 0.78125, "sample": [-155.42886352539062, -1.2782745361328125, 35.45252227783203, 65.76080322265625, 34.41925048828125, -24.876670837402344, 166.63824462890625, -61.491722106933594, 91.67875671386719, 165.32069396972656, 71.39747619628906, -178.83673095703125, 127.5281982421875, 168.93142700195312, 214.508544921875, 66.94921875, 12.248916625976562, 167.2414093017578, 180.0395050048828, 13.18115234375, 58.34088897705078, -9.999191284179688, 73.3794937133789, -99.47163391113281, 121.201904296875, 165.64474487304688, 176.3108367919922, 62.49852752685547, 67.53890991210938, -110.54692077636719, 76.4786376953125, 211.11537170410156, 135.135986328125, 15.12384033203125, -47.71710205078125, 81.40301513671875, 35.3233642578125, 112.39540100097656, 87.4644775390625, 154.23745727539062, 206.90762329101562, 198.25994873046875, -64.14068603515625, 144.94097900390625, 1.1561813354492188, 2.4004669189453125, 6.273956298828125, 36.478424072265625, 191.53323364257812, -0.5158843994140625, -0.9021682739257812, 325.83880615234375, 7.398181915283203, 47.861419677734375, -28.066009521484375, 10.144220352172852, 243.32037353515625, -35.7314453125, 29.945404052734375, 130.2813720703125, 4.682184219360352, 120.08838653564453, 39.54637145996094, 105.42161560058594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000392.npy"}
|
|
{"epoch": 0.5925925925925926, "step": 393, "batch_size": 64, "mean": 68.52921295166016, "std": 81.68241882324219, "min": -111.01506042480469, "p10": -20.786096191406248, "median": 57.88994789123535, "p90": 178.38607177734374, "max": 241.43267822265625, "pos_frac": 0.75, "sample": [93.40390014648438, -6.223794937133789, 123.16024780273438, 179.2439422607422, 171.32144165039062, 117.92620086669922, 38.58407211303711, 52.12200164794922, 72.50740814208984, 58.134098052978516, -27.74769401550293, 1.4620437622070312, 199.39312744140625, 143.1593475341797, 40.339317321777344, -54.66441345214844, -7.891944885253906, 184.0726776123047, 10.060165405273438, -6.576137542724609, -12.95166015625, 4.359888076782227, 101.29562377929688, 196.40447998046875, 125.31437683105469, 169.61830139160156, -22.1938419342041, 144.75439453125, 128.36988830566406, 130.33575439453125, 57.64579772949219, -68.93704223632812, 171.7364044189453, 178.86630249023438, 0.0040569305419921875, 45.21907043457031, -73.99588012695312, 186.24151611328125, -4.583976745605469, -17.50135612487793, 104.37974548339844, 20.360244750976562, 141.23129272460938, -16.844234466552734, 61.823829650878906, 145.24618530273438, 145.1047821044922, -2.0894317626953125, 43.850215911865234, 177.26553344726562, 166.46270751953125, 241.43267822265625, 68.20831298828125, -9.424263000488281, 21.43048858642578, 146.82110595703125, 32.40013122558594, 68.19020080566406, -111.01506042480469, -25.272384643554688, 12.669395446777344, 111.6939697265625, 15.258712768554688, 4.8975982666015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000393.npy"}
|
|
{"epoch": 0.5941043083900227, "step": 394, "batch_size": 64, "mean": 59.82337188720703, "std": 75.97239685058594, "min": -149.69692993164062, "p10": -36.54034690856933, "median": 57.427520751953125, "p90": 155.79554901123052, "max": 224.23068237304688, "pos_frac": 0.796875, "sample": [26.593841552734375, 166.5009307861328, 124.17184448242188, 28.013580322265625, 12.833793640136719, 55.025543212890625, -11.101318359375, 54.07794189453125, 206.4715576171875, 47.67108154296875, 196.50442504882812, 127.17750549316406, 38.86516189575195, 54.813419342041016, -51.86542892456055, 43.07123565673828, -17.829681396484375, -13.641206741333008, -35.960784912109375, 19.395549774169922, 146.24838256835938, -36.78873062133789, 65.59934997558594, 59.829498291015625, 81.64196014404297, 189.0150146484375, 141.07962036132812, 136.54962158203125, 132.33753967285156, 8.159370422363281, -104.27456665039062, 22.098785400390625, 162.64952087402344, 80.27107238769531, 45.322296142578125, 108.0924072265625, -2.671926498413086, 90.34880065917969, 75.4002685546875, 14.749141693115234, 101.49540710449219, -60.83487319946289, -40.489288330078125, 70.70343780517578, 95.77169799804688, 224.23068237304688, -149.69692993164062, 130.25563049316406, -2.5168838500976562, 71.30473327636719, -90.1138916015625, 103.5628890991211, 44.13805389404297, 107.235107421875, 77.68278503417969, 73.13518524169922, 67.49517822265625, 136.9688720703125, 3.102142333984375, 39.544952392578125, 13.296112060546875, 159.88719177246094, 45.845802307128906, 120.24542999267578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000394.npy"}
|
|
{"epoch": 0.5956160241874527, "step": 395, "batch_size": 64, "mean": 53.249305725097656, "std": 82.89020538330078, "min": -115.9710922241211, "p10": -39.68150672912598, "median": 32.27111053466797, "p90": 164.72976989746095, "max": 277.6018981933594, "pos_frac": 0.75, "sample": [127.78213500976562, 165.10589599609375, 115.86582946777344, 8.048355102539062, 136.81216430664062, 66.90133666992188, -2.5892772674560547, 104.91376495361328, 184.81900024414062, -14.752229690551758, 41.54754638671875, 84.0313720703125, 201.8144989013672, 138.7198486328125, 25.76244354248047, 172.8795623779297, -11.141189575195312, 97.68171691894531, 63.551971435546875, 9.283050537109375, -0.699615478515625, 24.583534240722656, 6.9888916015625, 70.4483642578125, 145.97055053710938, -115.9710922241211, 38.77977752685547, 161.66287231445312, 59.8734016418457, -9.730018615722656, 66.58451080322266, 277.6018981933594, 66.79518127441406, 46.53508758544922, -90.62779235839844, 20.8182373046875, -40.319637298583984, 11.081836700439453, 86.46717071533203, -45.171669006347656, -5.7317962646484375, 0.47493553161621094, 147.81150817871094, 17.56954574584961, 3.6439247131347656, 76.38279724121094, 0.13686370849609375, 213.1912384033203, -38.192535400390625, -11.170404434204102, 4.981849670410156, 260.0048828125, -63.40313720703125, 6.885211944580078, 40.61482238769531, 163.85214233398438, -60.97682571411133, 24.553329467773438, 92.51953887939453, -62.398414611816406, 11.55074691772461, -21.941574096679688, 8.995737075805664, 99.89168548583984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000395.npy"}
|
|
{"epoch": 0.5971277399848829, "step": 396, "batch_size": 64, "mean": 57.35359191894531, "std": 78.63361358642578, "min": -132.90798950195312, "p10": -24.06683692932129, "median": 40.3730354309082, "p90": 166.1868667602539, "max": 254.9993133544922, "pos_frac": 0.765625, "sample": [164.76266479492188, 254.9993133544922, 162.58453369140625, 153.48614501953125, 159.0095977783203, 103.30888366699219, 10.750286102294922, 165.00665283203125, 56.41504669189453, 33.36767578125, 90.58783721923828, 22.501617431640625, 124.50228118896484, -37.69607162475586, 47.26392364501953, -1.8876495361328125, 44.7161865234375, 75.38227081298828, 1.42999267578125, 57.41908264160156, 66.59510803222656, -32.7847900390625, 190.808837890625, 48.176307678222656, -3.5424575805664062, 36.029884338378906, 110.52079772949219, -132.90798950195312, 145.37429809570312, 97.78559875488281, 2.7858848571777344, -85.16374206542969, 0.5507659912109375, 153.08290100097656, 48.223602294921875, 31.295433044433594, 112.4991683959961, 21.594970703125, 200.24441528320312, 212.7264404296875, -8.03897476196289, 25.345815658569336, 1.1353111267089844, 87.73416137695312, -1.8990707397460938, 55.6290283203125, 4.575935363769531, 19.82396697998047, 81.04678344726562, -47.8944091796875, -11.583938598632812, -24.108776092529297, 13.719339370727539, 17.838241577148438, -4.175098419189453, 166.6926727294922, 68.36103057861328, 179.97744750976562, 6.635990142822266, 32.32018280029297, -23.968978881835938, -37.8776741027832, -12.5150146484375, 170.04995727539062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000396.npy"}
|
|
{"epoch": 0.5986394557823129, "step": 397, "batch_size": 64, "mean": 32.343746185302734, "std": 94.72636413574219, "min": -209.1718292236328, "p10": -73.14048614501954, "median": 25.041030883789062, "p90": 148.99599151611332, "max": 295.2731628417969, "pos_frac": 0.671875, "sample": [-2.1942005157470703, 40.04334259033203, -71.7529296875, 57.906768798828125, -58.62554168701172, 70.41720581054688, 134.25318908691406, 64.46341705322266, 141.9608612060547, -23.611526489257812, 1.6828689575195312, 68.99324035644531, 22.55455780029297, 95.3099365234375, 81.7474594116211, 97.73360443115234, -184.06350708007812, -106.44218444824219, 57.01957321166992, -2.9995574951171875, 99.33140563964844, -2.1264495849609375, 79.1922607421875, 95.19732666015625, 9.266960144042969, -209.1718292236328, -73.73515319824219, 18.2689208984375, 177.760498046875, 51.24818420410156, 5.214775085449219, 21.321340560913086, 152.01104736328125, 51.2703857421875, 165.9059295654297, -25.443603515625, 57.185672760009766, 164.14028930664062, 135.04513549804688, 8.440446853637695, 10.798553466796875, 200.18927001953125, 135.31118774414062, 39.64080810546875, -176.84271240234375, 57.946014404296875, 295.2731628417969, -64.649169921875, -46.05439758300781, 9.699844360351562, -3.384288787841797, 82.84017944335938, -2.6588668823242188, 14.179580688476562, 0.2745647430419922, -36.982444763183594, 27.527503967285156, 56.386962890625, -49.71540832519531, 141.36631774902344, -154.98895263671875, -104.45508575439453, -1.8080272674560547, 175.385009765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000397.npy"}
|
|
{"epoch": 0.600151171579743, "step": 398, "batch_size": 64, "mean": 73.40287780761719, "std": 85.39968872070312, "min": -61.06438446044922, "p10": -19.15244102478027, "median": 64.35117721557617, "p90": 177.1030746459961, "max": 377.86334228515625, "pos_frac": 0.75, "sample": [155.81263732910156, 81.602294921875, 122.84278869628906, 83.31642150878906, 181.9708709716797, 20.17923355102539, 176.4910125732422, 207.52130126953125, -20.806610107421875, 177.36538696289062, 116.68698120117188, 21.39520263671875, -9.141510009765625, -9.701820373535156, 116.8919677734375, 17.935806274414062, 136.21182250976562, -6.819694519042969, 82.3701171875, -55.328792572021484, 109.45803833007812, 2.195547103881836, 181.22140502929688, 237.4895477294922, 0.220703125, 171.09185791015625, 148.75888061523438, 123.74105072021484, 42.401817321777344, 49.02106475830078, -13.37008285522461, 59.45893859863281, 21.936553955078125, 157.09140014648438, -0.44123077392578125, -5.639406204223633, 43.03288269042969, 158.18702697753906, 35.09208679199219, 77.09364318847656, -29.818252563476562, -26.74401092529297, 163.14442443847656, 79.85414123535156, 125.64059448242188, 128.0390625, 25.556814193725586, 3.7839584350585938, -15.292713165283203, 84.95710754394531, -3.0157089233398438, 377.86334228515625, 24.606117248535156, -3.4828262329101562, 73.40284729003906, -28.724510192871094, 203.39511108398438, -61.06438446044922, 69.24341583251953, 10.645631790161133, -37.4327507019043, 145.6238250732422, 172.2277374267578, 20.53806495666504], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000398.npy"}
|
|
{"epoch": 0.6016628873771731, "step": 399, "batch_size": 64, "mean": 82.09280395507812, "std": 98.18431854248047, "min": -107.77017211914062, "p10": -34.11383972167968, "median": 70.30317306518555, "p90": 197.80184783935547, "max": 407.1859130859375, "pos_frac": 0.78125, "sample": [138.7630157470703, 66.19066619873047, 133.66732788085938, 256.58544921875, -15.883865356445312, 126.0587158203125, 172.72674560546875, 46.07158660888672, -6.828296661376953, -13.260734558105469, -86.2545166015625, 68.37408447265625, 92.34136962890625, -54.183109283447266, 49.78611755371094, 75.98234558105469, 71.62249755859375, 115.1943359375, 88.59661865234375, 231.55322265625, -52.1771240234375, -30.977447509765625, -107.77017211914062, 125.7430419921875, 287.0908508300781, 22.592697143554688, 18.306598663330078, 407.1859130859375, 24.971656799316406, 11.301460266113281, 23.07648468017578, 2.3709850311279297, 101.88490295410156, 46.7987174987793, 38.81526184082031, -26.465431213378906, 68.98384857177734, 197.8974151611328, 52.34307098388672, 207.85813903808594, 42.226173400878906, 1.7859058380126953, 190.2220001220703, -35.4580078125, 191.6930694580078, 125.35417938232422, 74.44233703613281, 76.73731994628906, -64.30606079101562, -26.74378204345703, 169.97958374023438, 188.90859985351562, 168.20236206054688, 138.81256103515625, 149.16519165039062, 198.01596069335938, 137.2794189453125, 197.578857421875, 67.21928405761719, -43.34343338012695, 190.48062133789062, -14.019981384277344, 96.57909393310547, 56.193992614746094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000399.npy"}
|
|
{"epoch": 0.6031746031746031, "step": 400, "batch_size": 64, "mean": 47.53325271606445, "std": 83.68318939208984, "min": -160.20333862304688, "p10": -51.48210296630858, "median": 40.28938865661621, "p90": 154.4694625854492, "max": 219.63351440429688, "pos_frac": 0.71875, "sample": [-22.51645278930664, 91.65380096435547, 47.980133056640625, -31.370162963867188, 121.07554626464844, 118.69309997558594, 27.22551727294922, 129.8376922607422, 89.09573364257812, 154.0493621826172, 59.10415267944336, -65.47386932373047, 40.98281478881836, 2.6877212524414062, 1.3128166198730469, -11.568115234375, 52.820159912109375, 159.20407104492188, 137.77975463867188, 128.8184356689453, 5.809478759765625, 40.1465950012207, 110.77993774414062, 3.1485538482666016, -123.12982177734375, 101.5401840209961, 15.15174674987793, 15.152542114257812, 18.15616226196289, -6.326936721801758, -75.404052734375, -6.800298690795898, 130.61708068847656, -9.934829711914062, 175.8665008544922, -6.130420684814453, -61.98280334472656, 31.238330841064453, 149.14361572265625, 107.48252868652344, 199.91616821289062, 120.53324127197266, -160.20333862304688, 122.74655151367188, 16.53356170654297, 40.43218231201172, 2.999237060546875, -36.994903564453125, -34.36664581298828, 219.63351440429688, 57.449520111083984, 154.64950561523438, -57.69090270996094, 53.247947692871094, -9.239324569702148, 180.65306091308594, 201.14285278320312, 65.00192260742188, 23.18103790283203, 44.22760009765625, -145.3167724609375, -10.878246307373047, 35.027976989746094, 113.5262451171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000400.npy"}
|
|
{"epoch": 0.6046863189720333, "step": 401, "batch_size": 64, "mean": 66.88381958007812, "std": 84.31990814208984, "min": -81.56483459472656, "p10": -34.24529953002929, "median": 50.24203109741211, "p90": 186.2841751098633, "max": 265.1585693359375, "pos_frac": 0.78125, "sample": [-11.168891906738281, 147.94259643554688, 159.34542846679688, 65.02487182617188, -42.5952262878418, 2.002574920654297, 21.147552490234375, 56.56395721435547, -41.38779067993164, 7.193397521972656, 194.85989379882812, -2.7458648681640625, -81.56483459472656, -6.9052734375, 32.77153778076172, -32.96533966064453, 104.88897705078125, 265.1585693359375, 157.2396697998047, 167.64651489257812, 65.30883026123047, 65.01866149902344, 43.183876037597656, 25.347583770751953, -49.802154541015625, -59.90135955810547, 75.18609619140625, 96.46286010742188, 72.9214096069336, 57.57262420654297, 107.32623291015625, 33.09663391113281, 8.084451675415039, 164.68853759765625, -5.224052429199219, 187.3311309814453, 6.677427291870117, 152.03634643554688, 24.43687629699707, 198.44952392578125, 15.061744689941406, 176.79666137695312, 109.8560791015625, -16.144357681274414, 194.29534912109375, 10.129276275634766, -34.793853759765625, 231.14144897460938, 162.23873901367188, 100.21451568603516, 105.70233917236328, 2.762350082397461, 8.599323272705078, 9.778030395507812, -55.561100006103516, 86.08116149902344, 78.39067077636719, 20.940942764282227, 170.1689453125, 183.84127807617188, 43.92010498046875, -9.298778533935547, 17.923160552978516, 237.86639404296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000401.npy"}
|
|
{"epoch": 0.6061980347694633, "step": 402, "batch_size": 64, "mean": 52.37577819824219, "std": 93.89684295654297, "min": -150.0983123779297, "p10": -63.93274536132812, "median": 46.2678337097168, "p90": 186.67828521728518, "max": 229.50027465820312, "pos_frac": 0.734375, "sample": [86.93548583984375, 193.47210693359375, 135.46884155273438, 133.68252563476562, 126.96083068847656, 69.15121459960938, 216.1165313720703, 37.740081787109375, 40.65687561035156, 97.10126495361328, -39.685211181640625, 211.72625732421875, 58.27491760253906, -48.808929443359375, 58.99310302734375, 45.9710578918457, -0.484954833984375, 17.52775001525879, 2.9845657348632812, 31.286819458007812, 112.58757781982422, -0.3861808776855469, 7.318748474121094, -113.58660888671875, 229.50027465820312, 7.724948883056641, 84.38190460205078, 178.50588989257812, 69.72132873535156, 21.850250244140625, 190.1807403564453, 159.72264099121094, -12.517318725585938, 107.26841735839844, -13.762161254882812, 21.209741592407227, 165.45797729492188, 158.30152893066406, 6.720672607421875, 90.87429809570312, -47.13385772705078, -109.74751281738281, -140.93307495117188, 109.98384857177734, 35.63287353515625, 9.470212936401367, 52.23094177246094, -67.29493713378906, 8.507225036621094, 11.93759536743164, 149.4438018798828, 194.84671020507812, 142.9947052001953, 216.49134826660156, -56.08763122558594, -14.886861801147461, -24.04656219482422, -150.0983123779297, -134.75074768066406, 51.58317565917969, 46.56460952758789, 148.12301635742188, -75.07832336425781, 48.15149688720703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000402.npy"}
|
|
{"epoch": 0.6077097505668935, "step": 403, "batch_size": 64, "mean": 68.65573120117188, "std": 94.16281127929688, "min": -155.46173095703125, "p10": -42.34323692321777, "median": 46.70886993408203, "p90": 183.04525756835938, "max": 329.24530029296875, "pos_frac": 0.796875, "sample": [-7.940528869628906, 1.05224609375, 251.23724365234375, 28.599090576171875, 173.67532348632812, 41.50105285644531, 215.27511596679688, 148.9097137451172, 22.300613403320312, -6.2859344482421875, 58.35303497314453, -49.03141784667969, 129.8506317138672, -50.640750885009766, 157.68568420410156, 99.489501953125, 59.513938903808594, 12.699373245239258, 42.630584716796875, 7.728803634643555, 0.8827381134033203, -63.71329879760742, 329.24530029296875, 172.86669921875, -20.173583984375, 130.26251220703125, 183.22360229492188, 6.197946548461914, 183.36985778808594, 19.781654357910156, -2.8671607971191406, 74.88119506835938, 47.28807067871094, 35.06348419189453, 53.66840362548828, -23.077281951904297, 8.568914413452148, 177.09149169921875, 20.525041580200195, 122.14675903320312, 33.3431396484375, 1.4044132232666016, 172.55836486816406, 17.009166717529297, 117.793212890625, -41.84393310546875, 140.3217315673828, 148.35592651367188, 90.63619995117188, 46.129669189453125, 188.64450073242188, 182.62911987304688, -155.46173095703125, -42.55722427368164, 239.2261962890625, -126.28590393066406, 159.72964477539062, 60.54228973388672, 73.37577819824219, 165.18182373046875, -58.22123718261719, 11.679420471191406, 45.099708557128906, 132.8408966064453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000403.npy"}
|
|
{"epoch": 0.6092214663643235, "step": 404, "batch_size": 64, "mean": 68.74818420410156, "std": 98.1288070678711, "min": -116.50263977050781, "p10": -55.23245391845703, "median": 59.34103012084961, "p90": 196.49537353515626, "max": 283.36822509765625, "pos_frac": 0.71875, "sample": [-51.103973388671875, 91.05184936523438, 114.44495391845703, 3.2908172607421875, 60.31134796142578, -2.7623062133789062, 106.13301849365234, 14.243659973144531, -56.776023864746094, -55.16053771972656, 197.42575073242188, -55.263275146484375, -116.50263977050781, -113.48135375976562, -1.3770484924316406, 238.1837158203125, -23.79637908935547, 3.8726844787597656, 180.8033447265625, 207.83297729492188, 117.49357604980469, 128.0115966796875, 116.106689453125, 201.97531127929688, -27.3297119140625, 140.84994506835938, -68.85182189941406, 230.39547729492188, 169.9950408935547, 151.1327362060547, 10.770111083984375, 120.6956787109375, -13.657087326049805, 152.73638916015625, -26.654685974121094, 4.803413391113281, 38.449440002441406, 135.0601806640625, 123.72539520263672, -5.011440277099609, 14.199867248535156, -16.260040283203125, 96.0625, 46.59370422363281, 165.7370147705078, 14.509937286376953, 122.39251708984375, 179.46743774414062, 186.83238220214844, 58.37071228027344, 0.9925346374511719, 273.71649169921875, 283.36822509765625, 80.46101379394531, 152.32614135742188, 130.14645385742188, 194.32449340820312, -61.00431823730469, -64.33761596679688, -55.05720520019531, 50.175628662109375, 64.5076904296875, 14.546501159667969, 25.745079040527344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000404.npy"}
|
|
{"epoch": 0.6107331821617535, "step": 405, "batch_size": 64, "mean": 55.488685607910156, "std": 85.4180908203125, "min": -146.78318786621094, "p10": -31.429418182373045, "median": 26.8095703125, "p90": 179.5398162841797, "max": 232.89776611328125, "pos_frac": 0.6875, "sample": [163.95150756835938, -6.661006927490234, -69.80906677246094, 197.73895263671875, 26.16033172607422, 25.663970947265625, 179.89697265625, 12.2470703125, -7.200832366943359, -45.391937255859375, -16.635818481445312, 5.354681015014648, 4.712343215942383, -2.7696876525878906, 68.00064086914062, 13.760627746582031, 229.826171875, -0.7082729339599609, -7.850151062011719, 87.91635131835938, 167.01031494140625, 154.81878662109375, 8.33245849609375, 97.48463439941406, -1.369110107421875, 31.598590850830078, 138.46353149414062, 176.6479034423828, -36.958702087402344, 117.10303497314453, 19.585796356201172, -0.17819595336914062, 149.95562744140625, 192.81781005859375, -13.667083740234375, 88.33863067626953, 17.040721893310547, -146.78318786621094, 65.58126831054688, 57.505401611328125, 27.45880889892578, -50.5028076171875, 64.1071548461914, 37.940765380859375, 12.332801818847656, 14.747098922729492, 2.8963546752929688, 41.208168029785156, 204.26144409179688, 153.3019561767578, -25.430126190185547, -29.919097900390625, 113.43073272705078, 40.74254608154297, 178.70645141601562, 182.4129180908203, -12.065338134765625, 50.07419967651367, 138.87142944335938, -78.70973205566406, -14.918128967285156, 232.89776611328125, 157.9763946533203, -32.076698303222656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000405.npy"}
|
|
{"epoch": 0.6122448979591837, "step": 406, "batch_size": 64, "mean": 73.45025634765625, "std": 96.19520568847656, "min": -169.12454223632812, "p10": -37.98498077392578, "median": 75.50471878051758, "p90": 188.441845703125, "max": 260.79071044921875, "pos_frac": 0.75, "sample": [178.35797119140625, 51.58009338378906, -35.95191955566406, -24.0263671875, 146.79615783691406, 105.31407928466797, 164.4127655029297, 158.5242919921875, -38.856292724609375, 229.89830017089844, 137.60415649414062, 154.61813354492188, -4.977672576904297, -25.922225952148438, 199.4300537109375, 260.79071044921875, 208.42083740234375, 21.367937088012695, 61.328704833984375, 179.80191040039062, 90.244873046875, 31.168540954589844, 98.873291015625, 70.4312515258789, 25.77983856201172, 138.5901641845703, -28.460216522216797, 121.56135559082031, -7.14579963684082, 61.89091873168945, -1.1534271240234375, 1.8055419921875, -139.99700927734375, -20.056285858154297, 49.26995849609375, 190.27117919921875, 86.79326629638672, 184.17340087890625, -80.57569885253906, 205.51492309570312, 103.68344116210938, 42.11454772949219, 50.043357849121094, 219.9230194091797, 18.078948974609375, 164.80885314941406, -65.07991027832031, 13.670869827270508, 127.89027404785156, 156.75534057617188, 20.843839645385742, -70.41017150878906, 183.0715789794922, -101.17707824707031, 6.926490783691406, -169.12454223632812, 103.63674926757812, 151.93392944335938, -17.590560913085938, 41.771514892578125, 154.0854949951172, 128.48373413085938, 80.57818603515625, 148.4065399169922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000406.npy"}
|
|
{"epoch": 0.6137566137566137, "step": 407, "batch_size": 64, "mean": 57.94728088378906, "std": 99.00751495361328, "min": -219.35821533203125, "p10": -47.92693634033203, "median": 57.00161361694336, "p90": 189.63733062744143, "max": 267.8251953125, "pos_frac": 0.703125, "sample": [-1.9526901245117188, -7.941511154174805, -20.070770263671875, 67.53740692138672, -34.76226043701172, 163.76625061035156, 195.5003204345703, -48.59065246582031, -112.42890930175781, 114.31622314453125, 91.27789306640625, 69.09843444824219, 3.5746002197265625, 12.9521484375, -104.24343872070312, 21.869407653808594, 97.11516571044922, 150.94546508789062, 203.73727416992188, 173.15809631347656, 74.95574951171875, 17.498918533325195, 71.95437622070312, 46.4658203125, 101.89224243164062, 86.77459716796875, 0.9177017211914062, 4.634613037109375, 153.21905517578125, 201.78445434570312, 113.32334899902344, 2.7537460327148438, 191.18032836914062, 185.25967407226562, 162.31082153320312, 8.03814697265625, -108.42727661132812, 139.52499389648438, -49.6397705078125, 30.303489685058594, -13.652481079101562, 126.60781860351562, 186.03700256347656, 90.1427993774414, 201.42381286621094, 267.8251953125, 146.26400756835938, 9.564109802246094, 234.10012817382812, -0.5265693664550781, 33.444549560546875, -219.35821533203125, -138.60763549804688, -46.378265380859375, -42.457054138183594, 28.723155975341797, -16.67057991027832, 168.006103515625, -24.89407730102539, -11.620803833007812, -6.327400207519531, 92.88409423828125, 70.99890899658203, 103.51397705078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000407.npy"}
|
|
{"epoch": 0.6152683295540439, "step": 408, "batch_size": 64, "mean": 82.23027801513672, "std": 88.25061798095703, "min": -124.41932678222656, "p10": -16.201911163330077, "median": 78.21145248413086, "p90": 190.84077606201174, "max": 256.5212707519531, "pos_frac": 0.796875, "sample": [67.03024291992188, 175.2384490966797, 137.50306701660156, 95.91940307617188, 1.9844131469726562, -46.72161102294922, -124.41932678222656, 3.2992897033691406, 158.1443634033203, 81.7694320678711, 57.42577362060547, 213.301025390625, 0.2429370880126953, 137.0583038330078, 154.8640594482422, 256.5212707519531, 34.67177963256836, -16.644813537597656, 45.895591735839844, 35.0113525390625, 100.93179321289062, 8.736900329589844, -8.100488662719727, 144.87059020996094, 221.51312255859375, -86.41578674316406, 71.0575942993164, 217.06784057617188, 115.96488189697266, 153.4159698486328, 186.82687377929688, -32.05936050415039, 121.77957916259766, 7.805219650268555, 19.67120361328125, 176.2542266845703, 41.8975715637207, -17.997365951538086, 232.2273712158203, 88.72987365722656, 54.618492126464844, 174.36978149414062, 185.59149169921875, 192.56101989746094, 133.04661560058594, 18.99892234802246, 152.91778564453125, -7.375680923461914, 55.26605224609375, -5.9733123779296875, 172.40325927734375, 74.65347290039062, 207.8447265625, -112.02748107910156, 164.37628173828125, 125.10662841796875, 54.98088455200195, -1.8336601257324219, -0.32845306396484375, 61.36848068237305, 161.10568237304688, 86.55813598632812, -15.168472290039062, 97.40414428710938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000408.npy"}
|
|
{"epoch": 0.6167800453514739, "step": 409, "batch_size": 64, "mean": 37.80602264404297, "std": 82.24010467529297, "min": -163.74095153808594, "p10": -40.110678100585936, "median": 22.16942310333252, "p90": 170.56757507324224, "max": 204.77615356445312, "pos_frac": 0.609375, "sample": [20.272262573242188, 3.193918228149414, -22.123611450195312, -28.074188232421875, -0.9025306701660156, -17.07010269165039, 22.14896583557129, -26.944469451904297, 29.474384307861328, -28.641807556152344, -28.24197006225586, 182.76412963867188, -12.02232551574707, 182.73880004882812, 89.99488830566406, 8.07196044921875, 63.4384765625, -3.1543655395507812, -36.79279708862305, 197.8609619140625, 140.37881469726562, -41.53262710571289, 110.00929260253906, 176.95703125, 22.18988037109375, -71.25541687011719, -22.974380493164062, 95.9481201171875, 48.171417236328125, -8.307769775390625, -42.58668518066406, 69.16307067871094, 109.61869049072266, -20.60556411743164, 146.13345336914062, -12.998014450073242, 155.65884399414062, 127.27814483642578, 51.380279541015625, 23.395185470581055, -95.43313598632812, -0.9260482788085938, 88.96974182128906, -68.21527099609375, 16.81391143798828, -32.07182312011719, 22.438827514648438, 70.57408142089844, -160.6885223388672, 183.6477813720703, 16.02713394165039, 98.23401641845703, -3.28680419921875, 188.35525512695312, -163.74095153808594, 14.928520202636719, 72.15402221679688, 68.6306381225586, 72.5782241821289, 63.188018798828125, 204.77615356445312, -29.595930099487305, 72.78887939453125, 67.42623901367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000409.npy"}
|
|
{"epoch": 0.618291761148904, "step": 410, "batch_size": 64, "mean": 66.93568420410156, "std": 79.33675384521484, "min": -119.08575439453125, "p10": -34.02813034057617, "median": 60.820640563964844, "p90": 171.3333923339844, "max": 292.56329345703125, "pos_frac": 0.8125, "sample": [57.983116149902344, 87.75759887695312, 26.198802947998047, -4.107791900634766, 173.19857788085938, 53.36920166015625, 93.99466705322266, 36.42496109008789, -57.617279052734375, -119.08575439453125, 82.6435317993164, 179.62965393066406, 6.043205261230469, -59.51519012451172, 153.6669921875, 86.79547882080078, 155.32814025878906, -40.434112548828125, 103.05270385742188, 188.7462921142578, 292.56329345703125, 49.05634307861328, 21.894851684570312, 110.76541137695312, -0.030609130859375, 130.78924560546875, 23.584274291992188, 105.7763671875, 42.62787628173828, -34.66820526123047, 120.19476318359375, -32.53462219238281, 22.506412506103516, 133.25946044921875, 28.151107788085938, 212.72372436523438, 179.41017150878906, 38.903141021728516, 59.60272979736328, 82.25714111328125, -28.427650451660156, -55.338165283203125, 93.63896179199219, 75.83024597167969, 146.13424682617188, 71.99620056152344, 156.7096405029297, -3.446338653564453, 11.680927276611328, 120.46160888671875, 36.065765380859375, 88.86251831054688, 225.90478515625, 66.66471862792969, 50.64100646972656, 62.038551330566406, 0.7301826477050781, 50.93275451660156, 3.9790191650390625, 139.27899169921875, 166.98129272460938, 66.23656463623047, -72.14166259765625, 17.56421661376953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000410.npy"}
|
|
{"epoch": 0.6198034769463341, "step": 411, "batch_size": 64, "mean": 64.17971801757812, "std": 92.57835388183594, "min": -209.34872436523438, "p10": -44.728845977783195, "median": 54.16144943237305, "p90": 178.10616912841797, "max": 315.4562683105469, "pos_frac": 0.734375, "sample": [-14.584877014160156, 157.69381713867188, 137.5013427734375, 172.95738220214844, 100.95247650146484, -23.953102111816406, 37.47096252441406, -209.34872436523438, 124.69605255126953, 7.69549560546875, 91.1674575805664, 199.13204956054688, 106.94019317626953, 94.26969909667969, 126.73291015625, 60.76433563232422, 90.35874938964844, -53.23664093017578, 72.31487274169922, 67.23513793945312, -7.853477478027344, 193.45399475097656, 194.27105712890625, 17.596162796020508, -48.99845886230469, 149.9679718017578, 4.443580627441406, 18.34811782836914, -61.239017486572266, -47.95893859863281, 25.767822265625, -19.412195205688477, -19.216636657714844, 47.558563232421875, 106.65672302246094, 45.08352279663086, -7.737459182739258, 161.0382080078125, 18.300048828125, 315.4562683105469, 172.90850830078125, 42.859046936035156, -27.052978515625, 178.1958770751953, 200.65838623046875, 177.8968505859375, -59.003326416015625, 42.87046813964844, -11.987415313720703, -72.32411193847656, 28.542856216430664, -33.373870849609375, 174.139404296875, 143.6857147216797, 86.02088165283203, 107.31707000732422, 117.27210235595703, 29.176095962524414, 238.61941528320312, 63.406890869140625, -37.19196319580078, 0.1884918212890625, 0.5541534423828125, 111.83832550048828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000411.npy"}
|
|
{"epoch": 0.6213151927437641, "step": 412, "batch_size": 64, "mean": 58.968833923339844, "std": 81.60546875, "min": -116.29402160644531, "p10": -40.79602317810058, "median": 49.26806831359863, "p90": 173.02437744140627, "max": 224.22633361816406, "pos_frac": 0.765625, "sample": [-70.5824966430664, 154.75099182128906, 57.523353576660156, 183.4659423828125, 38.87114715576172, 129.85421752929688, 46.62373352050781, 174.16287231445312, -80.07949829101562, -1.6789817810058594, 62.12841796875, 6.907707214355469, 89.201416015625, 170.36788940429688, 1.2109146118164062, -36.767738342285156, 154.72421264648438, -116.29402160644531, 49.23981475830078, 2.122711181640625, 182.310791015625, 134.8578338623047, 30.576416015625, 64.97158813476562, 96.27932739257812, -81.78678131103516, -0.52557373046875, 34.38947677612305, 28.565250396728516, 52.826385498046875, -6.098363876342773, 224.22633361816406, 3.2916316986083984, 210.61607360839844, -38.35099792480469, 26.935821533203125, 81.986572265625, -8.268306732177734, 152.57748413085938, 1.4975357055664062, -29.95908546447754, 9.832809448242188, 107.58863830566406, -51.87740707397461, 22.36947250366211, 194.42401123046875, 117.31024169921875, 138.41766357421875, 186.0658721923828, -63.24002456665039, 89.33477020263672, 108.55572509765625, 60.680973052978516, 20.778968811035156, 49.296321868896484, 154.07276916503906, -6.975214004516602, 77.72541046142578, -41.84389114379883, 71.60974884033203, 161.1351318359375, 10.87183952331543, 155.88363647460938, 25.3157958984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000412.npy"}
|
|
{"epoch": 0.6228269085411943, "step": 413, "batch_size": 64, "mean": 54.47590255737305, "std": 107.49713134765625, "min": -201.25328063964844, "p10": -86.50793151855468, "median": 60.5157356262207, "p90": 182.93376922607422, "max": 365.02099609375, "pos_frac": 0.703125, "sample": [13.244815826416016, -80.43949890136719, 115.00724029541016, 73.31278228759766, -14.462547302246094, -27.500015258789062, -1.8846588134765625, 185.25428771972656, -55.968719482421875, 253.68136596679688, -187.56951904296875, 176.00869750976562, 120.88391876220703, -53.22956848144531, -6.7537078857421875, 19.90972900390625, -79.93243408203125, 156.22947692871094, -1.0862197875976562, 10.307397842407227, 204.11160278320312, 95.84397888183594, 66.85328674316406, 62.91615676879883, 125.15185546875, 48.25115203857422, 365.02099609375, 84.984619140625, -10.062934875488281, 40.19989776611328, 88.88525390625, 162.73658752441406, 91.30110168457031, 55.14588928222656, 184.59449768066406, -114.57037353515625, 114.97102355957031, 12.292007446289062, 81.84976959228516, 37.63916015625, -133.03524780273438, 170.15945434570312, 172.75042724609375, -97.9738998413086, 55.398895263671875, -89.10868835449219, -201.25328063964844, 79.58393096923828, 81.11851501464844, 6.482395172119141, -39.878509521484375, -90.29702758789062, 216.53164672851562, 3.3262672424316406, 69.778076171875, 152.1344757080078, 58.11531448364258, 182.5478057861328, 29.41921615600586, 73.46776580810547, -74.61061096191406, 139.11683654785156, 183.09918212890625, 126.45648193359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000413.npy"}
|
|
{"epoch": 0.6243386243386243, "step": 414, "batch_size": 64, "mean": 65.86166381835938, "std": 86.13009643554688, "min": -148.95230102539062, "p10": -35.378626251220695, "median": 63.25679016113281, "p90": 186.07837524414066, "max": 218.25961303710938, "pos_frac": 0.765625, "sample": [154.44683837890625, 5.0071258544921875, 110.306884765625, 121.51077270507812, 37.71856689453125, 52.00574493408203, -148.95230102539062, 117.61927795410156, 105.6659927368164, 24.342788696289062, 73.86459350585938, 161.88246154785156, 146.19683837890625, -56.51171112060547, 218.25961303710938, 45.32670211791992, -110.44139099121094, -47.198829650878906, 29.482505798339844, 105.4115982055664, -0.8008861541748047, 72.86561584472656, 194.984130859375, -29.02820587158203, 189.3540496826172, 66.32809448242188, 69.05992126464844, 8.667839050292969, 98.29771423339844, 111.00588989257812, 178.4351348876953, 40.78904724121094, 118.80376434326172, -38.10023498535156, 208.80015563964844, 96.01248931884766, 155.5445556640625, 47.10565185546875, 60.18548583984375, 159.8374481201172, 30.190933227539062, -23.505531311035156, 9.844146728515625, -3.127134323120117, 114.61883544921875, -25.757293701171875, 8.494163513183594, 113.28176879882812, -146.44534301757812, 69.81366729736328, 197.0323486328125, -8.286872863769531, -3.1198959350585938, 37.92892074584961, 177.0738525390625, 197.72027587890625, 38.63201141357422, 17.888870239257812, 147.577880859375, 122.14517211914062, -40.98616027832031, -2.205717086791992, 26.463375091552734, 205.7821044921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000414.npy"}
|
|
{"epoch": 0.6258503401360545, "step": 415, "batch_size": 64, "mean": 55.42988586425781, "std": 93.54187774658203, "min": -153.58743286132812, "p10": -54.45312499999999, "median": 40.3701171875, "p90": 172.45467071533204, "max": 319.472412109375, "pos_frac": 0.734375, "sample": [-12.547250747680664, 123.07015228271484, 19.398813247680664, 23.93087387084961, 6.481540679931641, 17.340660095214844, 148.02645874023438, 187.60519409179688, -64.83236694335938, 154.34152221679688, 105.19939422607422, -57.420989990234375, 153.90069580078125, 173.8876495361328, -6.97125244140625, 176.7180938720703, 198.570068359375, 106.64686584472656, 107.0645751953125, -2.147695541381836, -17.849411010742188, 90.61735534667969, 113.27263641357422, -2.4678955078125, 40.53875732421875, 169.11105346679688, -132.65274047851562, 10.201988220214844, 68.55851745605469, 144.77084350585938, -43.89973449707031, 146.9799041748047, 28.150705337524414, 6.688926696777344, -148.95748901367188, 319.472412109375, 113.84532928466797, 106.09104919433594, 166.5366973876953, -47.528106689453125, 112.09229278564453, -133.9995574951172, -74.65440368652344, -153.58743286132812, 26.771087646484375, 87.60247039794922, -12.196022033691406, 40.20147705078125, -4.986549377441406, 6.741243362426758, 13.636175155639648, 56.809730529785156, 50.2506103515625, 31.88479232788086, 17.641250610351562, 84.51431274414062, -2.950315475463867, 142.99612426757812, 119.68069458007812, 6.002586364746094, 7.005653381347656, 53.868553161621094, 185.4118194580078, 197.03231811523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000415.npy"}
|
|
{"epoch": 0.6273620559334845, "step": 416, "batch_size": 64, "mean": 46.36594009399414, "std": 95.34080505371094, "min": -168.2784881591797, "p10": -54.558841705322266, "median": 32.195655822753906, "p90": 177.5502166748047, "max": 355.1901550292969, "pos_frac": 0.640625, "sample": [-0.27562713623046875, -139.85205078125, -10.839038848876953, 86.37727355957031, 38.77880096435547, 175.63632202148438, -53.41780090332031, -33.010162353515625, 178.27590942382812, 188.9349822998047, 42.67559814453125, 181.81834411621094, 355.1901550292969, 21.164443969726562, -3.980915069580078, 62.1065673828125, -2.0314388275146484, 110.60565948486328, -168.2784881591797, -29.384979248046875, -6.454622268676758, 122.31773376464844, 32.546142578125, 166.10032653808594, -0.8367767333984375, 133.95074462890625, 101.33575439453125, 180.074462890625, 215.5623779296875, 58.079917907714844, 46.437225341796875, 127.67161560058594, -34.39189147949219, -58.09929656982422, 2.5902271270751953, -6.812004089355469, -90.92062377929688, -119.61824035644531, -1.6320209503173828, 3.789356231689453, -34.67323684692383, 31.845169067382812, -22.254135131835938, 5.0459747314453125, 94.24591064453125, -31.61269760131836, 118.07059478759766, -123.89804077148438, -3.7718753814697266, 117.49363708496094, 62.61244201660156, 4.834955215454102, -55.04785919189453, 116.15666198730469, 0.10955047607421875, 29.298847198486328, 10.905738830566406, 57.51994323730469, 90.38814544677734, 48.762184143066406, 175.85693359375, 135.998779296875, 186.9152374267578, 80.43329620361328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000416.npy"}
|
|
{"epoch": 0.6288737717309146, "step": 417, "batch_size": 64, "mean": 63.543800354003906, "std": 103.52922821044922, "min": -319.33270263671875, "p10": -43.26956176757812, "median": 69.18189239501953, "p90": 176.00367736816406, "max": 323.5595703125, "pos_frac": 0.765625, "sample": [35.35157775878906, 7.5365447998046875, -22.99059295654297, -182.12930297851562, 187.16860961914062, 137.98434448242188, -319.33270263671875, 175.94461059570312, 68.96246337890625, 93.60054016113281, 323.5595703125, 54.88096618652344, 27.527565002441406, 68.59720611572266, 112.75543975830078, -100.32965087890625, 172.8058624267578, 140.8502197265625, -1.9322052001953125, 73.4048843383789, 104.62725830078125, 88.80679321289062, 80.30290222167969, 58.70204162597656, 76.80731964111328, -45.605712890625, 181.96856689453125, 55.52162170410156, 28.354732513427734, 170.25094604492188, -64.68547821044922, 176.02899169921875, 179.12506103515625, -129.9300994873047, 178.96380615234375, -37.81854248046875, 47.58970642089844, -15.354530334472656, 164.52447509765625, 101.64322662353516, 174.684814453125, 9.079109191894531, 172.89358520507812, 89.09418487548828, -85.13662719726562, 51.42665100097656, -1.2264766693115234, 61.21886444091797, 1.178924560546875, 70.65980529785156, 33.54345703125, 157.83843994140625, 6.9048614501953125, -28.762359619140625, 96.1039810180664, -33.16291809082031, 97.92526245117188, 23.843402862548828, 89.29972839355469, -0.8233089447021484, 144.05178833007812, 69.40132141113281, 159.16775512695312, 253.55984497070312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000417.npy"}
|
|
{"epoch": 0.6303854875283447, "step": 418, "batch_size": 64, "mean": 52.023521423339844, "std": 98.77111053466797, "min": -193.6705322265625, "p10": -50.89808883666992, "median": 43.275238037109375, "p90": 181.35126342773438, "max": 374.6317138671875, "pos_frac": 0.6875, "sample": [99.72453308105469, 140.34426879882812, 121.0693130493164, 76.41677856445312, 64.16433715820312, 188.02398681640625, 19.79718780517578, 104.1948471069336, 188.65521240234375, -51.25049591064453, 167.57357788085938, 212.3207550048828, -17.4542236328125, 161.53741455078125, 62.37952423095703, -92.84819793701172, 167.93865966796875, 8.861442565917969, 143.2510223388672, -5.051002502441406, 0.12441253662109375, 374.6317138671875, 88.3976058959961, -34.28455352783203, 194.9263916015625, 27.71057891845703, -43.3973388671875, 4.062339782714844, 29.834941864013672, -50.0758056640625, -193.6705322265625, -132.40301513671875, 198.30873107910156, 48.89900207519531, 181.66543579101562, -18.835586547851562, 11.123958587646484, -7.9904937744140625, 9.851699829101562, 12.779487609863281, -56.400596618652344, -47.101356506347656, -92.55006408691406, -11.973978042602539, 113.43498229980469, -23.37200927734375, -27.23345184326172, 38.259063720703125, 10.020635604858398, -10.882652282714844, 54.54589080810547, 140.4349365234375, 180.61819458007812, 136.0809783935547, 12.009315490722656, 48.291412353515625, -100.9000244140625, 71.41990661621094, 107.34170532226562, 130.03414916992188, 105.32987213134766, -49.44886779785156, 82.28707885742188, 57.95225524902344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000418.npy"}
|
|
{"epoch": 0.6318972033257747, "step": 419, "batch_size": 64, "mean": 72.92339324951172, "std": 99.44164276123047, "min": -159.00193786621094, "p10": -27.480504226684562, "median": 42.358543395996094, "p90": 204.01467590332038, "max": 292.4947509765625, "pos_frac": 0.84375, "sample": [-20.275936126708984, 292.4947509765625, 12.191337585449219, 11.370185852050781, -84.42314147949219, 6.8199615478515625, 190.80813598632812, 18.609161376953125, 12.902885437011719, 15.990787506103516, -159.00193786621094, 163.1234588623047, 230.57325744628906, 126.68910217285156, -51.441558837890625, 38.61466979980469, 73.34567260742188, 43.2186279296875, 209.67462158203125, 14.382339477539062, 70.34352111816406, 109.40300750732422, -30.56817626953125, 121.80421447753906, 9.434738159179688, 5.5735015869140625, 244.70254516601562, -38.30763244628906, 283.48199462890625, 105.22352600097656, 43.48112869262695, -80.1802978515625, -19.475589752197266, 180.42324829101562, 282.976318359375, 105.88055419921875, 152.81655883789062, 6.508934020996094, 15.996490478515625, 90.9283447265625, 0.7751216888427734, 188.70834350585938, 23.914749145507812, 13.237945556640625, 173.7261199951172, 10.221786499023438, 71.42142486572266, 35.76591110229492, 21.033435821533203, 182.68409729003906, 178.2036590576172, 0.3221015930175781, 37.34963607788086, 41.49845886230469, 153.08656311035156, 12.315759658813477, 132.82504272460938, 139.78857421875, 234.3795928955078, -9.130401611328125, 96.44123840332031, 48.94659423828125, -111.62169647216797, 185.08981323242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000419.npy"}
|
|
{"epoch": 0.6334089191232048, "step": 420, "batch_size": 64, "mean": 37.7269401550293, "std": 102.91008758544922, "min": -226.48220825195312, "p10": -79.13648834228515, "median": 16.2681303024292, "p90": 190.8931121826172, "max": 289.00775146484375, "pos_frac": 0.625, "sample": [-6.62945556640625, -28.677494049072266, 129.39527893066406, 21.94725799560547, 126.9862289428711, -153.2258758544922, 23.729232788085938, 5.381511688232422, -6.011253356933594, -174.45291137695312, -23.950897216796875, 86.24381256103516, -4.481201171875, 142.92672729492188, 8.782379150390625, 190.94058227539062, 227.10946655273438, 115.22244262695312, 190.7823486328125, 153.7957763671875, 10.963628768920898, -69.70388793945312, 3.1958789825439453, 61.709930419921875, 78.44418334960938, 174.10052490234375, -98.29493713378906, -91.10395050048828, 87.6308822631836, 7.231742858886719, -106.81637573242188, 31.324325561523438, -56.06153106689453, 43.27019119262695, -1.9798164367675781, -83.17903137207031, -38.244056701660156, 0.8886489868164062, 208.13327026367188, 21.5726318359375, -22.752052307128906, -226.48220825195312, -60.891395568847656, -14.595169067382812, 162.2369384765625, 67.76886749267578, 5.666553497314453, 194.25767517089844, -5.02764892578125, 84.63330078125, 204.69366455078125, 37.955894470214844, 139.0301513671875, 2.057706832885742, 27.82827377319336, 131.0379180908203, 289.00775146484375, 23.21923065185547, -6.367465972900391, 30.675674438476562, -5.995719909667969, 222.53427124023438, -43.1949462890625, -31.66927719116211], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000420.npy"}
|
|
{"epoch": 0.6349206349206349, "step": 421, "batch_size": 64, "mean": 56.56682586669922, "std": 115.666015625, "min": -193.00543212890625, "p10": -107.33675994873045, "median": 46.80419158935547, "p90": 198.2625701904297, "max": 325.5990295410156, "pos_frac": 0.75, "sample": [-133.68699645996094, 207.70211791992188, 8.119132995605469, 173.0213623046875, 159.08578491210938, 15.63232421875, 114.45968627929688, 31.51715087890625, 7.159355163574219, 325.5990295410156, 255.03749084472656, -83.531494140625, 188.00439453125, 80.11688995361328, -38.38376235961914, 64.59971618652344, 32.56878662109375, -37.64027404785156, 195.8226318359375, 117.3134765625, 114.36448669433594, 132.33306884765625, -1.0397930145263672, 7.266227722167969, 89.76536560058594, 1.9667167663574219, 152.73129272460938, -186.5260467529297, -193.00543212890625, -74.90292358398438, 7.161088943481445, 173.55418395996094, 162.06195068359375, 13.376577377319336, 86.90770721435547, 201.43157958984375, 203.73829650878906, 148.93310546875, 47.556129455566406, 3.333263397216797, 180.3867645263672, 24.173229217529297, 88.61434936523438, -174.49005126953125, -117.53901672363281, 113.18303680419922, -1.9660186767578125, 46.05225372314453, -129.10931396484375, 25.639663696289062, 283.588134765625, 199.30825805664062, -5.318458557128906, 51.32203674316406, 43.871368408203125, -166.39797973632812, 26.533546447753906, 80.96179962158203, 116.68879699707031, -82.68407440185547, 176.1799774169922, 98.90064239501953, -37.263954162597656, 6.118099212646484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000421.npy"}
|
|
{"epoch": 0.636432350718065, "step": 422, "batch_size": 64, "mean": 72.49601745605469, "std": 89.89511108398438, "min": -102.21551513671875, "p10": -39.19666595458984, "median": 55.23680305480957, "p90": 189.30946197509772, "max": 289.1493225097656, "pos_frac": 0.796875, "sample": [197.82916259765625, 289.1493225097656, -64.04744720458984, -5.349552154541016, 171.1595458984375, -14.301910400390625, 160.62454223632812, -99.49220275878906, 125.1865234375, 219.81179809570312, 13.056961059570312, 38.290260314941406, 19.83879852294922, 49.644927978515625, 221.97608947753906, 31.691539764404297, 165.8373260498047, 197.08799743652344, -54.37205505371094, 150.12277221679688, 85.16473388671875, -102.21551513671875, 5.618259429931641, -0.3187751770019531, 132.46058654785156, 159.8936767578125, 145.62545776367188, 138.3134307861328, 240.66119384765625, 105.272705078125, 71.87611389160156, 94.72221374511719, 33.516056060791016, -6.003175735473633, 52.06803894042969, 51.29793167114258, 118.59397888183594, 16.345552444458008, 163.07522583007812, 26.766876220703125, -31.951255798339844, -72.62240600585938, 164.94296264648438, 129.115234375, 15.5938720703125, -18.454063415527344, 13.810516357421875, 140.54400634765625, 1.7966766357421875, 58.40556716918945, 63.65657424926758, 233.63299560546875, 23.1614933013916, -42.301841735839844, 14.576885223388672, 105.85084533691406, 13.327047348022461, -71.02091217041016, 17.967069625854492, 90.02312469482422, 162.2379913330078, 91.1043472290039, 41.87849426269531, 147.9907989501953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000422.npy"}
|
|
{"epoch": 0.6379440665154951, "step": 423, "batch_size": 64, "mean": 77.68196105957031, "std": 96.78023529052734, "min": -110.5324478149414, "p10": -16.94242706298828, "median": 57.19207954406738, "p90": 206.5298294067383, "max": 287.2823486328125, "pos_frac": 0.765625, "sample": [44.943634033203125, -5.582099914550781, 184.09934997558594, -97.44520568847656, 193.8083038330078, 3.454832077026367, 74.71884155273438, -36.37134552001953, 175.65708923339844, 41.58977508544922, -13.903030395507812, 167.65618896484375, -4.868499755859375, -54.879188537597656, 1.427490234375, 10.695663452148438, 21.993804931640625, -0.8941802978515625, 182.74998474121094, -92.79175567626953, 116.63919067382812, 118.91259002685547, -19.720291137695312, -5.898561477661133, 72.51669311523438, 255.4794158935547, 202.85113525390625, 10.28890609741211, -110.5324478149414, 136.15426635742188, 264.7628173828125, 154.7218017578125, 136.81568908691406, 133.9453582763672, 92.8813705444336, 51.1134033203125, 45.3529052734375, 235.9660186767578, 205.0780792236328, 207.15200805664062, 27.017616271972656, 10.897628784179688, 6.6767120361328125, 121.72128295898438, 149.073486328125, 144.91293334960938, 180.651611328125, 77.53706359863281, -11.182136535644531, 70.87882995605469, 287.2823486328125, 106.02308654785156, 11.443857192993164, 16.683549880981445, 130.63247680664062, 218.139404296875, -10.17218017578125, 2.2999114990234375, -18.245025634765625, 36.78883361816406, 257.50341796875, 4.4189453125, 63.270755767822266, -13.148649215698242], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000423.npy"}
|
|
{"epoch": 0.6394557823129252, "step": 424, "batch_size": 64, "mean": 73.91978454589844, "std": 99.12294006347656, "min": -148.356689453125, "p10": -53.99358940124512, "median": 52.209999084472656, "p90": 193.6252914428711, "max": 354.9635009765625, "pos_frac": 0.796875, "sample": [-10.0111083984375, 223.40451049804688, 45.45466613769531, 148.5393829345703, -53.71736145019531, 159.97142028808594, 354.9635009765625, 193.30130004882812, -111.52381896972656, -74.63966369628906, 27.486804962158203, 17.670793533325195, 77.45077514648438, -148.356689453125, 159.1686553955078, 13.638511657714844, 183.4911346435547, -54.57050323486328, 51.9998779296875, -6.3387908935546875, -27.01527976989746, 92.42766571044922, 137.77096557617188, 44.487998962402344, -54.11197280883789, 183.38995361328125, 241.90478515625, 12.956008911132812, 203.01998901367188, 36.51216125488281, 114.90540313720703, 118.46158599853516, 23.124107360839844, 54.18608856201172, 28.482666015625, 89.37666320800781, 151.0200958251953, 234.3538360595703, 159.67532348632812, 97.40459442138672, 123.73589324951172, 29.854774475097656, 9.405887603759766, 188.226318359375, 24.775611877441406, 193.76414489746094, 143.77438354492188, -1.3702449798583984, -84.0553970336914, 6.377004623413086, 179.19955444335938, 52.96650695800781, 227.67459106445312, 191.42941284179688, 12.029502868652344, 31.154319763183594, 52.42012023925781, -87.77276611328125, 51.11029052734375, 135.9329071044922, 8.143730163574219, 35.172569274902344, -5.308067321777344, 72.50950622558594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000424.npy"}
|
|
{"epoch": 0.6409674981103552, "step": 425, "batch_size": 64, "mean": 57.63554000854492, "std": 93.865478515625, "min": -148.4365234375, "p10": -49.63398780822754, "median": 38.75452995300293, "p90": 181.55657501220702, "max": 277.1958923339844, "pos_frac": 0.734375, "sample": [153.57080078125, 151.536376953125, 19.03783416748047, 13.611557006835938, 169.13949584960938, -29.889049530029297, 225.90170288085938, -4.184055328369141, 111.94519805908203, 107.08335876464844, 143.17364501953125, -78.56379699707031, 41.43874740600586, -48.996368408203125, -48.4563102722168, 147.00051879882812, 69.32796478271484, 195.74781799316406, 20.1226806640625, 82.57400512695312, -129.00665283203125, 62.733978271484375, 1.0458793640136719, 72.03119659423828, 146.4037322998047, 21.67993927001953, 0.025827407836914062, 210.65408325195312, 10.019386291503906, 190.58212280273438, 168.5840301513672, -47.748111724853516, 111.7198486328125, -2.9534683227539062, 90.17483520507812, 29.565765380859375, 44.520660400390625, 181.40350341796875, 35.889957427978516, 24.561824798583984, 198.96905517578125, -125.91551971435547, 64.11782836914062, 91.75, 35.14593505859375, -148.4365234375, 49.626373291015625, 73.22172546386719, 31.208099365234375, -14.623306274414062, 181.62217712402344, -15.59609603881836, -81.93055725097656, 174.66729736328125, 12.309501647949219, 2.083293914794922, 153.6190948486328, 36.0703125, -9.963539123535156, 277.1958923339844, -49.90725326538086, -4.578056335449219, 153.7600860595703, -58.75153350830078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000425.npy"}
|
|
{"epoch": 0.6424792139077853, "step": 426, "batch_size": 64, "mean": 35.37261199951172, "std": 109.332275390625, "min": -284.5174560546875, "p10": -91.2609161376953, "median": 19.849387168884277, "p90": 162.4817657470703, "max": 302.3173828125, "pos_frac": 0.625, "sample": [-12.125301361083984, -1.0885353088378906, 135.41366577148438, 163.29049682617188, 15.531608581542969, 148.4396209716797, 19.269210815429688, 183.1539306640625, 27.219993591308594, 71.04861450195312, -27.733985900878906, -11.138137817382812, -7.603553771972656, -136.19027709960938, 130.33436584472656, 33.45088195800781, 93.76615905761719, 24.64422607421875, 56.14720916748047, -28.57501983642578, -186.66571044921875, -33.929046630859375, 55.69233703613281, 86.38786315917969, -12.108497619628906, 14.197341918945312, 20.429563522338867, -255.5284881591797, 125.45687866210938, -1.0667495727539062, 235.426513671875, 109.3675765991211, 159.68466186523438, -19.75849723815918, -284.5174560546875, 45.82790756225586, -10.918807983398438, 1.4205303192138672, -3.9974517822265625, 302.3173828125, 176.8492431640625, 243.77577209472656, -115.07002258300781, -122.365234375, 5.4164581298828125, 218.33831787109375, -20.241531372070312, 4.683788299560547, -1.5763359069824219, -80.85403442382812, 79.947998046875, 104.5771713256836, -17.945938110351562, 109.69892883300781, 65.31044006347656, -78.85393524169922, 19.103351593017578, 131.48297119140625, 40.889404296875, -95.72100830078125, 122.12161254882812, 88.50604248046875, 160.5947265625, 0.20607376098632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000426.npy"}
|
|
{"epoch": 0.6439909297052154, "step": 427, "batch_size": 64, "mean": 58.67486572265625, "std": 106.798095703125, "min": -170.24154663085938, "p10": -82.7486343383789, "median": 48.61958885192871, "p90": 188.9495056152344, "max": 364.5653076171875, "pos_frac": 0.71875, "sample": [110.88127136230469, 13.922170639038086, -16.071109771728516, -81.16899871826172, -99.2874755859375, 144.16709899902344, 182.0621337890625, 48.7081184387207, -19.100723266601562, -96.54590606689453, 183.11648559570312, -23.208744049072266, -83.42562103271484, -107.2333984375, 48.53105926513672, 209.21426391601562, 102.07108306884766, 73.36978149414062, -10.657432556152344, -40.84602355957031, 0.1315288543701172, 69.59083557128906, 166.16598510742188, 65.26481628417969, 23.938629150390625, 155.5414276123047, 142.79861450195312, 151.99188232421875, 10.261787414550781, 34.816444396972656, 191.44937133789062, 20.98663330078125, 141.80532836914062, 57.986305236816406, 32.42711639404297, 181.4420166015625, 40.126617431640625, -162.92147827148438, 229.4901123046875, 195.31814575195312, -2.5017471313476562, 43.65559387207031, 73.94305419921875, 32.620147705078125, -141.9702911376953, -57.56659698486328, 125.47868347167969, 93.117919921875, 47.959259033203125, -170.24154663085938, 181.419921875, 272.0901184082031, -0.6808013916015625, 80.54436492919922, 86.72443389892578, 42.41319274902344, 93.06626892089844, 66.15457153320312, 11.055221557617188, -31.52166748046875, 98.10900115966797, -60.34809875488281, 364.5653076171875, 219.99481201171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000427.npy"}
|
|
{"epoch": 0.6455026455026455, "step": 428, "batch_size": 64, "mean": 65.74195098876953, "std": 104.5110855102539, "min": -182.67234802246094, "p10": -43.00664329528807, "median": 63.61908721923828, "p90": 197.49234466552736, "max": 371.3601989746094, "pos_frac": 0.6875, "sample": [-112.15116882324219, 93.56389617919922, 204.84228515625, -0.9315452575683594, 3.189422607421875, 13.047210693359375, 36.782901763916016, -136.65557861328125, -57.38496398925781, 78.53939056396484, 194.56988525390625, -30.54460906982422, 139.7867431640625, -19.42559051513672, 167.1788330078125, 71.7418212890625, 181.81781005859375, 26.87866973876953, -8.602027893066406, 62.11714172363281, 209.1072540283203, -3.6880531311035156, 4.730018615722656, -1.5620574951171875, 212.88661193847656, 106.97343444824219, 45.940185546875, -182.67234802246094, 103.48277282714844, -168.16104125976562, 134.55520629882812, -4.591667175292969, 17.428783416748047, 37.49888229370117, 80.5860824584961, 199.617919921875, 168.70947265625, 147.97364807128906, 72.20624542236328, -1.145986557006836, 225.6421356201172, 371.3601989746094, 96.09986114501953, 154.55303955078125, -84.99156188964844, -7.316497802734375, 89.35267639160156, 72.53050994873047, -1.2853717803955078, -9.303466796875, 27.731483459472656, 14.787117004394531, -3.1949996948242188, 0.21532058715820312, 172.65994262695312, 188.48580932617188, 198.7448272705078, 65.12103271484375, 128.25608825683594, 161.56015014648438, 194.44744873046875, 142.1204376220703, -29.979759216308594, -48.34751510620117], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000428.npy"}
|
|
{"epoch": 0.6470143613000756, "step": 429, "batch_size": 64, "mean": 50.33329772949219, "std": 94.89295959472656, "min": -180.228759765625, "p10": -63.75774574279784, "median": 52.460296630859375, "p90": 167.95776214599613, "max": 212.55078125, "pos_frac": 0.734375, "sample": [147.9941864013672, -135.71591186523438, 2.984516143798828, -40.899925231933594, 135.61575317382812, 127.3272933959961, 53.817806243896484, 36.85442352294922, -19.84902572631836, 58.28245544433594, 53.107154846191406, -180.228759765625, 88.69200897216797, 44.78547668457031, -7.0840911865234375, 131.58177185058594, -159.33456420898438, 43.37641906738281, 0.35400390625, 111.22771453857422, 142.46217346191406, -39.97001266479492, 145.00527954101562, 155.5632781982422, 170.04647827148438, 172.2037353515625, 194.87289428710938, 5.0533447265625, 56.58940887451172, -27.824665069580078, -69.73516082763672, -49.81044387817383, 15.203376770019531, -17.37944793701172, 97.9127426147461, -1.4578704833984375, -86.84548950195312, 90.90084075927734, -5.091026306152344, 7.501091003417969, 178.6732635498047, 138.84713745117188, 33.65679168701172, 163.08409118652344, 157.3525390625, -161.63113403320312, -134.55458068847656, 13.12905502319336, 6.341072082519531, 158.8970947265625, 22.494911193847656, 51.813438415527344, 27.142250061035156, -20.505550384521484, 67.60226440429688, 68.88357543945312, 212.55078125, 79.68219757080078, 206.54222106933594, 197.267578125, 12.952873229980469, 129.31492614746094, 73.15916442871094, 90.54590606689453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000429.npy"}
|
|
{"epoch": 0.6485260770975056, "step": 430, "batch_size": 64, "mean": 82.31600952148438, "std": 98.59005737304688, "min": -261.8578186035156, "p10": -30.325248908996578, "median": 80.96505355834961, "p90": 206.54207916259767, "max": 227.44949340820312, "pos_frac": 0.828125, "sample": [53.20062255859375, 34.47755432128906, -31.993255615234375, 50.723731994628906, 8.274574279785156, 181.42015075683594, -0.8469505310058594, 165.62872314453125, 48.69403839111328, 129.96041870117188, -110.73307800292969, 146.60943603515625, 203.28488159179688, 9.59471321105957, 180.54615783691406, 101.9898452758789, 111.12512969970703, 67.97636413574219, 208.06642150878906, -26.4332332611084, 90.27378845214844, 227.44949340820312, 182.94076538085938, 132.93724060058594, -0.06368255615234375, 184.3246612548828, 50.67860412597656, 225.61727905273438, 140.2591094970703, 188.08839416503906, 94.19895935058594, 20.89910888671875, 88.31201171875, 205.37600708007812, -57.93240737915039, 207.0418243408203, 221.20809936523438, 134.39862060546875, -109.32417297363281, 155.25582885742188, 6.7598876953125, 122.88214874267578, 42.016963958740234, 73.61809539794922, 24.402053833007812, 159.89466857910156, 28.999977111816406, 0.9925594329833984, -261.8578186035156, 12.511878967285156, -9.596954345703125, 179.91366577148438, 1.2410202026367188, 163.79342651367188, 57.387725830078125, 214.00634765625, 211.9147186279297, 136.05821228027344, 164.44744873046875, -42.87290954589844, 69.31344604492188, -49.66606521606445, 19.850738525390625, 28.7080078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000430.npy"}
|
|
{"epoch": 0.6500377928949358, "step": 431, "batch_size": 64, "mean": 58.892181396484375, "std": 94.3077392578125, "min": -181.51988220214844, "p10": -37.96807403564453, "median": 41.03378677368164, "p90": 185.83954010009765, "max": 242.94058227539062, "pos_frac": 0.734375, "sample": [-165.328125, 149.32289123535156, 68.33348083496094, 40.773399353027344, 8.33852767944336, 142.13003540039062, 158.44821166992188, 151.414306640625, 214.2841033935547, -5.857843399047852, 10.767402648925781, 185.89292907714844, 185.7149658203125, -13.681137084960938, 43.092674255371094, 20.071157455444336, 242.94058227539062, -1.207427978515625, 218.81198120117188, 48.8603515625, 72.49002838134766, -181.51988220214844, 15.442268371582031, 84.51332092285156, -40.31779479980469, 182.0865020751953, 66.77847290039062, -82.26663208007812, 42.280731201171875, -6.92750358581543, 68.2798080444336, -15.029850006103516, 178.20584106445312, 182.05267333984375, -106.99700927734375, -10.340858459472656, 194.1912841796875, 117.69834899902344, -39.166046142578125, -35.17280578613281, -45.080841064453125, 16.121917724609375, -2.2808914184570312, 144.5039520263672, 2.9665603637695312, 25.003440856933594, 182.10047912597656, 96.738037109375, 3.5511016845703125, 219.28646850585938, 27.682373046875, 54.459529876708984, 11.479522705078125, 30.919818878173828, 106.05719757080078, 11.354270935058594, 35.15802001953125, -24.559852600097656, 10.80472183227539, 189.14340209960938, 176.3978271484375, 94.79644012451172, 41.29417419433594, -28.201492309570312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000431.npy"}
|
|
{"epoch": 0.6515495086923658, "step": 432, "batch_size": 64, "mean": 62.6456298828125, "std": 97.67249298095703, "min": -218.43751525878906, "p10": -48.4071434020996, "median": 49.20833969116211, "p90": 190.20234985351564, "max": 228.41116333007812, "pos_frac": 0.71875, "sample": [-23.572250366210938, -38.69892883300781, -41.09593200683594, -218.43751525878906, 3.303884506225586, -3.917013168334961, 15.219406127929688, 83.9879150390625, -51.54051971435547, 180.0597686767578, 122.92817687988281, -87.26749420166016, 59.35406494140625, 58.77171325683594, 191.2267608642578, -153.41049194335938, 51.472557067871094, 30.559906005859375, 222.9470977783203, 195.68911743164062, 12.910438537597656, 114.6056900024414, 151.04852294921875, -37.464324951171875, -9.58157730102539, 195.12387084960938, -17.01653289794922, 194.11117553710938, 159.802734375, -12.353635787963867, -58.72431945800781, 40.06527328491211, 14.208818435668945, -24.226760864257812, 26.951385498046875, 86.41471862792969, 187.5467071533203, 168.6702880859375, 46.944122314453125, 146.59817504882812, -15.946247100830078, 44.78763198852539, 130.74378967285156, 30.633377075195312, 166.66561889648438, 31.905990600585938, 67.95733642578125, 187.8120574951172, 31.65334701538086, 201.68563842773438, 126.11373138427734, 119.02003479003906, 141.86148071289062, -85.49369812011719, 111.20846557617188, 228.41116333007812, -19.043052673339844, 146.6370849609375, 95.76300048828125, 16.929443359375, -78.45826721191406, 160.42001342773438, 12.45159912109375, 172.38584899902344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000432.npy"}
|
|
{"epoch": 0.6530612244897959, "step": 433, "batch_size": 64, "mean": 73.48242950439453, "std": 94.48108673095703, "min": -204.51559448242188, "p10": -40.54414863586425, "median": 58.52422332763672, "p90": 184.96942901611328, "max": 223.24327087402344, "pos_frac": 0.765625, "sample": [50.04396438598633, 138.25830078125, 26.26980209350586, 59.99225997924805, -20.508888244628906, 59.438995361328125, -0.5317897796630859, 171.21502685546875, -38.51774978637695, -8.400970458984375, 35.55720520019531, 26.924400329589844, 134.07174682617188, 40.28095245361328, 184.00668334960938, -90.82999420166016, 106.21363830566406, 176.1790313720703, 153.62579345703125, -27.426776885986328, 57.60945129394531, 33.167694091796875, 8.244407653808594, 187.3041229248047, -204.51559448242188, 92.25367736816406, 161.02328491210938, 40.651161193847656, 168.79684448242188, 153.16188049316406, 37.465782165527344, 83.52471160888672, -68.56636810302734, 10.580024719238281, 31.925273895263672, -17.4713134765625, -51.328182220458984, 44.34687805175781, 142.3984375, 185.3820343017578, 223.24327087402344, 182.67684936523438, 210.23300170898438, 40.02520751953125, 168.96226501464844, -125.9974136352539, 37.031890869140625, 136.3218994140625, -41.41260528564453, 182.76516723632812, -17.64466094970703, 124.5776138305664, 188.25204467773438, -30.326820373535156, 52.00489044189453, 176.25091552734375, 151.56173706054688, 71.166259765625, 188.40377807617188, 197.4324951171875, 168.6168670654297, 35.48639678955078, 177.59495544433594, -66.16615295410156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000433.npy"}
|
|
{"epoch": 0.654572940287226, "step": 434, "batch_size": 64, "mean": 81.2872085571289, "std": 110.92500305175781, "min": -223.31776428222656, "p10": -52.34114379882812, "median": 93.77168273925781, "p90": 201.6330276489258, "max": 348.03704833984375, "pos_frac": 0.796875, "sample": [-192.46054077148438, 99.30244445800781, 155.8926239013672, 48.59260559082031, 174.0500030517578, 205.7193603515625, -65.30929565429688, 208.43743896484375, -98.14106750488281, 93.08840942382812, -7.106412887573242, -47.19212341308594, -223.31776428222656, 94.4549560546875, 170.1572265625, -54.54786682128906, 139.4833526611328, 88.51264953613281, 348.03704833984375, 189.3257598876953, 120.25316619873047, 1.9407672882080078, 250.8611297607422, 9.331871032714844, -145.97653198242188, 44.12492370605469, 193.27392578125, 128.83583068847656, 2.864215850830078, 126.3316650390625, -0.5572853088378906, 110.50486755371094, 101.33161926269531, 188.12176513671875, 202.67262268066406, 188.417236328125, -18.47071075439453, 143.72018432617188, 228.48904418945312, 17.73242950439453, 199.20730590820312, 15.907546997070312, 44.83005905151367, 160.18820190429688, 26.130905151367188, 11.217155456542969, 44.094520568847656, 139.123779296875, 83.35284423828125, 172.232421875, 12.49405288696289, 211.97842407226562, -130.9811248779297, -21.213272094726562, 1.3631362915039062, 104.70569610595703, 168.9520721435547, 189.61376953125, 158.99171447753906, -1.8288421630859375, 36.76435470581055, 88.5448989868164, 196.0868682861328, 69.84322357177734], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000434.npy"}
|
|
{"epoch": 0.656084656084656, "step": 435, "batch_size": 64, "mean": 60.99307632446289, "std": 100.51752471923828, "min": -169.72323608398438, "p10": -60.48953933715819, "median": 43.757158279418945, "p90": 198.31760559082034, "max": 236.41632080078125, "pos_frac": 0.671875, "sample": [118.2391128540039, 94.58976745605469, -25.871376037597656, 101.74163818359375, -122.7973861694336, 31.517189025878906, 4.6366729736328125, -0.7021331787109375, 131.81790161132812, -5.77691650390625, 204.3601837158203, 178.6392822265625, -15.12225341796875, 123.40496063232422, 180.58004760742188, 194.84625244140625, 37.970787048339844, -32.18409729003906, -21.166458129882812, 194.51019287109375, 46.86457061767578, -11.085464477539062, 177.06375122070312, 206.9630889892578, 32.19581604003906, 48.732017517089844, -29.134414672851562, 87.36514282226562, 189.40541076660156, 215.54293823242188, -21.38079071044922, 103.4473876953125, -169.72323608398438, 154.15065002441406, 40.64974594116211, 78.42655944824219, -19.623558044433594, -41.16766357421875, 142.1019287109375, 199.80532836914062, 236.41632080078125, 218.1280517578125, 185.8215789794922, 34.884056091308594, 0.7068214416503906, 56.985008239746094, 35.14125442504883, -48.724266052246094, -43.186763763427734, 201.98526000976562, -109.25680541992188, 28.569381713867188, -65.53179931640625, 106.08394622802734, 107.97154998779297, 82.37089538574219, 160.1319580078125, -69.00564575195312, -126.1939468383789, 30.442337036132812, 3.69158935546875, -79.2347412109375, 152.724853515625, -1.1965484619140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000435.npy"}
|
|
{"epoch": 0.6575963718820862, "step": 436, "batch_size": 64, "mean": 49.892601013183594, "std": 103.58084869384766, "min": -193.45761108398438, "p10": -83.87596054077149, "median": 34.29534912109375, "p90": 183.07798309326174, "max": 269.13702392578125, "pos_frac": 0.703125, "sample": [102.64633178710938, -28.597469329833984, 160.5748291015625, 6.443010330200195, 114.80664825439453, 9.02586555480957, 115.16917419433594, 169.89369201660156, -115.15337371826172, 39.32964324951172, 20.689651489257812, 170.0694580078125, 29.26105499267578, 117.90530395507812, -47.81374740600586, 193.08145141601562, 150.09194946289062, 128.6807403564453, -193.45761108398438, 49.985435485839844, 12.17422103881836, 69.1020736694336, -37.56413269042969, 1.3675765991210938, 40.29875183105469, -13.851463317871094, 112.99188232421875, -0.5601024627685547, 183.84559631347656, 20.238319396972656, 79.38528442382812, -38.0845947265625, -178.84487915039062, 129.9429168701172, -42.768531799316406, 75.2506332397461, 182.218994140625, -84.09847259521484, 16.5869140625, -72.05722045898438, 164.1112060546875, 141.76687622070312, 28.203289031982422, 11.518173217773438, -95.49066162109375, 145.4933319091797, -92.99877166748047, 211.34889221191406, 106.8331298828125, 269.13702392578125, 161.38833618164062, 1.3343238830566406, 213.64187622070312, 108.44467163085938, -40.155364990234375, 10.940120697021484, 183.4461212158203, -83.35676574707031, 21.271217346191406, -1.153036117553711, -71.51419067382812, 204.7826690673828, -128.91302490234375, 74.84123992919922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000436.npy"}
|
|
{"epoch": 0.6591080876795162, "step": 437, "batch_size": 64, "mean": 59.357200622558594, "std": 118.33283996582031, "min": -290.5043029785156, "p10": -81.69401092529297, "median": 59.5167236328125, "p90": 212.13809356689455, "max": 296.1117858886719, "pos_frac": 0.640625, "sample": [296.1117858886719, 237.65414428710938, 175.96441650390625, 124.5030517578125, 69.7188949584961, -20.394702911376953, 182.2736053466797, 108.09257507324219, 228.170654296875, 88.6224365234375, 184.9271697998047, -37.92449951171875, -290.5043029785156, 77.54527282714844, -100.09400939941406, -78.80199432373047, 55.47627258300781, 107.81214904785156, 166.85238647460938, -155.8407745361328, 208.98947143554688, 238.5076904296875, 193.69187927246094, -24.610912322998047, -46.51234817504883, 86.49177551269531, 275.6458740234375, -28.390514373779297, -166.5882110595703, -82.82324981689453, -26.072921752929688, -79.05912017822266, 49.85271453857422, -21.873252868652344, -14.490715026855469, 185.74110412597656, 89.41221618652344, -34.29962158203125, 60.804237365722656, 88.81661987304688, 47.89387512207031, -10.764495849609375, -47.76631546020508, 213.4875030517578, 31.64263916015625, 181.56304931640625, 72.7619857788086, 88.86345672607422, 116.04151916503906, -134.88345336914062, 58.229209899902344, 37.120479583740234, 173.68844604492188, 157.0382080078125, -8.4979248046875, 15.352806091308594, 33.7786865234375, 138.1503143310547, -95.78530883789062, -11.591495513916016, -3.2813587188720703, 102.54342651367188, 31.7894287109375, 238.08901977539062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000437.npy"}
|
|
{"epoch": 0.6606198034769464, "step": 438, "batch_size": 64, "mean": 81.26517486572266, "std": 104.62354278564453, "min": -115.2166519165039, "p10": -32.78075103759765, "median": 59.72261428833008, "p90": 188.80894317626954, "max": 480.2109375, "pos_frac": 0.765625, "sample": [223.8775177001953, 0.835296630859375, 480.2109375, -67.02635955810547, -3.1354713439941406, -56.03394317626953, -27.363147735595703, 158.48129272460938, 3.5020008087158203, 265.86151123046875, -48.888668060302734, 214.55419921875, -21.12206268310547, 15.546653747558594, 184.85107421875, 0.7011375427246094, 148.94168090820312, -115.2166519165039, 162.47000122070312, 97.1080551147461, 146.80078125, 151.28147888183594, -11.575492858886719, 50.605560302734375, 64.85845947265625, 179.75306701660156, 82.18894958496094, 43.71345138549805, 180.79327392578125, 125.5335693359375, 97.28861999511719, -43.34130859375, 127.88080596923828, 19.25147819519043, 22.518035888671875, 175.62887573242188, 190.5051727294922, -11.066373825073242, 105.73762512207031, 33.24455261230469, 134.1345977783203, 21.369247436523438, 83.24250030517578, 313.6956787109375, 137.3341522216797, 27.481903076171875, 138.61920166015625, 54.42345428466797, 17.419937133789062, -0.9146308898925781, -35.10258102416992, -6.632318496704102, 31.95029640197754, 166.57012939453125, 156.05899047851562, -93.59940338134766, 178.42398071289062, 54.586769104003906, -1.6152420043945312, 203.2482452392578, 1.8535194396972656, 44.7685546875, 147.9711456298828, 75.92754364013672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000438.npy"}
|
|
{"epoch": 0.6621315192743764, "step": 439, "batch_size": 64, "mean": 70.84788513183594, "std": 101.01692199707031, "min": -255.05499267578125, "p10": -27.123155975341795, "median": 48.36017608642578, "p90": 219.97415008544925, "max": 268.08172607421875, "pos_frac": 0.75, "sample": [182.18209838867188, 12.821475982666016, 27.277387619018555, -32.73094940185547, 59.95925521850586, 177.60264587402344, 111.49563598632812, 31.84930419921875, -36.50263214111328, 236.5692596435547, 227.36058044433594, -4.955217361450195, 195.43072509765625, 14.560379028320312, 61.75566101074219, 264.1210632324219, 172.96026611328125, 26.005226135253906, 208.40286254882812, -45.64149475097656, 7.255565643310547, 126.32136535644531, 65.44551849365234, -3.9142303466796875, 184.98683166503906, 71.35735321044922, 241.67236328125, -111.56709289550781, 6.386079788208008, 203.47044372558594, -18.344724655151367, 224.9332733154297, -29.888954162597656, -6.940155029296875, 56.04619598388672, -26.011734008789062, -3.11334228515625, 15.380828857421875, -255.05499267578125, 40.98202896118164, 63.9884033203125, 39.15339660644531, 193.49525451660156, 36.111602783203125, 63.29692459106445, 188.27978515625, 61.067138671875, 227.94320678710938, 54.56489562988281, 32.124267578125, 33.14485549926758, 99.26414489746094, -27.59947967529297, 160.500732421875, 42.15545654296875, 268.08172607421875, 165.90216064453125, -25.938446044921875, -14.451622009277344, 5.6215972900390625, 25.244869232177734, 75.26692199707031, 96.9970474243164, -9.876441955566406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000439.npy"}
|
|
{"epoch": 0.6636432350718064, "step": 440, "batch_size": 64, "mean": 72.67469024658203, "std": 101.89470672607422, "min": -143.02816772460938, "p10": -47.41279945373535, "median": 56.93446731567383, "p90": 191.7075164794922, "max": 337.5400085449219, "pos_frac": 0.734375, "sample": [-37.03446960449219, 82.74464416503906, 44.960693359375, 243.48455810546875, 191.7191162109375, 337.5400085449219, 192.84628295898438, -21.774383544921875, 191.5242919921875, 89.4429931640625, 10.918952941894531, 180.62841796875, 56.870033264160156, 164.66326904296875, 146.32803344726562, 56.9989013671875, 186.8343048095703, -28.46112823486328, -128.37078857421875, 57.87342834472656, 109.6875228881836, -10.859933853149414, 15.436630249023438, 170.54129028320312, 48.879241943359375, 154.38235473632812, 102.29667663574219, 184.93804931640625, -1.064065933227539, 40.110862731933594, 0.021673202514648438, 66.24285125732422, 162.990234375, -6.879657745361328, -0.5534515380859375, 52.853240966796875, 42.420860290527344, -121.42780303955078, 19.910791397094727, 199.93931579589844, 158.735107421875, 194.9591064453125, 10.988143920898438, 187.2620086669922, -46.72114944458008, 25.28705596923828, 199.30885314941406, 26.904991149902344, 175.8674774169922, -47.70922088623047, -76.4501953125, -82.84483337402344, 82.71450805664062, -40.45062255859375, 114.89754486083984, 31.112567901611328, -20.236629486083984, -57.87177276611328, 183.51116943359375, -143.02816772460938, 44.15196228027344, 191.68045043945312, 185.04827880859375, 104.45950317382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000440.npy"}
|
|
{"epoch": 0.6651549508692366, "step": 441, "batch_size": 64, "mean": 80.16014099121094, "std": 83.98716735839844, "min": -82.91409301757812, "p10": -28.711175537109362, "median": 82.05094909667969, "p90": 191.59245758056642, "max": 218.76744079589844, "pos_frac": 0.765625, "sample": [169.65679931640625, 89.49689483642578, 196.13352966308594, 209.71676635742188, -0.6147365570068359, 151.06471252441406, 171.5347900390625, -5.932777404785156, 120.93484497070312, -17.604080200195312, -48.462032318115234, 150.0072784423828, -34.842323303222656, 191.98526000976562, 49.88957595825195, 66.10211181640625, 113.07152557373047, -16.45843505859375, -38.88515853881836, 190.67591857910156, 83.66168212890625, 107.46521759033203, 166.94287109375, 173.78146362304688, -0.42038726806640625, 182.4303436279297, 1.3984565734863281, -3.2841262817382812, 218.76744079589844, 156.67874145507812, 69.55931854248047, 90.675048828125, -6.262937545776367, 101.31236267089844, 132.62875366210938, 9.493156433105469, -73.67588806152344, 187.9338836669922, 53.930572509765625, 80.44021606445312, 36.446258544921875, 175.19869995117188, 46.81305694580078, 6.932964324951172, 103.50575256347656, 74.55911254882812, 114.90005493164062, 5.713676452636719, 137.50364685058594, -33.47135925292969, 211.25592041015625, 21.096651077270508, 209.995849609375, 208.757568359375, -46.869415283203125, 39.868797302246094, 89.73181915283203, 0.34622955322265625, -10.148658752441406, 112.2869644165039, -82.91409301757812, 60.33350372314453, 170.4290008544922, 37.049964904785156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000441.npy"}
|
|
{"epoch": 0.6666666666666666, "step": 442, "batch_size": 64, "mean": 77.17893981933594, "std": 97.78256225585938, "min": -184.67965698242188, "p10": -33.62414550781249, "median": 62.045040130615234, "p90": 194.29644165039068, "max": 281.5799255371094, "pos_frac": 0.765625, "sample": [7.144462585449219, 274.2114562988281, 54.17304992675781, -42.97607421875, 76.78713989257812, 281.5799255371094, 198.25311279296875, 183.2725067138672, 59.71519088745117, 168.226318359375, 58.286014556884766, 173.42356872558594, 161.02891540527344, 33.120513916015625, 154.29507446289062, 123.96328735351562, 4.5003662109375, 25.52490234375, 52.51416015625, -48.34557342529297, -88.21983337402344, 178.23876953125, 113.32923889160156, -15.06158447265625, 116.86299133300781, -2.9959182739257812, -77.9765853881836, 104.83424377441406, 9.648015975952148, 233.54417419433594, -0.6562347412109375, 83.0239486694336, 203.77001953125, 173.66403198242188, 258.8878479003906, 107.80432891845703, 36.03633117675781, -0.5248680114746094, 184.37632751464844, 62.429412841796875, 61.660667419433594, 23.31580352783203, 17.781721115112305, 15.617321014404297, -4.917181015014648, -184.67965698242188, 2.41864013671875, 213.46743774414062, -35.61601257324219, -28.976455688476562, 185.064208984375, 155.2086181640625, -91.67930603027344, 76.0090103149414, 172.78939819335938, -6.751058578491211, 76.03573608398438, 26.530517578125, 120.47552490234375, 179.39935302734375, 87.33241271972656, 181.63348388671875, -2.1834793090820312, 19.802602767944336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000442.npy"}
|
|
{"epoch": 0.6681783824640968, "step": 443, "batch_size": 64, "mean": 95.66801452636719, "std": 109.27962493896484, "min": -183.16281127929688, "p10": -20.5896499633789, "median": 97.7286376953125, "p90": 194.47905273437502, "max": 456.8019104003906, "pos_frac": 0.796875, "sample": [10.053150177001953, 167.20741271972656, 88.33311462402344, 179.26718139648438, 32.535743713378906, 67.24163818359375, 163.84873962402344, -72.16001892089844, 71.05044555664062, 134.0628204345703, -40.330848693847656, -11.05302619934082, 76.88551330566406, 184.80813598632812, 68.01132202148438, 233.80599975585938, 5.456079483032227, 147.9466552734375, 116.67801666259766, 190.83981323242188, 456.8019104003906, -183.16281127929688, -23.26470184326172, 168.66015625, 7.924398422241211, 261.5312805175781, 163.58380126953125, 107.12416076660156, 170.0223388671875, 182.22547912597656, 175.88223266601562, 142.4618682861328, 150.96449279785156, 174.31797790527344, 76.7508544921875, 196.03872680664062, 178.05084228515625, 173.64743041992188, 118.157470703125, 209.89956665039062, 9.617828369140625, 177.2405242919922, -8.209390640258789, 68.76561737060547, 183.58633422851562, 178.45338439941406, -14.347862243652344, 15.628021240234375, 9.138328552246094, 14.993650436401367, 6.078731536865234, -50.872398376464844, 179.49615478515625, -2.1955718994140625, 43.80123519897461, 69.91673278808594, -178.9368133544922, -31.223907470703125, 249.74159240722656, 75.01321411132812, -0.030788421630859375, -8.354927062988281, 139.7451171875, 273.60211181640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000443.npy"}
|
|
{"epoch": 0.6696900982615268, "step": 444, "batch_size": 64, "mean": 41.3953857421875, "std": 108.96317291259766, "min": -186.0450439453125, "p10": -78.77079467773436, "median": 18.332674026489258, "p90": 192.63760223388672, "max": 291.2716369628906, "pos_frac": 0.625, "sample": [148.65328979492188, -3.242015838623047, -55.005287170410156, 2.4672412872314453, 61.0015869140625, 40.025455474853516, 110.43708038330078, -42.88816833496094, 16.13167953491211, 129.8446502685547, 8.254478454589844, 78.2892074584961, -125.73451232910156, -168.89381408691406, 82.34712219238281, 9.53643798828125, 88.47805786132812, 62.85255432128906, 65.42594909667969, -83.465087890625, 222.40609741210938, 251.58346557617188, -0.0883636474609375, -39.45716857910156, -21.448883056640625, 34.30689239501953, 9.14898681640625, -12.984853744506836, -67.81744384765625, -62.32275390625, 199.91851806640625, 13.061710357666016, 197.18093872070312, 38.882568359375, 30.67010498046875, 175.34158325195312, 52.36743927001953, 192.25770568847656, -9.582061767578125, -186.0450439453125, 42.255401611328125, 291.2716369628906, -138.86956787109375, -49.52220916748047, -5.631681442260742, 183.04519653320312, 176.15921020507812, 272.08477783203125, 192.8004150390625, 78.88221740722656, 16.1540470123291, 114.54447937011719, -3.3417396545410156, 40.50953674316406, 183.904296875, -14.946802139282227, -52.572086334228516, 20.511301040649414, -5.1227569580078125, -122.04969024658203, -178.46755981445312, 163.5146942138672, -3.6729679107666016, 5.969259262084961], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000444.npy"}
|
|
{"epoch": 0.671201814058957, "step": 445, "batch_size": 64, "mean": 79.61311340332031, "std": 96.80767059326172, "min": -139.4617919921875, "p10": -47.61932754516601, "median": 90.68703079223633, "p90": 195.64578857421878, "max": 247.39218139648438, "pos_frac": 0.8125, "sample": [23.02651596069336, 247.39218139648438, 14.448417663574219, -49.65409851074219, -51.207061767578125, 33.26713562011719, 182.853515625, 185.02500915527344, 91.38092041015625, 89.47755432128906, -2.30755615234375, -128.44119262695312, -33.98968505859375, 18.991317749023438, -113.72357177734375, 164.94036865234375, 5.6999359130859375, 18.984649658203125, 1.0025634765625, 76.37828063964844, 5.910499572753906, 198.10211181640625, -139.4617919921875, 43.785186767578125, -63.429832458496094, 200.88726806640625, 1.1101760864257812, 181.62939453125, 72.99527740478516, 0.029144287109375, -55.25312423706055, -19.320585250854492, 108.05915069580078, 207.66940307617188, 181.7996826171875, 140.54676818847656, 155.63316345214844, 182.10202026367188, 103.58248138427734, -12.493431091308594, 200.82833862304688, 89.9931411743164, 56.08170700073242, 228.9683380126953, 185.0762939453125, 185.5547332763672, 236.90090942382812, 17.491853713989258, 169.24696350097656, -42.87152862548828, 139.1591339111328, 2.6701431274414062, 13.97857666015625, 117.69548797607422, 106.2343521118164, 120.18276977539062, 174.27001953125, 185.30914306640625, 94.69231414794922, 95.67296600341797, 115.2431411743164, 121.40522766113281, 189.91436767578125, 24.11235809326172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000445.npy"}
|
|
{"epoch": 0.672713529856387, "step": 446, "batch_size": 64, "mean": 81.42778778076172, "std": 93.92506408691406, "min": -125.00102233886719, "p10": -17.357362937927242, "median": 71.9971694946289, "p90": 194.85227966308594, "max": 306.2306213378906, "pos_frac": 0.796875, "sample": [9.343534469604492, 6.843589782714844, -125.00102233886719, 111.71868896484375, 28.205299377441406, 116.07386779785156, 75.32135009765625, -33.449195861816406, 306.2306213378906, 32.92413330078125, 27.254215240478516, 119.39474487304688, 149.86854553222656, 171.47430419921875, 121.15886688232422, 192.2268524169922, -3.5356979370117188, 109.9182357788086, 88.3977279663086, 69.78099822998047, 2.262531280517578, 162.73736572265625, 8.585174560546875, 182.170654296875, 195.9774627685547, 103.7303695678711, 1.1577644348144531, 177.39736938476562, 215.8560333251953, -14.677177429199219, 173.36734008789062, 0.10217094421386719, -84.7858657836914, -3.8578109741210938, -31.863731384277344, -2.451387405395508, 20.372093200683594, -38.599178314208984, 74.21334075927734, 146.85140991210938, 47.200531005859375, 215.9900360107422, 210.0974884033203, 179.84332275390625, 274.830322265625, 0.5566654205322266, 263.3255615234375, 185.5452423095703, -18.506013870239258, 37.81692123413086, 69.0169906616211, 38.944580078125, 95.606201171875, 118.22024536132812, 185.8352813720703, 187.85255432128906, 9.593669891357422, 12.678947448730469, -3.8147125244140625, 96.84515380859375, -1.9631576538085938, 57.68380355834961, -68.56503295898438, 154.0482940673828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000446.npy"}
|
|
{"epoch": 0.674225245653817, "step": 447, "batch_size": 64, "mean": 94.95809936523438, "std": 107.13021087646484, "min": -188.96493530273438, "p10": -46.55530929565428, "median": 104.28750991821289, "p90": 215.06085357666018, "max": 251.6134033203125, "pos_frac": 0.796875, "sample": [251.6134033203125, 216.1866912841797, 178.0889129638672, 21.24919891357422, 77.56918334960938, -32.99150085449219, 225.17623901367188, 47.350006103515625, -108.41620635986328, -4.5483551025390625, 55.77325439453125, 161.34561157226562, 0.38486480712890625, 124.18539428710938, 204.90692138671875, 182.93157958984375, 107.29337310791016, 55.9551887512207, 94.01568603515625, 96.30986022949219, 20.20867919921875, -25.851661682128906, 52.21070861816406, -52.368370056152344, 60.42645263671875, 232.77407836914062, 210.90037536621094, 222.3035125732422, 177.43905639648438, -86.56451416015625, 96.96815490722656, 68.37391662597656, 185.73486328125, 175.97116088867188, 101.28164672851562, 174.4307861328125, 219.11068725585938, 172.3824920654297, 201.19781494140625, 9.481159210205078, 2.1595001220703125, 138.4086151123047, 201.43255615234375, -133.3277587890625, -14.822956085205078, -139.30072021484375, 43.97582244873047, 156.67611694335938, 239.18179321289062, 145.25051879882812, -12.238113403320312, 172.1019744873047, -77.50651550292969, 212.43389892578125, 100.578369140625, 84.04104614257812, 118.93870544433594, -188.96493530273438, 155.72158813476562, 209.0426483154297, 194.56723022460938, -26.064727783203125, 154.74957275390625, 169.49362182617188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000447.npy"}
|
|
{"epoch": 0.6757369614512472, "step": 448, "batch_size": 64, "mean": 74.84162902832031, "std": 100.25890350341797, "min": -182.75672912597656, "p10": -42.803675842285124, "median": 63.74123764038086, "p90": 191.65413818359374, "max": 332.9554748535156, "pos_frac": 0.84375, "sample": [190.50946044921875, -145.3007049560547, 188.57530212402344, 99.42913818359375, 76.5219955444336, 113.32129669189453, 2.755828857421875, 169.60829162597656, 43.095542907714844, 1.9385452270507812, 152.5889129638672, 95.0879135131836, 66.37579345703125, 9.211563110351562, 187.31097412109375, 216.5755157470703, -8.343013763427734, 210.541259765625, 27.604965209960938, 1.9052085876464844, 18.293746948242188, 172.25753784179688, 65.83698272705078, 112.85853576660156, 150.7215576171875, 0.3087882995605469, 189.38853454589844, 75.51091003417969, 26.94116973876953, -82.42140197753906, -73.48371124267578, 61.64549255371094, 40.20447540283203, 25.426498413085938, 9.390846252441406, 118.85739135742188, -72.66291809082031, 21.61595344543457, 241.7081298828125, -9.911859512329102, 2.805532455444336, -182.75672912597656, 102.12446594238281, 46.80921936035156, 148.48477172851562, 182.5542755126953, 131.51438903808594, 160.58316040039062, 60.805641174316406, 45.147560119628906, -56.900169372558594, 54.779266357421875, 92.3062744140625, 197.9607391357422, 1.8665771484375, 332.9554748535156, -7.604827880859375, -138.67098999023438, 181.46075439453125, 192.14471435546875, 170.38525390625, 58.848724365234375, 21.20244598388672, 199.25711059570312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000448.npy"}
|
|
{"epoch": 0.6772486772486772, "step": 449, "batch_size": 64, "mean": 36.778724670410156, "std": 95.84552764892578, "min": -170.92691040039062, "p10": -93.61531524658201, "median": 28.099824905395508, "p90": 173.7509735107422, "max": 222.03903198242188, "pos_frac": 0.65625, "sample": [6.969202041625977, 41.978118896484375, -127.17987060546875, 78.6009521484375, 41.72258758544922, -0.9400711059570312, -26.237014770507812, 86.07032012939453, 64.6462173461914, 205.58984375, 165.5185546875, 102.23299407958984, 8.349693298339844, -31.535459518432617, -1.991302490234375, -19.647216796875, 222.03903198242188, 1.4884719848632812, 2.7212066650390625, 69.04885864257812, -65.18267822265625, 56.580039978027344, 179.12136840820312, 152.02261352539062, -170.92691040039062, -35.79561996459961, 145.6893310546875, -141.5687255859375, 146.78030395507812, 51.67414855957031, 49.51686096191406, -34.917449951171875, 15.633453369140625, -105.17292785644531, 168.24447631835938, 20.810073852539062, 213.9894561767578, -11.16738510131836, 93.20713806152344, 46.63676452636719, 21.5500545501709, 26.676071166992188, -83.80917358398438, 42.86363220214844, -97.81794738769531, 80.18284606933594, 2.054931640625, 205.75375366210938, 17.047588348388672, 126.1758041381836, 202.49893188476562, -24.457489013671875, 39.587562561035156, -26.861534118652344, -131.27383422851562, 176.11090087890625, 140.99493408203125, -68.83507537841797, 139.370361328125, -67.3639144897461, 60.33671569824219, -107.4573974609375, 29.523578643798828, -13.632171630859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000449.npy"}
|
|
{"epoch": 0.6787603930461074, "step": 450, "batch_size": 64, "mean": 61.2251091003418, "std": 115.10295867919922, "min": -140.54248046875, "p10": -57.73398857116699, "median": 18.9503755569458, "p90": 215.24642333984377, "max": 378.1980895996094, "pos_frac": 0.640625, "sample": [0.9693737030029297, -17.619247436523438, -58.040428161621094, -40.65283966064453, 378.1980895996094, 206.82595825195312, 89.8720703125, 74.79559326171875, 162.4477081298828, 194.5545654296875, 112.67980194091797, 41.17362976074219, 53.52796936035156, 51.06425476074219, 6.134849548339844, 203.11465454101562, -2.0832672119140625, 87.6948471069336, 169.00791931152344, 272.15863037109375, -1.3633384704589844, 36.90817642211914, -28.83698272705078, -46.43462371826172, 75.50434875488281, -28.862041473388672, -29.64221954345703, 222.92291259765625, 97.06038665771484, 8.802057266235352, -11.221244812011719, 218.85519409179688, 48.964599609375, 155.30841064453125, -135.2652130126953, 201.89053344726562, 179.87054443359375, -4.889919281005859, 279.6690979003906, 14.27316665649414, 15.751766204833984, -7.938056945800781, 127.36003875732422, -127.46305084228516, 16.14947509765625, 6.249200820922852, 161.64871215820312, -12.639862060546875, 219.28216552734375, -57.01896286010742, 21.75127601623535, -140.54248046875, 14.91398811340332, 117.24577331542969, 296.30584716796875, -15.162673950195312, -79.04017639160156, 175.95986938476562, -137.46551513671875, 186.6156768798828, -86.03829956054688, 11.601097106933594, -22.24167823791504, -6.215080261230469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000450.npy"}
|
|
{"epoch": 0.6802721088435374, "step": 451, "batch_size": 64, "mean": 54.53407669067383, "std": 98.3890380859375, "min": -172.74221801757812, "p10": -43.52080841064453, "median": 37.66128730773926, "p90": 185.08598632812502, "max": 278.9577331542969, "pos_frac": 0.671875, "sample": [113.64317321777344, 0.0410308837890625, 74.48841857910156, 64.58293151855469, 191.22853088378906, 22.386016845703125, -172.74221801757812, 22.83544921875, -0.43495750427246094, 211.638916015625, 109.04420471191406, 109.2905502319336, 177.65280151367188, 55.000213623046875, 156.79327392578125, 78.97663879394531, 72.7559585571289, -13.393341064453125, 32.60224914550781, 107.97235870361328, -4.083600997924805, -146.5084228515625, -136.5450897216797, 278.9577331542969, 175.65277099609375, -45.75433349609375, 130.3670654296875, -152.65252685546875, -56.35282897949219, -0.6996726989746094, 32.23064422607422, 272.1881103515625, -6.956905364990234, -5.384424209594727, 163.05999755859375, 104.0560073852539, 188.27163696289062, 140.14691162109375, -26.327804565429688, 6.856651306152344, 27.823806762695312, 111.93986511230469, 2.706714630126953, -2.8677978515625, 112.77059173583984, 48.164093017578125, 5.493572235107422, 125.84117126464844, 205.5188446044922, 110.4710693359375, -32.996673583984375, 145.80361938476562, 50.2568359375, -6.916465759277344, 5.765159606933594, 14.383193969726562, 172.78103637695312, -1.8681678771972656, -7.812280654907227, 42.7203254699707, 189.31170654296875, -6.809207916259766, -108.8751220703125, -38.30924987792969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000451.npy"}
|
|
{"epoch": 0.6817838246409675, "step": 452, "batch_size": 64, "mean": 64.65379333496094, "std": 110.04042053222656, "min": -167.09512329101562, "p10": -76.85338516235349, "median": 50.08852767944336, "p90": 201.52106475830078, "max": 286.34136962890625, "pos_frac": 0.765625, "sample": [26.342323303222656, -20.665199279785156, 19.555044174194336, 85.97689819335938, 23.23664093017578, 130.672119140625, 13.247215270996094, 199.45506286621094, 158.28636169433594, 207.53167724609375, 17.660900115966797, 259.89373779296875, -144.453369140625, 168.9988250732422, 182.56195068359375, 93.25911712646484, 286.34136962890625, 147.04568481445312, 136.4525604248047, 165.12423706054688, 48.962059020996094, -27.320404052734375, 268.2640686035156, 136.43203735351562, 23.764354705810547, -8.788202285766602, 178.011962890625, 62.979766845703125, 0.5680465698242188, 29.828163146972656, 5.091228485107422, 232.3463134765625, -135.67340087890625, 196.40582275390625, -49.09309387207031, 108.79806518554688, 47.53032684326172, 128.41397094726562, -167.09512329101562, 10.436038970947266, 112.34294891357422, -117.22515869140625, 112.97005462646484, 51.214996337890625, 102.26670837402344, -161.51405334472656, 202.406494140625, 143.41925048828125, 21.575775146484375, 10.383094787597656, 102.26819610595703, -12.581329345703125, 5.81614875793457, 158.04893493652344, 9.253856658935547, 9.382736206054688, -49.99372100830078, 214.70716857910156, 168.4432373046875, -88.36466979980469, -17.371307373046875, 95.5406265258789, -158.85055541992188, -22.681961059570312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000452.npy"}
|
|
{"epoch": 0.6832955404383976, "step": 453, "batch_size": 64, "mean": 62.802001953125, "std": 106.91879272460938, "min": -165.30218505859375, "p10": -67.88056640624998, "median": 57.20143127441406, "p90": 203.13164978027345, "max": 313.27264404296875, "pos_frac": 0.671875, "sample": [144.02752685546875, 60.723670959472656, -16.384849548339844, 6.723487854003906, 205.9810333251953, 40.82624816894531, -28.129608154296875, 172.27110290527344, 176.54493713378906, -39.68956756591797, -128.64315795898438, -11.5799560546875, 62.947509765625, 7.910087585449219, 164.93319702148438, 170.97140502929688, 313.27264404296875, 56.85075378417969, 201.208251953125, 73.40992736816406, -3.916780471801758, -31.615158081054688, 38.64506530761719, 25.935836791992188, 14.782127380371094, -163.1004638671875, -165.30218505859375, 193.64572143554688, -2.2103652954101562, 183.51251220703125, 203.95596313476562, 49.70320129394531, 102.79972839355469, -51.8966064453125, -11.815631866455078, 64.28116607666016, 169.72109985351562, 182.81179809570312, -114.21150970458984, 163.84449768066406, 74.32050323486328, 251.53196716308594, -74.7308349609375, -17.813190460205078, -31.962234497070312, 28.515762329101562, -93.10734558105469, 29.837047576904297, 145.197509765625, 89.40396118164062, -10.83077621459961, -10.069034576416016, 57.55210876464844, 112.68519592285156, 118.41688537597656, -92.12236022949219, -5.572643280029297, 73.77853393554688, 207.24610900878906, 60.34686279296875, 241.02667236328125, 162.66622924804688, 211.66946411132812, 7.597259521484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000453.npy"}
|
|
{"epoch": 0.6848072562358276, "step": 454, "batch_size": 64, "mean": 71.90084838867188, "std": 104.80872344970703, "min": -247.7777099609375, "p10": -29.399674415588372, "median": 64.59491729736328, "p90": 194.54732818603517, "max": 382.8367919921875, "pos_frac": 0.78125, "sample": [174.37933349609375, -31.56749725341797, -23.868316650390625, 213.32022094726562, -247.7777099609375, -24.341421127319336, 156.39341735839844, 74.4502182006836, 8.599720001220703, 54.57097625732422, 65.22712707519531, 187.0354766845703, 28.810409545898438, -204.63363647460938, 197.76669311523438, 117.18931579589844, 181.99533081054688, 63.96270751953125, 222.47283935546875, 44.59245300292969, -19.372589111328125, -124.38200378417969, 6.199647903442383, 172.66189575195312, 207.3780517578125, 382.8367919921875, -3.310087203979492, 35.98564147949219, -52.265296936035156, 32.795616149902344, -40.53236389160156, 123.59027862548828, 173.3326416015625, 25.32189178466797, 140.98797607421875, 52.18507385253906, 220.44775390625, -37.35673904418945, 199.48501586914062, 98.09336853027344, 101.40652465820312, 114.10163879394531, 9.770286560058594, -0.16907215118408203, 15.139572143554688, 30.14214324951172, 180.36599731445312, 107.02748107910156, 1.929025650024414, 27.69903564453125, 144.09144592285156, 153.2578125, 0.9182853698730469, 173.79885864257812, 137.89328002929688, 29.501068115234375, 137.09266662597656, 102.45375061035156, -12.310249328613281, 138.21044921875, 94.8257827758789, 78.90614318847656, 4.794916152954102, -21.85265350341797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000454.npy"}
|
|
{"epoch": 0.6863189720332578, "step": 455, "batch_size": 64, "mean": 68.90592956542969, "std": 99.97894287109375, "min": -138.0931396484375, "p10": -48.57554893493652, "median": 40.403066635131836, "p90": 222.15782012939462, "max": 292.9461975097656, "pos_frac": 0.78125, "sample": [50.95245361328125, 48.76904296875, 66.48543548583984, 245.6375732421875, 90.57103729248047, 2.718576431274414, 277.1236572265625, 144.07766723632812, -58.17511749267578, 38.698448181152344, 138.4276885986328, -138.0931396484375, 74.28765869140625, 136.3732452392578, 40.52740478515625, 292.9461975097656, -28.85277557373047, 9.785629272460938, 5.317779541015625, -82.1869888305664, 241.84637451171875, 7.238283157348633, 159.66058349609375, -5.330341339111328, -2.1153717041015625, 231.577392578125, 31.038009643554688, 132.14622497558594, 194.0665283203125, 53.86455535888672, 19.08730697631836, 20.76133918762207, 202.62384033203125, 22.84082794189453, 43.65045928955078, -8.623237609863281, -75.7647705078125, 135.17758178710938, 169.55404663085938, 167.46188354492188, -44.25809097290039, 35.79808807373047, 86.09829711914062, -5.324066162109375, 163.47276306152344, 230.52952575683594, 90.62340545654297, 62.48780059814453, -1.0183563232421875, 15.339324951171875, 199.26126098632812, 19.44951629638672, 0.5998649597167969, 181.6062469482422, 39.69264221191406, 2.75848388671875, 40.27872848510742, -77.40332794189453, -50.42588806152344, 33.82264709472656, 70.00931549072266, 18.31863784790039, -85.1575927734375, 287.26702880859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000455.npy"}
|
|
{"epoch": 0.6878306878306878, "step": 456, "batch_size": 64, "mean": 63.83797073364258, "std": 106.40106201171875, "min": -172.4216766357422, "p10": -34.04703941345214, "median": 45.01461982727051, "p90": 181.69277496337892, "max": 386.0698547363281, "pos_frac": 0.75, "sample": [127.89125061035156, 0.6788558959960938, 5.302270889282227, 131.60433959960938, 386.0698547363281, 1.2890052795410156, -37.24164962768555, 94.96308898925781, -8.455135345458984, 126.32717895507812, 48.68220520019531, 56.672122955322266, 285.5724792480469, 161.70928955078125, 16.245925903320312, 5.9543914794921875, 126.64431762695312, 87.42052459716797, -2.043153762817383, 201.32028198242188, -116.55860900878906, 20.750473022460938, 53.04497528076172, -26.59294891357422, 173.3355255126953, -14.435701370239258, -172.4216766357422, -138.23878479003906, 152.68881225585938, -41.649322509765625, 5.538463592529297, 88.69053649902344, 8.02067756652832, 128.7784881591797, 18.038360595703125, 61.36138916015625, 67.43182373046875, 1.2085113525390625, 113.2308349609375, 3.486583709716797, -12.114019393920898, 41.3470344543457, 95.06513977050781, 116.70069885253906, -112.62173461914062, -116.79058074951172, 124.29499816894531, -3.27838134765625, -13.671623229980469, -8.34188461303711, 155.16368103027344, 184.7807159423828, 16.187164306640625, 267.1806640625, 38.65190124511719, 141.1956329345703, 261.9986572265625, 10.280952453613281, 174.48757934570312, 167.44873046875, 10.015634536743164, -16.29107666015625, 81.18037414550781, 280.444091796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000456.npy"}
|
|
{"epoch": 0.6893424036281179, "step": 457, "batch_size": 64, "mean": 69.24058532714844, "std": 92.09001922607422, "min": -139.09681701660156, "p10": -18.208752250671385, "median": 50.04524230957031, "p90": 190.38246002197266, "max": 272.4947814941406, "pos_frac": 0.8125, "sample": [35.00336456298828, 7.7845916748046875, 10.318195343017578, 135.01991271972656, 76.56751251220703, -116.53927612304688, 21.74600601196289, 49.51824951171875, 63.08454132080078, 11.86614990234375, 24.659292221069336, 239.55145263671875, 171.61668395996094, 110.04973602294922, -7.248409271240234, -100.94552612304688, 194.8118896484375, 47.03901672363281, -0.12798118591308594, 189.36376953125, 156.12466430664062, 50.572235107421875, 224.4870147705078, 57.01023864746094, 7.649967193603516, 49.39288330078125, -16.61985206604004, 187.58450317382812, 13.041252136230469, -0.4689979553222656, -94.97149658203125, -97.39295196533203, 17.455078125, 179.83541870117188, 31.237628936767578, -55.18235778808594, -139.09681701660156, 69.88174438476562, 115.8888931274414, 18.61895751953125, 81.2077407836914, 13.259925842285156, 189.52105712890625, 161.50991821289062, -2.1235198974609375, 5.481513977050781, 165.40321350097656, 87.78502655029297, 5.605293273925781, 187.75909423828125, 127.75000762939453, 133.2889404296875, 272.4947814941406, 134.779541015625, 89.16104125976562, 190.7516326904297, 195.12875366210938, 40.31037139892578, 26.00800895690918, 44.1040153503418, 199.12921142578125, -18.88970947265625, 85.31529235839844, 78.46902465820312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000457.npy"}
|
|
{"epoch": 0.690854119425548, "step": 458, "batch_size": 64, "mean": 73.50098419189453, "std": 110.0573501586914, "min": -213.00958251953125, "p10": -26.168550491333008, "median": 62.369834899902344, "p90": 210.85065765380864, "max": 308.01605224609375, "pos_frac": 0.8125, "sample": [157.95643615722656, 147.06689453125, 5.414518356323242, 147.01470947265625, 81.03793334960938, 19.44308853149414, 119.87435913085938, -207.39654541015625, 0.11921310424804688, 8.533782958984375, -19.234939575195312, -67.25761413574219, -24.907974243164062, 263.97662353515625, 199.20562744140625, 48.760101318359375, 65.6704330444336, 191.98480224609375, 81.01927947998047, 107.88267517089844, 18.55061912536621, 39.92149353027344, 19.44574737548828, -5.139717102050781, 153.4795379638672, 39.34727478027344, 266.4697265625, 68.06343841552734, 22.08313751220703, 189.17349243164062, 234.82699584960938, 71.08233642578125, 84.60319519042969, -123.61124420166016, 73.39149475097656, 0.7361545562744141, 22.085472106933594, 27.771148681640625, 113.64614868164062, 160.62574768066406, 162.90576171875, 54.9072380065918, -213.00958251953125, -7.956207275390625, 308.01605224609375, 2.3936119079589844, 167.6958465576172, -7.828100204467773, 191.29837036132812, 218.08229064941406, -26.708797454833984, 300.6656494140625, 90.99986267089844, 22.708520889282227, -127.8478775024414, 193.3382110595703, 59.069236755371094, 190.43942260742188, -95.4468994140625, 49.61883544921875, 12.894573211669922, 50.542724609375, 215.8413848876953, 88.72708129882812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000458.npy"}
|
|
{"epoch": 0.6923658352229781, "step": 459, "batch_size": 64, "mean": 87.84938049316406, "std": 107.6886215209961, "min": -272.2384948730469, "p10": -16.90714302062988, "median": 92.46795272827148, "p90": 213.32340240478516, "max": 296.4844970703125, "pos_frac": 0.765625, "sample": [48.07555389404297, 134.30897521972656, -11.543937683105469, 155.22096252441406, 23.138076782226562, -272.2384948730469, -2.3604564666748047, 205.3050537109375, 117.86367797851562, 174.69671630859375, 218.673583984375, -27.11166763305664, 119.58207702636719, -81.78606414794922, 192.05264282226562, 248.8772430419922, 94.82106018066406, 107.32968139648438, 11.79620361328125, 181.3155059814453, 163.91336059570312, 196.58164978027344, 93.75337219238281, 14.875663757324219, 14.029191970825195, 187.920654296875, 75.9541244506836, 158.5419921875, 51.929874420166016, 148.03565979003906, 264.2410583496094, 58.248558044433594, -5.373262405395508, 17.70245933532715, -16.67914581298828, 120.33052062988281, 58.62090301513672, -113.02452087402344, 177.6981201171875, -17.00485610961914, -5.104799270629883, 149.31411743164062, 190.73155212402344, 2.8099365234375, 296.4844970703125, -108.52726745605469, 99.4580307006836, 30.41364288330078, -58.64971923828125, 91.18253326416016, -14.405075073242188, -1.9433059692382812, -6.081014633178711, 262.6325378417969, 148.49562072753906, 68.45768737792969, 202.22154235839844, 71.91796875, 209.70242309570312, 216.75543212890625, 214.32308959960938, 23.700828552246094, 210.9907989501953, 39.16724395751953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000459.npy"}
|
|
{"epoch": 0.6938775510204082, "step": 460, "batch_size": 64, "mean": 85.93632507324219, "std": 107.63619995117188, "min": -112.58917236328125, "p10": -38.11549453735351, "median": 57.099430084228516, "p90": 212.39881744384772, "max": 372.8768310546875, "pos_frac": 0.765625, "sample": [242.9195098876953, 115.02043151855469, 247.8011474609375, -65.02339172363281, 6.159599304199219, 180.63380432128906, 274.33563232421875, 54.262359619140625, 352.52252197265625, 11.111122131347656, -43.74237060546875, -40.050262451171875, -15.802528381347656, 28.912261962890625, -7.80389404296875, 14.278083801269531, 15.329107284545898, 192.0153350830078, -54.42791748046875, 181.7496337890625, 86.02314758300781, 187.0962371826172, 46.555213928222656, 225.5135498046875, 179.78292846679688, 91.50865936279297, 3.0100669860839844, 43.34716796875, 190.90528869628906, 78.46278381347656, 151.16311645507812, 181.12725830078125, 167.37513732910156, 161.1874237060547, 59.936500549316406, 64.55062866210938, 1.2624015808105469, 16.586929321289062, 52.280181884765625, -2.4396610260009766, 104.27398681640625, 194.69285583496094, -33.601036071777344, 23.880464553833008, 142.98960876464844, 192.36642456054688, -51.164886474609375, 32.22833251953125, 190.72113037109375, -97.28683471679688, 3.6852359771728516, 53.413604736328125, 197.7631072998047, 87.69094848632812, -7.4057464599609375, 218.6712646484375, 53.44036102294922, 372.8768310546875, -112.58917236328125, -28.156539916992188, 126.53555297851562, -13.35101318359375, -14.889137268066406, 187.70428466796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000460.npy"}
|
|
{"epoch": 0.6953892668178382, "step": 461, "batch_size": 64, "mean": 86.53425598144531, "std": 103.96370697021484, "min": -123.26210021972656, "p10": -38.09547119140625, "median": 74.42244338989258, "p90": 217.44663543701174, "max": 305.4117126464844, "pos_frac": 0.75, "sample": [223.9708251953125, 73.45221710205078, 41.72816467285156, 22.78282928466797, 174.77859497070312, 218.1847381591797, 56.126670837402344, 172.15057373046875, 146.38320922851562, -120.8165283203125, 10.374160766601562, 87.97550964355469, 70.26252746582031, 5.644105911254883, 137.60653686523438, -25.253463745117188, -74.47981262207031, 195.94607543945312, 0.4483184814453125, 4.363958358764648, 151.0874786376953, 178.11123657226562, 174.72787475585938, -9.326934814453125, 56.48487091064453, 133.5500030517578, -38.605369567871094, -7.930961608886719, -48.766563415527344, 139.15171813964844, 199.09072875976562, -30.49321746826172, -11.011133193969727, 90.87144470214844, 174.7165069580078, 215.72439575195312, 305.4117126464844, 214.85528564453125, 75.39266967773438, 258.1657409667969, 237.17918395996094, 177.64071655273438, 118.96812438964844, 30.54114532470703, 2.880849838256836, -58.91984558105469, 115.26744079589844, -1.0074443817138672, 174.37831115722656, 27.15644073486328, 269.5626220703125, 110.38095092773438, 71.8423843383789, 38.97003173828125, 208.27301025390625, -29.089065551757812, 218.2329559326172, -123.26210021972656, 63.77880859375, -29.8519287109375, 175.39468383789062, -36.90570831298828, 183.47909545898438, -49.534568786621094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000461.npy"}
|
|
{"epoch": 0.6969009826152683, "step": 462, "batch_size": 64, "mean": 86.15657806396484, "std": 101.32343292236328, "min": -194.94418334960938, "p10": -32.79890213012694, "median": 87.53099822998047, "p90": 200.39992980957032, "max": 274.7103576660156, "pos_frac": 0.8125, "sample": [178.87225341796875, 210.99935913085938, 205.20761108398438, 194.877685546875, 12.990242004394531, 1.40118408203125, 151.468994140625, 3.6252784729003906, -3.2616958618164062, 4.833366394042969, -57.72663116455078, 185.0839080810547, 171.14852905273438, 53.42931365966797, 192.34439086914062, 212.1683807373047, -37.6259765625, 195.9784393310547, 13.905937194824219, 109.4798355102539, 170.12884521484375, 68.59193420410156, 63.59266662597656, 155.7149658203125, 57.163360595703125, 13.387107849121094, 178.24783325195312, 219.64404296875, -18.088245391845703, 156.751708984375, -47.87542724609375, 15.435379028320312, -21.535728454589844, 184.4234161376953, 200.28164672851562, -68.79779052734375, -47.71034240722656, 110.71849060058594, 42.973594665527344, 153.5052032470703, 200.45062255859375, 206.16087341308594, 84.4837417602539, 46.73411560058594, 53.08552551269531, 194.11367797851562, -7.3132171630859375, 35.5440559387207, 145.44583129882812, 171.50967407226562, 102.60858154296875, -179.32386779785156, 187.72918701171875, 183.36962890625, 163.6277618408203, 11.427787780761719, 28.568025588989258, 274.7103576660156, 90.57825469970703, 102.9544906616211, -194.94418334960938, 17.22382354736328, -6.009561538696289, 15.532459259033203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000462.npy"}
|
|
{"epoch": 0.6984126984126984, "step": 463, "batch_size": 64, "mean": 52.036827087402344, "std": 100.2201156616211, "min": -219.10543823242188, "p10": -61.291362762451165, "median": 45.31126403808594, "p90": 189.49871673583985, "max": 244.7469024658203, "pos_frac": 0.703125, "sample": [-6.486980438232422, 209.99786376953125, 93.12823486328125, -219.10543823242188, 103.670654296875, 160.41429138183594, -28.097360610961914, 190.45437622070312, 196.3935546875, 149.15469360351562, -42.347171783447266, -65.22047424316406, -18.562416076660156, 68.77374267578125, 142.9246826171875, -157.88592529296875, 41.25678253173828, 48.49314880371094, 11.451522827148438, -83.00762176513672, 62.89286804199219, 42.12937927246094, -9.00640869140625, 213.5533447265625, 84.88385772705078, -83.78903198242188, 33.769012451171875, 17.96033477783203, 3.7622833251953125, 15.472332000732422, 40.88371276855469, -11.539619445800781, 181.70361328125, 184.02877807617188, 70.05291748046875, 134.9075927734375, 113.75031280517578, 40.92774200439453, 7.626075744628906, 244.7469024658203, 73.83660888671875, 14.999778747558594, 101.148681640625, -135.2248077392578, -43.997257232666016, -51.501129150390625, 220.48818969726562, 124.3173828125, 187.2688446044922, 119.38768005371094, 15.211498260498047, -152.185302734375, -9.66448974609375, -52.123435974121094, 68.35307312011719, 196.617919921875, 62.36137771606445, -25.869140625, 92.35301971435547, 95.96564483642578, -10.892570495605469, 66.05073547363281, 13.27125358581543, 176.0674285888672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000463.npy"}
|
|
{"epoch": 0.6999244142101285, "step": 464, "batch_size": 64, "mean": 81.547119140625, "std": 116.87724304199219, "min": -188.785400390625, "p10": -73.69131126403805, "median": 83.86689758300781, "p90": 229.49688262939455, "max": 264.75390625, "pos_frac": 0.734375, "sample": [-38.55720520019531, 38.13543701171875, 185.7991943359375, -26.39031982421875, 187.4221649169922, -4.4626007080078125, 9.768562316894531, 180.22271728515625, 99.35539245605469, 160.36279296875, 197.269775390625, -38.41509246826172, 184.25241088867188, 132.46324157714844, 96.12135314941406, -120.08517456054688, 240.40496826171875, 156.97776794433594, 41.38343811035156, 176.24020385742188, 4.001190185546875, 264.75390625, -8.24717903137207, 123.22615051269531, 22.722702026367188, 240.572509765625, 66.47806549072266, 231.3076171875, 134.3946990966797, 191.94850158691406, 113.78419494628906, 11.556663513183594, 184.9074249267578, 257.88970947265625, -186.78958129882812, -88.62837219238281, 194.79513549804688, 64.0769271850586, -150.05661010742188, -89.47760009765625, 155.08074951171875, 9.007698059082031, 40.31399154663086, 259.34234619140625, -95.84398651123047, -0.39890289306640625, 21.09039306640625, 71.61244201660156, -7.41815185546875, 250.8802032470703, 140.30160522460938, 225.27183532714844, -188.785400390625, -38.83816909790039, 64.15262603759766, -13.418903350830078, 6.571296691894531, 190.99417114257812, 147.26068115234375, 219.67605590820312, -23.814002990722656, 163.7488250732422, 157.90548706054688, 22.83777618408203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000464.npy"}
|
|
{"epoch": 0.7014361300075586, "step": 465, "batch_size": 64, "mean": 69.41270446777344, "std": 109.1218490600586, "min": -154.61080932617188, "p10": -69.42794570922851, "median": 86.06663131713867, "p90": 203.09149627685548, "max": 326.7765197753906, "pos_frac": 0.671875, "sample": [-34.03125762939453, 326.7765197753906, 190.09341430664062, -154.61080932617188, 166.424560546875, 160.79660034179688, -42.88560485839844, 196.74423217773438, 87.19920349121094, 69.92803192138672, 99.31712341308594, 189.80532836914062, 146.0079803466797, 234.6397705078125, -44.14557647705078, 145.32443237304688, 27.458168029785156, 192.3056640625, 186.40155029296875, -6.794136047363281, -70.55303955078125, -130.89356994628906, 205.81175231933594, 232.03575134277344, 62.85231018066406, 205.81910705566406, -66.80272674560547, -115.4691162109375, -28.424049377441406, 89.4275894165039, -3.0962905883789062, -38.472999572753906, 102.39834594726562, 105.10391998291016, 155.6593780517578, -9.144989013671875, -72.11032104492188, -6.119316101074219, 6.908727645874023, 180.7427215576172, -61.124813079833984, 10.034721374511719, 86.16136169433594, 51.304237365722656, -9.642829895019531, -148.78680419921875, -73.81362915039062, 100.7608642578125, 43.4925537109375, 206.24325561523438, 19.119361877441406, 233.1400146484375, 19.775978088378906, 101.42449951171875, 17.927162170410156, -28.32208824157715, 85.9719009399414, -22.509618759155273, 117.28486633300781, 97.68982696533203, 177.75857543945312, 145.67672729492188, 154.70147705078125, 175.7173614501953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000465.npy"}
|
|
{"epoch": 0.7029478458049887, "step": 466, "batch_size": 64, "mean": 38.943687438964844, "std": 106.6772232055664, "min": -342.9578857421875, "p10": -66.94647521972654, "median": 22.203895568847656, "p90": 180.16890258789064, "max": 261.9873352050781, "pos_frac": 0.65625, "sample": [5.063499450683594, 172.33786010742188, -40.661163330078125, -117.842529296875, 18.18290138244629, -0.8231220245361328, 31.31114959716797, -12.4605712890625, 34.64696502685547, -342.9578857421875, 97.01646423339844, 61.12701416015625, 196.04302978515625, -128.15231323242188, 99.58612823486328, -18.817626953125, 97.93061828613281, 257.7768249511719, 54.16325378417969, 221.92337036132812, 58.622554779052734, 6.66485595703125, 178.26470947265625, -0.18683242797851562, -10.088043212890625, -0.5606460571289062, 133.3789825439453, -76.81845092773438, 261.9873352050781, 52.871246337890625, -7.777917861938477, 142.7716827392578, -23.238479614257812, 31.825729370117188, 166.20681762695312, 51.085086822509766, 134.6414031982422, 53.642005920410156, -29.357349395751953, 21.360092163085938, -33.45752716064453, -219.89093017578125, -80.99633026123047, -84.58981323242188, 144.4107666015625, 23.047698974609375, 18.290084838867188, 124.53211212158203, 238.306640625, -2.493541717529297, -43.911865234375, 95.4239501953125, -39.27495574951172, 33.29576110839844, -5.115364074707031, 1.4130496978759766, 11.751066207885742, 12.167036056518555, 25.53132438659668, 180.9849853515625, 203.97340393066406, 5.758327484130859, 8.196144104003906, 44.355308532714844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000466.npy"}
|
|
{"epoch": 0.7044595616024187, "step": 467, "batch_size": 64, "mean": 63.57453918457031, "std": 102.59225463867188, "min": -184.6373748779297, "p10": -53.40816612243652, "median": 40.54966354370117, "p90": 195.74163513183595, "max": 433.3954162597656, "pos_frac": 0.78125, "sample": [0.3327789306640625, 105.26753234863281, 49.117523193359375, 75.70950317382812, 35.18511962890625, 84.20085906982422, 1.6330795288085938, -28.531871795654297, -55.43703842163086, 70.53099822998047, 152.80230712890625, -49.856204986572266, 190.10162353515625, -58.516082763671875, 124.06358337402344, 114.6640853881836, 82.61675262451172, 45.914207458496094, -142.40464782714844, 13.867050170898438, 166.40109252929688, -70.79913330078125, -40.17828369140625, 146.02398681640625, 225.02711486816406, 30.01892852783203, 25.92993927001953, 104.04013061523438, -184.6373748779297, 210.23519897460938, -5.43011474609375, 50.69068908691406, 8.392278671264648, 11.601409912109375, -3.1192169189453125, 18.988784790039062, 120.01889038085938, 250.76516723632812, 106.21409606933594, 110.8939208984375, 0.7236785888671875, -0.43791770935058594, 18.953041076660156, 13.068109512329102, 5.92738151550293, -54.93043518066406, 166.23526000976562, 5.9447479248046875, 166.5304718017578, 141.1359405517578, 433.3954162597656, 17.729209899902344, -6.236000061035156, 9.633169174194336, 94.20207977294922, 103.79375457763672, 53.155723571777344, 198.15878295898438, 10.124244689941406, 246.0895233154297, -58.039642333984375, 227.75486755371094, 161.94366455078125, 21.576942443847656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000467.npy"}
|
|
{"epoch": 0.7059712773998488, "step": 468, "batch_size": 64, "mean": 85.18936157226562, "std": 111.97914123535156, "min": -204.941162109375, "p10": -46.77696762084959, "median": 71.3425178527832, "p90": 224.92953491210938, "max": 263.4761962890625, "pos_frac": 0.828125, "sample": [2.8041324615478516, 37.743316650390625, 140.0083770751953, 253.79881286621094, 27.853187561035156, -54.007530212402344, 4.779218673706055, 149.35418701171875, 220.89547729492188, 52.391326904296875, 108.99092102050781, 241.0732421875, 238.77549743652344, -181.6841583251953, 28.286727905273438, 14.2864990234375, 62.720603942871094, -13.22979736328125, -204.941162109375, 138.56365966796875, 182.20062255859375, 157.99609375, 190.76559448242188, 263.4761962890625, 46.82673645019531, 163.19448852539062, 188.49627685546875, 0.8370380401611328, -18.944046020507812, -140.26150512695312, 224.31900024414062, -155.09725952148438, 133.0421142578125, -1.3416404724121094, 18.834449768066406, -56.086936950683594, 152.23849487304688, 173.69732666015625, -66.40019226074219, 180.8490753173828, 62.32147979736328, 54.322906494140625, 198.581787109375, 227.49295043945312, 163.33328247070312, 1.5002403259277344, 25.904714584350586, 59.43382263183594, 8.825706481933594, 0.17310142517089844, 193.3218994140625, 31.60089874267578, 123.22933959960938, 47.61260986328125, 261.4487609863281, 133.10427856445312, -29.905654907226562, 225.19119262695312, 181.13531494140625, 191.79200744628906, 187.8082275390625, 94.93135833740234, 79.96443176269531, 21.89008331298828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000468.npy"}
|
|
{"epoch": 0.7074829931972789, "step": 469, "batch_size": 64, "mean": 77.55298614501953, "std": 92.8604736328125, "min": -140.1810302734375, "p10": -19.293867683410646, "median": 66.62998962402344, "p90": 189.24817962646486, "max": 352.23028564453125, "pos_frac": 0.78125, "sample": [20.008453369140625, 187.19639587402344, 93.52006530761719, 154.9876708984375, 24.053016662597656, 4.701816558837891, 105.25172424316406, 175.27194213867188, 74.76300048828125, -140.1810302734375, 176.34181213378906, 92.12860107421875, 187.7821807861328, 6.435737609863281, -19.204200744628906, 189.87646484375, 129.93997192382812, 54.25969696044922, 132.84776306152344, 180.36663818359375, 202.72186279296875, 9.630546569824219, 134.03302001953125, -9.21258544921875, 93.30303955078125, -82.02140045166016, 45.23267364501953, 9.976531982421875, 5.743894577026367, 58.496978759765625, -6.8148040771484375, 0.9302654266357422, 110.61068725585938, 206.2732696533203, 181.86537170410156, -12.400634765625, -1.0977249145507812, -26.197792053222656, 177.3046112060547, 165.4864959716797, 202.93545532226562, 80.40250396728516, 36.79924392700195, 148.6434326171875, -7.555351257324219, -4.21234130859375, 107.74703979492188, 0.41301727294921875, 168.947021484375, 24.644912719726562, -55.47607421875, -24.018871307373047, 241.22430419921875, 9.796348571777344, 352.23028564453125, 1.5595169067382812, 117.73231506347656, 40.132957458496094, 136.81593322753906, -19.33229637145996, 146.6601104736328, 12.124174118041992, -39.05487060546875, 190.02017211914062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000469.npy"}
|
|
{"epoch": 0.708994708994709, "step": 470, "batch_size": 64, "mean": 76.54893493652344, "std": 105.7480697631836, "min": -209.9139862060547, "p10": -47.5119342803955, "median": 80.1877212524414, "p90": 208.6586929321289, "max": 282.489013671875, "pos_frac": 0.78125, "sample": [215.95285034179688, 97.18157196044922, 246.02850341796875, -209.9139862060547, 66.78038024902344, 209.86715698242188, 38.234161376953125, 34.950477600097656, 188.07040405273438, 106.8155517578125, 203.84527587890625, -74.29127502441406, 206.02316284179688, 10.275466918945312, 192.03952026367188, 46.59777069091797, 99.24056243896484, -94.72183990478516, -27.092132568359375, 164.64772033691406, -32.80116653442383, 207.52560424804688, 9.494424819946289, -128.41207885742188, 209.14430236816406, 40.46867370605469, 187.88186645507812, 158.9126434326172, 2.2105865478515625, 5.111814498901367, 123.2626953125, 4.4212799072265625, 135.0836181640625, 45.40068817138672, -71.33840942382812, 13.912742614746094, 105.11061096191406, 1.7366695404052734, 111.4129638671875, -19.937049865722656, 282.489013671875, 200.5510711669922, 213.99658203125, 12.543342590332031, -38.54920196533203, -26.436294555664062, 22.508066177368164, 202.93899536132812, 128.65548706054688, 127.1865463256836, 114.59564208984375, 214.31362915039062, -51.29338836669922, 158.8379364013672, 22.222763061523438, 172.950439453125, 98.90950012207031, 93.59506225585938, -7.3171234130859375, -52.853538513183594, 206.17840576171875, 5.943761825561523, -38.688541412353516, 6.7199554443359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000470.npy"}
|
|
{"epoch": 0.7105064247921391, "step": 471, "batch_size": 64, "mean": 83.86780548095703, "std": 103.94709777832031, "min": -196.94203186035156, "p10": -19.83138427734374, "median": 77.2806396484375, "p90": 200.73172302246095, "max": 296.4338073730469, "pos_frac": 0.84375, "sample": [4.0722808837890625, 243.70062255859375, 147.83688354492188, 173.2147216796875, 83.83535766601562, -54.1378173828125, 182.65042114257812, 64.3194580078125, 196.90618896484375, 242.66973876953125, 69.49817657470703, 64.70442199707031, 186.91256713867188, 28.081382751464844, -142.72543334960938, -0.7327632904052734, 76.84764862060547, 202.09030151367188, 186.07521057128906, -94.83615112304688, 46.83237075805664, 28.80386734008789, 151.68215942382812, 89.06233215332031, 185.21627807617188, -196.94203186035156, 77.71363067626953, 124.034423828125, 188.45477294921875, 23.338058471679688, 156.77328491210938, 129.3096466064453, 20.013011932373047, 53.262237548828125, 109.86947631835938, 88.33551788330078, 194.6524658203125, -65.03962707519531, 53.067665100097656, 186.1771240234375, 21.129859924316406, 31.32928466796875, 191.27374267578125, 224.1553955078125, 11.26249885559082, 8.821823120117188, 17.584701538085938, 64.66230773925781, -11.563285827636719, 161.18687438964844, -23.374855041503906, 204.27130126953125, 97.44782257080078, 24.964523315429688, 197.56170654296875, 5.187801361083984, 9.0103759765625, 132.8896942138672, -181.70701599121094, 296.4338073730469, 34.18639373779297, 107.40843200683594, -5.8778839111328125, 243.6941680908203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000471.npy"}
|
|
{"epoch": 0.7120181405895691, "step": 472, "batch_size": 64, "mean": 65.74198913574219, "std": 123.31773376464844, "min": -227.4779510498047, "p10": -86.03196029663084, "median": 57.706275939941406, "p90": 202.82987365722656, "max": 432.26483154296875, "pos_frac": 0.703125, "sample": [-5.371253967285156, 43.262367248535156, 108.98999786376953, -46.84687805175781, 198.27029418945312, 173.381103515625, -43.960296630859375, 133.14906311035156, 229.3924560546875, 136.66055297851562, 91.62464904785156, 287.3359375, 8.101804733276367, -63.573699951171875, -6.570409774780273, 193.54397583007812, -10.990564346313477, 86.17765045166016, 190.06686401367188, 53.913604736328125, -119.9366455078125, 28.58782958984375, -31.694461822509766, 202.66305541992188, 17.85076904296875, 202.9013671875, 49.26399230957031, 210.640625, 192.58779907226562, -7.324733734130859, 24.621070861816406, 87.53138732910156, 100.07171630859375, -150.61749267578125, -39.263511657714844, 78.77780151367188, 240.6553955078125, 432.26483154296875, -9.016654968261719, -5.722297668457031, 193.3481903076172, 196.7881317138672, -227.4779510498047, 208.78582763671875, -172.82142639160156, -95.65692901611328, 188.9063720703125, 13.056734085083008, -163.9365234375, 111.05709075927734, 10.193572998046875, 61.49894714355469, -21.151168823242188, 25.383399963378906, 12.036788940429688, 67.01520538330078, 92.92103576660156, 23.602615356445312, 192.8101043701172, 44.484954833984375, 103.45640563964844, 83.72492218017578, -169.66244506835938, 167.72450256347656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000472.npy"}
|
|
{"epoch": 0.7135298563869993, "step": 473, "batch_size": 64, "mean": 78.22579193115234, "std": 131.8306884765625, "min": -195.7452392578125, "p10": -88.56231384277343, "median": 51.41722106933594, "p90": 257.0948913574219, "max": 370.4033203125, "pos_frac": 0.734375, "sample": [268.1123352050781, 212.66802978515625, 43.83518981933594, -159.99850463867188, -129.1896514892578, 174.84507751464844, 49.89613342285156, 9.898025512695312, 19.84848976135254, 222.82080078125, 181.68966674804688, 52.93830871582031, -5.137697219848633, 128.3251190185547, 3.3710479736328125, -51.322349548339844, 292.11309814453125, 252.89651489257812, 17.92676544189453, 213.47998046875, -117.99143981933594, 25.60686492919922, 177.71820068359375, -85.58938598632812, -67.41683959960938, 179.86444091796875, 171.3629913330078, 246.25387573242188, 11.587594985961914, -33.016502380371094, -28.73590087890625, 7.876678466796875, -171.82540893554688, -24.379493713378906, -25.130826950073242, 90.83128356933594, 370.4033203125, 261.0694274902344, 30.824031829833984, 18.136011123657227, 286.5697937011719, 187.81033325195312, 342.73394775390625, 96.11561584472656, 116.05613708496094, 81.32472229003906, 77.27957916259766, 7.44873046875, 42.55328369140625, 185.9678192138672, 155.8897705078125, -5.837699890136719, 116.62252044677734, 124.7658920288086, -19.23889923095703, 147.52415466308594, 258.8941955566406, -140.2337188720703, 11.055252075195312, -89.83642578125, 31.646072387695312, 135.2823944091797, 215.33737182617188, -195.7452392578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000473.npy"}
|
|
{"epoch": 0.7150415721844293, "step": 474, "batch_size": 64, "mean": 69.45356750488281, "std": 119.30158233642578, "min": -193.39566040039062, "p10": -51.15876884460449, "median": 39.43556022644043, "p90": 225.75611877441406, "max": 317.9157409667969, "pos_frac": 0.6875, "sample": [211.9595947265625, 182.1370391845703, 150.25074768066406, -67.49671936035156, -8.674625396728516, 16.25460433959961, 262.1955261230469, 223.20706176757812, 83.04820251464844, 56.505950927734375, 139.77992248535156, 75.58616638183594, -193.39566040039062, 147.38328552246094, 91.73147583007812, 205.5970916748047, -106.80474090576172, 0.034572601318359375, -48.56488037109375, 34.6748046875, -46.985496520996094, -10.728385925292969, 180.6424560546875, 42.16221618652344, 153.0492706298828, 38.82250213623047, -0.16012001037597656, -36.44596862792969, -16.540786743164062, -1.9021072387695312, 179.40878295898438, 40.04861831665039, -167.6435089111328, 2.3527755737304688, -27.98807144165039, 197.73768615722656, 34.82640075683594, 234.16787719726562, 226.84857177734375, -0.5054550170898438, 25.081830978393555, 57.699066162109375, 263.3985900878906, 175.5958251953125, -52.27043533325195, 307.14678955078125, 276.5121765136719, 75.15608215332031, 132.35443115234375, 0.9511013031005859, 186.03662109375, 317.9157409667969, -36.56144714355469, 29.470298767089844, 33.62134552001953, -28.98507308959961, 195.39697265625, -134.85012817382812, -9.228435516357422, 114.89225769042969, 168.061767578125, -154.08338928222656, 4.4820098876953125, 20.657543182373047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000474.npy"}
|
|
{"epoch": 0.7165532879818595, "step": 475, "batch_size": 64, "mean": 98.86182403564453, "std": 112.10923767089844, "min": -103.04702758789062, "p10": -27.80060939788818, "median": 62.04240798950195, "p90": 224.4021133422852, "max": 349.1754455566406, "pos_frac": 0.78125, "sample": [-3.784975051879883, 10.502574920654297, -28.748390197753906, -14.934860229492188, 181.87973022460938, 200.64251708984375, 188.42469787597656, 215.44683837890625, -12.942567825317383, -49.31684112548828, 186.26434326171875, -103.04702758789062, 228.43563842773438, 349.1754455566406, 18.78019142150879, -21.072341918945312, 171.7064971923828, 210.16944885253906, 6.016315460205078, 143.78305053710938, 61.330299377441406, 29.440475463867188, -25.589120864868164, 17.25104522705078, 174.60879516601562, 9.273990631103516, 26.186180114746094, 227.8689727783203, 195.62014770507812, -16.04460906982422, 212.33633422851562, 196.889404296875, 209.15023803710938, 206.61129760742188, 62.406776428222656, 2.560565948486328, 177.7679901123047, 15.805183410644531, 121.75746154785156, 27.733837127685547, 290.0992431640625, 182.50405883789062, 17.576274871826172, 33.19053649902344, 175.54969787597656, -50.41117477416992, 40.03788375854492, 104.221435546875, 337.32061767578125, 42.729835510253906, 188.01600646972656, 262.5152587890625, 282.3453369140625, 108.96097564697266, 179.9849853515625, -73.82421112060547, -45.728126525878906, 216.31277465820312, 61.67803955078125, 188.99391174316406, 0.5082130432128906, -8.206413269042969, -57.39082336425781, 39.82670211791992], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000475.npy"}
|
|
{"epoch": 0.7180650037792895, "step": 476, "batch_size": 64, "mean": 62.84834671020508, "std": 113.40619659423828, "min": -154.80300903320312, "p10": -60.64745483398437, "median": 57.04940414428711, "p90": 187.35180969238283, "max": 338.3601379394531, "pos_frac": 0.65625, "sample": [224.2308349609375, 75.06098937988281, -10.744237899780273, 90.07720947265625, -58.47023010253906, 43.996734619140625, -61.58055114746094, 188.89266967773438, 338.3601379394531, 71.73025512695312, 294.6523742675781, -2.4491844177246094, 174.36936950683594, 173.54495239257812, 156.5487060546875, 116.00737762451172, 182.66238403320312, -7.816459655761719, 45.44579315185547, -9.0439453125, -124.09815216064453, 0.8617134094238281, 148.1663360595703, 177.65582275390625, 44.78565979003906, -116.93173217773438, 50.02021026611328, -44.08747100830078, -55.079010009765625, 1.7200584411621094, 214.0039825439453, -40.33390808105469, 123.08949279785156, 9.598819732666016, 78.04463958740234, -86.71235656738281, -141.2742462158203, -154.80300903320312, 64.07859802246094, 7.9028778076171875, -41.94639587402344, 173.1154022216797, 17.355865478515625, -54.609130859375, 131.84605407714844, 75.88713073730469, 169.33615112304688, -29.62024688720703, -37.578857421875, 147.56466674804688, 139.823486328125, -139.46536254882812, 216.39981079101562, -31.189727783203125, 109.262939453125, 261.23760986328125, 6.460626602172852, 96.0985107421875, 183.7564697265625, 153.1649169921875, -22.596803665161133, 182.6159210205078, 173.4989013671875, -40.207275390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000476.npy"}
|
|
{"epoch": 0.7195767195767195, "step": 477, "batch_size": 64, "mean": 68.399658203125, "std": 135.89877319335938, "min": -242.90628051757812, "p10": -131.35957031249998, "median": 50.91370964050293, "p90": 224.80951995849608, "max": 338.15411376953125, "pos_frac": 0.75, "sample": [-179.7115020751953, -102.1343994140625, 144.58233642578125, 26.104209899902344, -7.72235107421875, 190.76821899414062, 20.80518341064453, 28.41941261291504, 67.4503173828125, 5.046295166015625, 224.57559204101562, 56.1553955078125, -158.1830291748047, -5.999931335449219, 25.144983291625977, 9.735294342041016, -21.527084350585938, 288.1407470703125, 169.8932647705078, -83.49908447265625, 291.34588623046875, 160.7695770263672, 1.6920166015625, 56.68755340576172, 90.51651000976562, -44.486427307128906, -143.8846435546875, 0.8778610229492188, 207.25604248046875, 7.194976806640625, -191.6872100830078, 37.60137176513672, 60.33136749267578, 45.67202377319336, -209.24127197265625, -38.40128707885742, -54.01393127441406, 43.69519805908203, 166.4623565673828, 38.297637939453125, 221.25514221191406, -0.08025360107421875, 224.90977478027344, 207.90306091308594, 85.07666015625, 223.58975219726562, -196.85739135742188, 225.11386108398438, 142.31561279296875, 34.87939453125, 338.15411376953125, 152.1417236328125, 9.96428108215332, 205.70518493652344, 237.0796661376953, 103.43946838378906, -242.90628051757812, 304.9652099609375, 205.48353576660156, 206.94375610351562, 89.96446990966797, 181.12753295898438, 22.413162231445312, 170.26718139648438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000477.npy"}
|
|
{"epoch": 0.7210884353741497, "step": 478, "batch_size": 64, "mean": 54.164920806884766, "std": 111.2083740234375, "min": -173.37506103515625, "p10": -102.54683303833008, "median": 45.05862045288086, "p90": 191.33315734863282, "max": 254.0304412841797, "pos_frac": 0.6875, "sample": [-9.008138656616211, 176.39739990234375, 121.553955078125, 167.4271240234375, 151.31838989257812, -4.847312927246094, 199.49774169921875, 117.15017700195312, -31.63334083557129, 123.92959594726562, 3.8278732299804688, 17.60113525390625, 96.29592895507812, 233.7044677734375, -31.378700256347656, 18.95233154296875, -135.2106475830078, 45.747314453125, -22.059646606445312, 93.63119506835938, -49.67143249511719, -32.815452575683594, 192.02096557617188, 157.19183349609375, 185.00411987304688, -101.38074493408203, 168.00860595703125, 150.81573486328125, 91.08749389648438, -168.97854614257812, 43.6279296875, 9.104358673095703, -173.37506103515625, -169.12493896484375, 19.156665802001953, 105.16566467285156, 189.37850952148438, 37.9337158203125, 172.43408203125, 47.665382385253906, 198.85263061523438, -3.941751480102539, -130.59552001953125, 44.36992645263672, 189.728271484375, 48.58341979980469, -7.14990234375, 254.0304412841797, 43.74192810058594, -2.6899986267089844, 225.08839416503906, 99.21997833251953, -65.9632568359375, 159.93234252929688, 3.5631256103515625, 221.95431518554688, 131.7972869873047, -55.28046417236328, 6.2557373046875, 45.75218200683594, -103.04658508300781, -170.69757080078125, 113.0315170288086, 13.87278938293457], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000478.npy"}
|
|
{"epoch": 0.7226001511715797, "step": 479, "batch_size": 64, "mean": 49.07115173339844, "std": 109.13861083984375, "min": -245.66131591796875, "p10": -89.31968765258787, "median": 38.48871421813965, "p90": 183.69400787353518, "max": 360.90771484375, "pos_frac": 0.65625, "sample": [8.146968841552734, 31.739795684814453, 148.91650390625, -153.07118225097656, -102.19143676757812, 84.60429382324219, 235.23199462890625, 111.32521057128906, 118.44017028808594, -99.87787628173828, 46.05979537963867, -58.4849967956543, 16.288665771484375, -20.346593856811523, -14.364707946777344, 51.24530029296875, 252.09739685058594, 220.53497314453125, -24.264135360717773, -2.4836463928222656, -44.16640090942383, 37.59366989135742, 360.90771484375, 205.50619506835938, 25.0386905670166, -138.6965789794922, 45.97088623046875, 67.18368530273438, -115.91307830810547, -22.07262420654297, -2.1717987060546875, 121.7706069946289, -4.9231719970703125, 152.82579040527344, -16.579193115234375, 59.25692367553711, -156.61276245117188, -16.069137573242188, 24.342559814453125, 170.67263793945312, 153.0548095703125, -245.66131591796875, -39.55854034423828, 76.12092590332031, 187.12789916992188, 175.6815948486328, 90.0277328491211, 21.2471923828125, -1.0002670288085938, 87.14427947998047, 39.383758544921875, 98.73673248291016, -64.68391418457031, 212.27102661132812, -10.584169387817383, 123.61471557617188, 113.67594909667969, 16.366945266723633, 57.242576599121094, 144.56134033203125, 4.209714889526367, 25.216644287109375, 109.66569519042969, 163.28109741210938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000479.npy"}
|
|
{"epoch": 0.7241118669690099, "step": 480, "batch_size": 64, "mean": 75.7420425415039, "std": 114.6369857788086, "min": -215.22280883789062, "p10": -52.412998962402334, "median": 77.86122512817383, "p90": 197.8585952758789, "max": 397.1616516113281, "pos_frac": 0.765625, "sample": [86.32950592041016, -3.4171142578125, 188.1361541748047, -164.42593383789062, 133.34368896484375, 30.29498863220215, 169.12594604492188, -215.22280883789062, 16.681732177734375, 397.1616516113281, -37.665687561035156, 116.20863342285156, 199.03085327148438, -15.453727722167969, 124.01007080078125, -31.979446411132812, -92.01437377929688, 194.83143615722656, -44.91986083984375, 3.9735145568847656, -134.00543212890625, 178.65713500976562, 8.611763000488281, -120.25145721435547, 198.46640014648438, 161.86981201171875, 329.506591796875, 69.3929443359375, 178.6797332763672, 53.35795593261719, 115.58392333984375, 2.354259490966797, 39.96143341064453, 154.08456420898438, 46.20885467529297, 99.3382568359375, 225.3252410888672, 122.2928466796875, -123.4179458618164, 123.57450866699219, 173.49777221679688, 106.4889907836914, 53.637962341308594, 181.5125274658203, 87.50733947753906, 170.51815795898438, 0.13677978515625, 23.9786376953125, 53.07514572143555, 40.22285842895508, -10.503271102905273, -55.62434387207031, 204.88046264648438, 98.63896179199219, 171.4212646484375, 124.27298736572266, 196.4403839111328, 173.62051391601562, -6.957294464111328, -37.379127502441406, 45.14527893066406, 221.07064819335938, 2.3369293212890625, 45.930572509765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000480.npy"}
|
|
{"epoch": 0.7256235827664399, "step": 481, "batch_size": 64, "mean": 68.3882827758789, "std": 110.935302734375, "min": -191.98281860351562, "p10": -46.87031173706054, "median": 50.692726135253906, "p90": 191.85083618164063, "max": 395.046630859375, "pos_frac": 0.765625, "sample": [37.33740997314453, 4.518791198730469, 188.75552368164062, 88.93245697021484, -105.60090637207031, 123.69535827636719, 177.12249755859375, 67.12239074707031, -42.26604461669922, -88.83407592773438, 126.8541259765625, 84.16709899902344, 170.1698455810547, 80.1288070678711, 15.916908264160156, 107.91183471679688, 181.30679321289062, 45.54747009277344, 143.87823486328125, 151.16317749023438, -188.09156799316406, -4.206459045410156, 236.55996704101562, 55.837982177734375, -4.172344207763672, -43.175621032714844, 14.035598754882812, 182.8524627685547, 1.9492607116699219, 101.27780151367188, 1.1117572784423828, 193.17739868164062, 10.368881225585938, 8.75390625, 44.46562194824219, -48.45375061035156, 153.23785400390625, 14.132984161376953, 5.806247711181641, 11.673873901367188, 88.79150390625, 33.45153045654297, 178.23208618164062, 199.07859802246094, -5.8676300048828125, 217.53048706054688, -32.33636474609375, -22.149085998535156, 196.56825256347656, 185.2836151123047, 185.01925659179688, 19.53093719482422, 75.45172882080078, -0.34876251220703125, -63.845458984375, 395.046630859375, -191.98281860351562, 161.76727294921875, 116.983642578125, 335.5367126464844, 94.08202362060547, 3.9900894165039062, -117.6912841796875, 19.757335662841797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000481.npy"}
|
|
{"epoch": 0.72713529856387, "step": 482, "batch_size": 64, "mean": 70.85865783691406, "std": 132.8113555908203, "min": -284.11572265625, "p10": -89.08525009155272, "median": 53.837303161621094, "p90": 214.88953247070316, "max": 430.7927551269531, "pos_frac": 0.6875, "sample": [19.620681762695312, -284.11572265625, -21.67607879638672, 153.49081420898438, 51.189125061035156, 141.258056640625, 69.98750305175781, -29.56578826904297, 8.413204193115234, 223.09580993652344, 109.44192504882812, 192.90023803710938, 178.48898315429688, 51.868080139160156, 138.09364318847656, 166.132568359375, 305.61004638671875, -12.700141906738281, -191.65052795410156, 91.96086883544922, 217.92721557617188, -22.71734619140625, 430.7927551269531, -50.15510559082031, -6.339141845703125, 141.74307250976562, 162.95999145507812, 84.63435363769531, -16.159500122070312, 2.382740020751953, 28.96424102783203, 170.3671112060547, -91.28363037109375, 123.28109741210938, 207.80160522460938, 173.80990600585938, 96.85137176513672, 35.77067184448242, -21.061664581298828, -92.1473388671875, 378.0224914550781, 150.59976196289062, 35.69568634033203, 21.3094482421875, -0.145172119140625, 190.6561737060547, 55.80652618408203, -3.0749263763427734, 353.6419677734375, -129.538818359375, 110.18755340576172, 48.15589904785156, 92.07601928710938, 40.79359436035156, 172.8063201904297, 0.9083271026611328, -62.82135772705078, 299.88800048828125, -83.95569610595703, -134.54710388183594, 132.75132751464844, -29.21929168701172, -121.063232421875, 76.75496673583984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000482.npy"}
|
|
{"epoch": 0.7286470143613001, "step": 483, "batch_size": 64, "mean": 84.68096160888672, "std": 109.61720275878906, "min": -191.9762420654297, "p10": -27.13540935516357, "median": 73.36193084716797, "p90": 214.27351684570314, "max": 358.5904235839844, "pos_frac": 0.78125, "sample": [27.139244079589844, 211.0244598388672, -2.935588836669922, 109.67163848876953, 175.37619018554688, 171.78506469726562, 178.3883819580078, -84.92796325683594, -45.763153076171875, -21.26840591430664, 80.49004364013672, 181.03591918945312, 175.12728881835938, 176.59141540527344, -82.73701477050781, 171.943115234375, 39.608699798583984, -66.03168487548828, 3.795745849609375, 180.75392150878906, 87.91719055175781, 195.22906494140625, 185.19540405273438, 83.39274597167969, 213.37057495117188, 60.598045349121094, 263.84967041015625, -28.60979461669922, 16.076034545898438, 123.31008911132812, 149.65719604492188, -166.39901733398438, 196.5233154296875, 6.166837692260742, 1.362457275390625, 358.5904235839844, -21.10779571533203, -191.9762420654297, 51.89314270019531, -18.377784729003906, 63.54157257080078, 33.07372283935547, 255.84097290039062, 104.56793975830078, 11.137916564941406, 79.52969360351562, 253.6686248779297, 233.41921997070312, 62.821922302246094, 168.61228942871094, 41.582603454589844, 82.60273742675781, 18.108558654785156, 223.09420776367188, -23.69517707824707, 14.996122360229492, 67.19416809082031, 7.108087539672852, 214.66049194335938, 170.06027221679688, 28.847702026367188, -17.196372985839844, -2.7388877868652344, 183.01434326171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000483.npy"}
|
|
{"epoch": 0.7301587301587301, "step": 484, "batch_size": 64, "mean": 54.72780227661133, "std": 83.86465454101562, "min": -140.03634643554688, "p10": -52.87069358825683, "median": 42.68070030212402, "p90": 176.157440185547, "max": 237.12478637695312, "pos_frac": 0.8125, "sample": [31.2628173828125, 23.37713623046875, 1.0836029052734375, -5.784965515136719, 212.58938598632812, 4.954904556274414, 87.26960754394531, 19.1036376953125, 131.63687133789062, 14.806221008300781, 26.736671447753906, -108.68647003173828, 87.69830322265625, 49.183223724365234, 104.18955993652344, 100.76759338378906, -72.37924194335938, -26.250934600830078, 94.73744201660156, 111.43767547607422, 3.289365768432617, 188.3634033203125, 134.2898406982422, 5.713371276855469, 115.74784088134766, 115.7637710571289, 201.09117126464844, 26.290817260742188, 114.01506042480469, 36.3978271484375, 48.8531608581543, 9.877235412597656, 2.6858978271484375, -47.95134735107422, 237.12478637695312, -12.41311264038086, 23.982572555541992, 9.84796142578125, 77.49351501464844, -3.5860443115234375, 144.77639770507812, -64.51488494873047, 234.0777130126953, -98.13217163085938, 151.40557861328125, 86.38037872314453, 186.765380859375, -54.97898483276367, 75.48419189453125, 18.282073974609375, 215.65191650390625, 16.231281280517578, 74.73280334472656, 35.71366882324219, -98.2751693725586, -140.03634643554688, 46.14289474487305, 82.62551879882812, 107.88520812988281, 136.2655029296875, 39.218505859375, 67.87200927734375, 51.242069244384766, 13.153610229492188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000484.npy"}
|
|
{"epoch": 0.7316704459561603, "step": 485, "batch_size": 64, "mean": 62.69023132324219, "std": 98.2781982421875, "min": -191.59271240234375, "p10": -23.218752288818358, "median": 31.06640625, "p90": 196.6101501464844, "max": 330.29547119140625, "pos_frac": 0.75, "sample": [8.559776306152344, 130.1519317626953, 89.48310852050781, 137.23558044433594, 130.3450927734375, -12.034095764160156, 0.7882709503173828, 18.49385643005371, 71.22883605957031, -57.17096710205078, -140.6148681640625, 22.744857788085938, 194.15151977539062, 202.85006713867188, -10.48602294921875, 235.1018829345703, 5.305408477783203, 178.9694366455078, 246.2825927734375, 8.124319076538086, 11.438512802124023, 174.30441284179688, -20.158981323242188, 250.8087158203125, -4.8177337646484375, 0.6447868347167969, -191.59271240234375, 107.32646179199219, 85.95211029052734, 82.20590209960938, -19.778785705566406, -6.387264251708984, 68.69031524658203, 0.011865615844726562, 143.57481384277344, -90.33009338378906, 330.29547119140625, 201.6634521484375, 16.988449096679688, 184.8975830078125, 44.01506423950195, -26.744089126586914, 42.38348388671875, 18.682151794433594, 91.95079803466797, 2.024137496948242, -6.726736068725586, 189.76309204101562, 109.37889862060547, 4.0086212158203125, 21.199581146240234, 20.764535903930664, 129.98513793945312, 191.2732391357422, 99.35221099853516, 72.38430786132812, 55.60120391845703, 197.66384887695312, -24.53008270263672, -16.508726119995117, 36.49415588378906, -0.7994575500488281, 25.638656616210938, -50.32716369628906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000485.npy"}
|
|
{"epoch": 0.7331821617535903, "step": 486, "batch_size": 64, "mean": 58.484527587890625, "std": 127.78746032714844, "min": -213.3621063232422, "p10": -97.35025024414061, "median": 46.79801940917969, "p90": 222.17746887207034, "max": 431.5199890136719, "pos_frac": 0.671875, "sample": [19.875633239746094, -20.049087524414062, 334.4012451171875, 2.4435977935791016, -31.90130615234375, -3.7324485778808594, 48.38862609863281, 431.5199890136719, 23.64324188232422, -4.6396942138671875, 3.405263900756836, 214.1768798828125, 124.65935516357422, 89.97415924072266, 68.61234283447266, 45.20741271972656, -33.0211181640625, 207.29315185546875, -1.4264297485351562, 236.95797729492188, 125.6337661743164, 58.506103515625, -71.97196960449219, -103.64862060546875, 225.60629272460938, -177.8350372314453, 44.507354736328125, -82.654052734375, 57.17469024658203, -213.3621063232422, 276.3239440917969, 41.689727783203125, 66.3563232421875, -126.10205078125, 0.6580829620361328, 156.76119995117188, 84.37613677978516, 168.14015197753906, -146.6632537841797, 54.92527389526367, 0.3263282775878906, 185.04827880859375, 86.76504516601562, 190.98123168945312, 196.82073974609375, -79.09671783447266, 151.59510803222656, 162.34042358398438, 76.26382446289062, 186.36924743652344, 124.48117065429688, -38.07298278808594, 23.775619506835938, 227.9166259765625, -3.6523361206054688, -123.29426574707031, -1.9574394226074219, 108.8428955078125, 270.4425048828125, -26.18815803527832, 26.953628540039062, -192.78512573242188, 48.48920822143555, -53.565589904785156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000486.npy"}
|
|
{"epoch": 0.7346938775510204, "step": 487, "batch_size": 64, "mean": 72.108154296875, "std": 119.5491943359375, "min": -167.05856323242188, "p10": -59.73632354736328, "median": 49.12646484375, "p90": 202.2637130737305, "max": 501.8863830566406, "pos_frac": 0.75, "sample": [138.0894317626953, -76.5909423828125, 180.02658081054688, 126.14456939697266, 29.245750427246094, 6.61860466003418, -61.37232971191406, 16.169960021972656, 98.88225555419922, 160.37701416015625, -84.40005493164062, -32.01127624511719, 49.624916076660156, -78.95916748046875, -15.339317321777344, 189.06932067871094, 119.17924499511719, 31.73544692993164, 424.3006591796875, 75.01754760742188, -0.46820831298828125, 251.63330078125, 61.615234375, 205.18727111816406, 148.9229278564453, -17.849849700927734, 48.628013610839844, -154.9063720703125, 12.642021179199219, 81.12191772460938, 153.2347412109375, 82.54157257080078, 94.80907440185547, -0.017606735229492188, 195.44207763671875, 5.165657043457031, 131.53921508789062, -55.918975830078125, 42.663352966308594, 113.30458068847656, 226.68695068359375, 10.226886749267578, 94.4229736328125, 230.03414916992188, -139.47543334960938, 146.201904296875, 1.0979537963867188, -50.5230712890625, 101.09970092773438, -167.05856323242188, -7.17491340637207, 189.14126586914062, 33.413673400878906, 16.032333374023438, 5.6859588623046875, 163.34051513671875, 46.110294342041016, 23.064483642578125, -24.0615234375, 231.9359588623047, 99.06839752197266, 33.540382385253906, 501.8863830566406, 155.1270751953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000487.npy"}
|
|
{"epoch": 0.7362055933484505, "step": 488, "batch_size": 64, "mean": 76.56622314453125, "std": 107.67730712890625, "min": -175.26255798339844, "p10": -34.5872932434082, "median": 49.80772590637207, "p90": 215.76961517333987, "max": 390.54541015625, "pos_frac": 0.703125, "sample": [41.06288146972656, 28.799827575683594, -6.330789566040039, -9.722089767456055, 35.907310485839844, -86.33805084228516, -17.75060272216797, 202.9051055908203, 218.84654235839844, 85.99955749511719, 0.47112083435058594, -1.653472900390625, 197.62542724609375, 22.822006225585938, 16.28563690185547, 250.05093383789062, -37.039215087890625, 181.57630920410156, -0.45305633544921875, 95.63691711425781, 132.57192993164062, 55.55805206298828, 8.868934631347656, -25.581649780273438, -52.99927520751953, 79.26512908935547, -91.91015625, 203.074951171875, -7.870361328125, 200.62283325195312, -19.312515258789062, -54.96910095214844, -14.352163314819336, 170.22105407714844, 230.44166564941406, 123.28102111816406, 187.68626403808594, -28.86614227294922, 241.04571533203125, 262.29876708984375, 245.74513244628906, 174.12905883789062, 178.0651092529297, 0.49755859375, 103.5641860961914, 122.80741119384766, -12.208816528320312, 117.64607238769531, -46.957763671875, 84.57144165039062, 39.36932373046875, 107.02713012695312, -175.26255798339844, 116.50244140625, 7.566375732421875, 70.82486724853516, 390.54541015625, 44.05739974975586, 206.3024139404297, 22.46737289428711, 17.301620483398438, 59.98167419433594, -0.6718063354492188, 208.59011840820312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000488.npy"}
|
|
{"epoch": 0.7377173091458806, "step": 489, "batch_size": 64, "mean": 47.9740104675293, "std": 116.40182495117188, "min": -228.33575439453125, "p10": -90.41388702392578, "median": 33.386043548583984, "p90": 184.04990844726564, "max": 303.9877014160156, "pos_frac": 0.640625, "sample": [-175.08200073242188, 171.48196411132812, 27.100479125976562, 179.83352661132812, 166.84666442871094, 185.76171875, -73.25, 184.68356323242188, 58.10529708862305, 270.28631591796875, 105.57042694091797, 18.23332977294922, 118.50247192382812, -83.85369873046875, -9.776405334472656, -91.39823913574219, -33.75054168701172, -24.947589874267578, 128.53970336914062, 42.960960388183594, 205.225830078125, 74.70469665527344, 159.21998596191406, 25.275127410888672, 145.15750122070312, 166.9261474609375, -4.0568695068359375, -52.007347106933594, -88.1170654296875, 46.72929000854492, -173.64215087890625, -48.03927230834961, 179.01449584960938, 19.20909881591797, 14.267333984375, -14.755645751953125, -35.56169891357422, -4.279993057250977, 182.57138061523438, 164.04623413085938, 56.39869689941406, 167.74484252929688, 28.21204376220703, 303.9877014160156, 92.50443267822266, -127.34596252441406, 204.42413330078125, 22.061973571777344, 125.93860626220703, 135.7136688232422, -193.93801879882812, 23.726905822753906, -27.093013763427734, 15.040647506713867, -10.552391052246094, -228.33575439453125, 175.67074584960938, 48.18816375732422, -135.06915283203125, 107.10751342773438, -63.92662811279297, 184.71163940429688, -1.1291217803955078, 38.56004333496094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000489.npy"}
|
|
{"epoch": 0.7392290249433107, "step": 490, "batch_size": 64, "mean": 57.02571487426758, "std": 132.28671264648438, "min": -283.91497802734375, "p10": -111.39508209228515, "median": 47.00039482116699, "p90": 208.95775604248047, "max": 387.6341552734375, "pos_frac": 0.65625, "sample": [-33.136985778808594, 3.0210723876953125, -283.91497802734375, -7.398063659667969, -3.962808609008789, 93.0311279296875, 53.11543273925781, 184.1785430908203, 219.27056884765625, 289.4499206542969, -172.22500610351562, 203.91787719726562, -202.4371337890625, 8.411558151245117, 32.88226318359375, 106.90579986572266, 161.33636474609375, 126.45423126220703, 316.294677734375, 22.704017639160156, 29.033279418945312, 207.4473876953125, 154.63455200195312, 225.81378173828125, -105.41189575195312, -58.02618408203125, 187.14016723632812, -3.904399871826172, 29.210399627685547, 22.142982482910156, -173.97061157226562, 137.30320739746094, 95.1065673828125, 102.32424926757812, 3.6530017852783203, -27.7818603515625, -30.27875518798828, 209.6050567626953, -130.50210571289062, -23.109710693359375, 139.80203247070312, -73.48223876953125, 96.5151138305664, 150.37030029296875, -1.2639408111572266, 147.55816650390625, 66.63945007324219, 6.727289199829102, -68.27237701416016, -25.973846435546875, 387.6341552734375, 158.52297973632812, -25.32266616821289, 109.26803588867188, 254.22166442871094, 122.0726318359375, 200.0341033935547, 73.37162017822266, 48.318359375, 193.89422607421875, -209.36973571777344, -113.95930480957031, -1.6704864501953125, 45.682430267333984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000490.npy"}
|
|
{"epoch": 0.7407407407407407, "step": 491, "batch_size": 64, "mean": 62.767616271972656, "std": 102.44654083251953, "min": -227.6339111328125, "p10": -43.41940841674804, "median": 43.55835723876953, "p90": 198.94813537597656, "max": 372.2786865234375, "pos_frac": 0.703125, "sample": [163.2362060546875, -34.22332000732422, 118.38143920898438, 20.789794921875, 159.79156494140625, 90.25003814697266, -7.739725112915039, 128.31967163085938, 213.78854370117188, 59.49574661254883, -29.339263916015625, -94.20244598388672, 200.23001098632812, 44.30400848388672, 171.99317932128906, -71.39685821533203, 19.14780044555664, 150.92434692382812, 17.242679595947266, 89.94288635253906, -68.95074462890625, 224.25430297851562, 199.9006805419922, -12.82126235961914, 57.14331817626953, 155.078125, 14.278251647949219, -31.625442504882812, -14.646781921386719, 2.5462074279785156, 198.64649963378906, 118.73469543457031, -51.27122497558594, 44.13526153564453, 126.91353607177734, 22.338138580322266, 42.98145294189453, 372.2786865234375, 59.684967041015625, -54.68254852294922, 197.44497680664062, 19.554107666015625, 16.584205627441406, -19.418275833129883, -2.5639991760253906, 95.31776428222656, -20.2613525390625, -47.2935791015625, 110.71796417236328, 199.07740783691406, -9.052955627441406, 140.23118591308594, 250.1968536376953, 179.26231384277344, 9.138696670532227, 18.1514892578125, 10.255622863769531, 73.82982635498047, 65.35517883300781, -227.6339111328125, -34.379676818847656, 7.879899978637695, -22.32706069946289, 191.20855712890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000491.npy"}
|
|
{"epoch": 0.7422524565381708, "step": 492, "batch_size": 64, "mean": 52.11651611328125, "std": 122.31566619873047, "min": -294.13916015625, "p10": -72.23178939819336, "median": 29.689085006713867, "p90": 213.62440490722656, "max": 328.2103271484375, "pos_frac": 0.6875, "sample": [300.10491943359375, 328.2103271484375, -127.31437683105469, 2.1471099853515625, 217.8428955078125, 13.904195785522461, 109.58013916015625, 85.90130615234375, 73.3993148803711, -4.578081130981445, 186.4989776611328, 210.40243530273438, 83.46441650390625, -43.55181121826172, 19.23834228515625, 191.66903686523438, -205.68072509765625, 84.69236755371094, 4.016532897949219, 164.6166229248047, 170.466064453125, -69.96234893798828, -10.317520141601562, -28.102210998535156, 2.3298873901367188, 74.40115356445312, 122.03629302978516, 242.203857421875, -72.48617553710938, -12.398635864257812, 185.29962158203125, 9.752702713012695, 7.21405029296875, 77.50875854492188, -2.1280994415283203, 33.70491027832031, -294.13916015625, 235.12533569335938, -137.129638671875, -2.6157760620117188, 166.9546356201172, 2.0378646850585938, 92.36949920654297, 11.001567840576172, -58.17134094238281, 123.59052276611328, 17.060482025146484, 60.35223388671875, -71.63822174072266, 151.37315368652344, -201.0841064453125, -47.371559143066406, -15.7769775390625, 162.83717346191406, 229.71636962890625, 25.673259735107422, 152.39840698242188, 101.73953247070312, 121.38232421875, 38.07524108886719, 3.9334354400634766, -107.67779541015625, -63.65093994140625, 215.0052490234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000492.npy"}
|
|
{"epoch": 0.7437641723356009, "step": 493, "batch_size": 64, "mean": 79.32008361816406, "std": 114.28922271728516, "min": -192.24163818359375, "p10": -57.30896911621093, "median": 75.85392379760742, "p90": 218.97237243652347, "max": 375.6128845214844, "pos_frac": 0.703125, "sample": [-9.612258911132812, 13.113201141357422, -20.982269287109375, 175.21884155273438, -45.14356231689453, 79.33755493164062, -139.87176513671875, 12.717891693115234, -0.04984855651855469, -80.02674102783203, 72.92184448242188, 265.26422119140625, 212.9473876953125, 212.97744750976562, 85.50138854980469, -61.19929122924805, 191.55984497070312, -2.290374755859375, 140.76687622070312, 156.39501953125, 28.552942276000977, 45.46526336669922, -1.8214359283447266, 3.3435516357421875, 60.198280334472656, -50.87391662597656, 161.37379455566406, -16.2911376953125, 210.3229522705078, 162.47540283203125, 154.6280975341797, 211.64955139160156, -120.22210693359375, -45.5823860168457, -81.51771545410156, 149.341796875, 192.58958435058594, -33.1827392578125, 225.99627685546875, 2.4090213775634766, 265.423095703125, 121.44499206542969, 127.64805603027344, 375.6128845214844, 109.60062408447266, 221.5416259765625, -60.06684875488281, 222.09152221679688, 147.7896728515625, -17.112144470214844, -17.728103637695312, 178.8866424560547, 78.78600311279297, 26.556137084960938, 232.49331665039062, 155.43775939941406, 57.15928649902344, 60.820648193359375, 117.404052734375, 194.93679809570312, 12.554237365722656, 109.0933609008789, 29.952919006347656, -192.24163818359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000493.npy"}
|
|
{"epoch": 0.745275888133031, "step": 494, "batch_size": 64, "mean": 67.10954284667969, "std": 126.84598541259766, "min": -275.2979431152344, "p10": -68.26264419555663, "median": 62.948524475097656, "p90": 198.74918518066406, "max": 423.99127197265625, "pos_frac": 0.671875, "sample": [139.7861785888672, 67.20830535888672, 77.60774993896484, 65.32212829589844, 198.60372924804688, 8.907058715820312, -8.583480834960938, 163.58164978027344, 145.958984375, -121.61151123046875, 9.620452880859375, -49.207374572753906, -3.4064788818359375, 96.6209487915039, 165.43226623535156, 85.34976196289062, 184.1047821044922, 0.45261383056640625, 55.2209358215332, -132.61190795898438, 29.053730010986328, 423.99127197265625, 139.1819610595703, -36.562225341796875, 48.276710510253906, 191.14187622070312, 201.9328155517578, -9.757225036621094, 165.70977783203125, 196.63389587402344, 176.75161743164062, -198.3287353515625, -216.87152099609375, 183.06982421875, -7.761957168579102, -94.19355773925781, -61.260459899902344, 177.31631469726562, 39.60276794433594, 151.68716430664062, 81.64089965820312, -3.1251392364501953, -275.2979431152344, -6.363368988037109, 28.31185531616211, 55.532386779785156, 244.6869659423828, -6.412588119506836, -44.33863830566406, 60.574920654296875, 198.8115234375, 67.05203247070312, 52.92832946777344, 285.98046875, 101.4350814819336, 108.25965118408203, -39.971038818359375, -16.870952606201172, 180.34109497070312, 220.9488525390625, 290.7220764160156, -71.26358032226562, -53.39115524291992, 186.84841918945312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000494.npy"}
|
|
{"epoch": 0.7467876039304611, "step": 495, "batch_size": 64, "mean": 54.24787139892578, "std": 124.63041687011719, "min": -220.7160186767578, "p10": -81.2715934753418, "median": 23.46154022216797, "p90": 197.0527572631836, "max": 481.66058349609375, "pos_frac": 0.6875, "sample": [-73.22246551513672, -103.44487762451172, -220.7160186767578, -159.58164978027344, 172.6035919189453, -5.704750061035156, -141.46937561035156, 19.524642944335938, 87.56777954101562, -80.12492370605469, 4.540458679199219, -139.15652465820312, -21.255966186523438, 97.84616088867188, 67.18733978271484, 196.1339111328125, -40.611915588378906, 24.16357421875, 124.6347427368164, -9.36640739440918, 33.64399337768555, 153.6342010498047, 158.45449829101562, 3.505807876586914, 182.6322021484375, 197.44654846191406, -10.331485748291016, 103.0833969116211, -0.8437538146972656, 44.64327621459961, 481.66058349609375, 4.625970840454102, 28.201454162597656, -77.06573486328125, 14.572957992553711, 38.55913543701172, 1.0859527587890625, 143.9878692626953, 319.0989685058594, 168.57733154296875, -99.1695556640625, -14.976127624511719, 50.09999465942383, 67.30255126953125, 222.89276123046875, 20.62812042236328, 81.7789077758789, -7.804595947265625, 11.212287902832031, 197.72747802734375, 9.16847038269043, 170.79725646972656, 60.759910583496094, -19.198333740234375, -81.76302337646484, 361.45880126953125, 94.52499389648438, -10.516357421875, 2.7925033569335938, 91.90890502929688, 145.13787841796875, 22.759506225585938, 297.15631103515625, 8.464740753173828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000495.npy"}
|
|
{"epoch": 0.7482993197278912, "step": 496, "batch_size": 64, "mean": 72.6009750366211, "std": 141.6013946533203, "min": -234.54428100585938, "p10": -67.76335372924804, "median": 44.30210494995117, "p90": 225.53213348388675, "max": 693.777099609375, "pos_frac": 0.6875, "sample": [136.02012634277344, 191.78359985351562, 217.51596069335938, 186.4367218017578, -53.786964416503906, 29.2755126953125, 90.7303466796875, 358.4295349121094, 19.415239334106445, 126.94081115722656, -78.62120056152344, 43.878211975097656, 51.449241638183594, 48.061370849609375, 4.65705680847168, -3.4131546020507812, -7.583805084228516, 12.348665237426758, 177.1243438720703, 53.92439270019531, -48.388710021972656, 693.777099609375, -5.023899078369141, 206.5880126953125, 53.4730224609375, -234.54428100585938, 359.4934997558594, 194.86209106445312, -1.2463531494140625, 278.8570861816406, -81.4455337524414, 13.824604034423828, 203.64212036132812, 249.22311401367188, 1.7720184326171875, 0.4478015899658203, -120.98765563964844, -19.062196731567383, 86.53433227539062, 122.2780532836914, 25.528167724609375, 70.61058044433594, 252.5894775390625, 97.63308715820312, 155.91494750976562, 44.72599792480469, -48.160400390625, 91.00406646728516, -20.74559783935547, -3.499897003173828, 228.96763610839844, -177.25125122070312, -51.46873474121094, 4.926748275756836, 148.78170776367188, 186.76341247558594, 15.569725036621094, 139.28662109375, -73.75323486328125, -7.9453125, -89.7103271484375, 89.1182632446289, 21.98241424560547, -13.065788269042969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000496.npy"}
|
|
{"epoch": 0.7498110355253212, "step": 497, "batch_size": 64, "mean": 76.42105102539062, "std": 111.30982208251953, "min": -230.065185546875, "p10": -27.763135910034176, "median": 81.18371963500977, "p90": 218.9056564331055, "max": 280.3525085449219, "pos_frac": 0.6875, "sample": [186.78912353515625, 30.36499786376953, 280.3525085449219, 158.76943969726562, 247.4112548828125, 32.36585235595703, -19.108505249023438, -7.4704742431640625, 17.46409034729004, 104.33802795410156, 158.60787963867188, 147.71661376953125, 113.7916259765625, -15.574417114257812, 131.02755737304688, -10.125160217285156, 157.21517944335938, -190.0374755859375, 181.81466674804688, 226.97003173828125, -2.9502944946289062, -7.403621673583984, -7.646827697753906, -7.065055847167969, 79.3114013671875, -47.55555725097656, -23.748321533203125, 211.9964141845703, 195.43629455566406, 205.11160278320312, -6.51140022277832, 113.57386779785156, 101.88461303710938, 224.16525268554688, -196.5443115234375, 176.71359252929688, 221.86676025390625, 83.05603790283203, 58.360111236572266, -28.875701904296875, 178.88803100585938, 131.03347778320312, -21.302005767822266, 96.93136596679688, 113.79469299316406, -40.198516845703125, 242.498046875, 115.1283950805664, -11.350257873535156, 32.01854705810547, 50.6694450378418, 71.21572875976562, 56.558349609375, 131.83450317382812, 68.76189422607422, 249.03746032714844, -230.065185546875, 6.0741424560546875, -25.16714859008789, 196.5404510498047, 90.90823364257812, 62.22876739501953, -125.09135437011719, 174.14260864257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000497.npy"}
|
|
{"epoch": 0.7513227513227513, "step": 498, "batch_size": 64, "mean": 75.59732055664062, "std": 103.95801544189453, "min": -300.1185607910156, "p10": -36.00460739135742, "median": 56.65217971801758, "p90": 204.14622650146484, "max": 282.9949951171875, "pos_frac": 0.828125, "sample": [-33.62251281738281, 104.55196380615234, 160.92027282714844, 92.203369140625, 94.76129150390625, -6.0774688720703125, 182.40689086914062, 177.99481201171875, -19.022693634033203, -104.31431579589844, 204.74440002441406, 72.601806640625, 77.58062744140625, -39.810508728027344, 158.3076171875, 104.54364013671875, 12.420341491699219, -40.203208923339844, 106.02054595947266, 148.53067016601562, 282.9949951171875, 41.998939514160156, 200.1568603515625, 23.942501068115234, 47.064048767089844, 58.27433776855469, -37.02550506591797, 184.78912353515625, 186.00018310546875, 55.03002166748047, 226.50555419921875, 182.82164001464844, 4.717792510986328, 23.717636108398438, 43.56945037841797, -94.25616455078125, -9.471870422363281, 58.834442138671875, 2.542257308959961, 13.688455581665039, 124.12776184082031, 198.34397888183594, -300.1185607910156, 157.9335479736328, 116.59391021728516, 21.649391174316406, 5.945911407470703, 45.136634826660156, 9.776538848876953, 59.19166946411133, 241.001220703125, 202.75048828125, 114.41732025146484, 25.35942840576172, 237.06776428222656, 5.519540786743164, 274.5181884765625, 30.647064208984375, 21.596956253051758, 20.60968017578125, 44.0350341796875, -37.201454162597656, 26.064781188964844, 242.8295440673828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000498.npy"}
|
|
{"epoch": 0.7528344671201814, "step": 499, "batch_size": 64, "mean": 78.90623474121094, "std": 137.9642791748047, "min": -218.03797912597656, "p10": -113.25960464477538, "median": 92.71980667114258, "p90": 242.74781036376953, "max": 450.5357666015625, "pos_frac": 0.765625, "sample": [126.96273040771484, 231.494140625, -191.93719482421875, 317.85693359375, 29.030838012695312, 62.41396713256836, 63.19920349121094, -188.76272583007812, 28.696247100830078, 122.60783386230469, 84.47969818115234, 160.3883056640625, 143.12477111816406, -162.6630401611328, 244.56832885742188, 6.590476989746094, 450.5357666015625, -122.4396743774414, 209.11363220214844, 191.71795654296875, 102.20329284667969, -85.65121459960938, -32.19789123535156, -117.98747253417969, -102.22791290283203, 139.53958129882812, 106.26168823242188, 242.90516662597656, -82.26425170898438, 116.9041519165039, -47.4461555480957, 33.02797317504883, 136.61056518554688, 183.09678649902344, 84.69647216796875, -47.049896240234375, 26.136245727539062, 136.39187622070312, 9.553579330444336, 85.49810791015625, 242.38064575195312, 8.178184509277344, 214.93765258789062, 56.22475051879883, -218.03797912597656, 99.9415054321289, 106.98661041259766, 102.90984344482422, 276.0671081542969, 267.71563720703125, 214.96099853515625, 4.437812805175781, 50.728759765625, -77.90618896484375, -163.7700653076172, 236.68826293945312, 141.65414428710938, 150.10394287109375, -56.112335205078125, 288.564453125, 38.696266174316406, 177.37734985351562, 173.42922973632812, 18.863601684570312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000499.npy"}
|
|
{"epoch": 0.7543461829176115, "step": 500, "batch_size": 64, "mean": 66.60507202148438, "std": 125.71446228027344, "min": -367.20953369140625, "p10": -116.55373458862302, "median": 81.85843658447266, "p90": 206.64544677734375, "max": 273.4119873046875, "pos_frac": 0.765625, "sample": [-69.26246643066406, 125.6677017211914, 16.293556213378906, 10.758878707885742, 151.69119262695312, 100.67301940917969, 107.94854736328125, 168.36419677734375, 119.7952880859375, 206.6071014404297, 52.33918762207031, 185.27529907226562, 190.49374389648438, -83.44556427001953, 200.2352752685547, 4.3524017333984375, 182.75550842285156, 83.657958984375, 32.56549835205078, -19.323577880859375, 222.15957641601562, 180.5138702392578, 27.588294982910156, 172.58262634277344, -12.900863647460938, 206.66188049316406, 76.01371765136719, 14.499080657958984, -44.72179412841797, -139.5426025390625, -155.14398193359375, 124.92799377441406, 25.46646499633789, 68.07395935058594, -150.63134765625, 9.100912094116211, 86.81509399414062, 273.4119873046875, 229.03787231445312, -1.6068954467773438, 127.13615417480469, 12.782339096069336, 209.0589141845703, 60.6956672668457, -29.17166519165039, 152.440185546875, 83.76261901855469, -367.20953369140625, 184.51361083984375, -213.9896240234375, 235.8096466064453, -137.15029907226562, 3.3564605712890625, 173.19891357421875, 161.57827758789062, 4.942829132080078, 1.7517318725585938, 80.05891418457031, 138.08058166503906, -42.556190490722656, -130.74295043945312, 157.6752166748047, 193.77484130859375, 223.17965698242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000500.npy"}
|
|
{"epoch": 0.7558578987150416, "step": 501, "batch_size": 64, "mean": 59.56120681762695, "std": 84.08857727050781, "min": -150.12399291992188, "p10": -21.089498901367186, "median": 48.3173942565918, "p90": 182.81955566406253, "max": 239.4927520751953, "pos_frac": 0.71875, "sample": [-4.822700500488281, 203.91806030273438, -19.996902465820312, 55.80594253540039, 122.3018798828125, 150.38450622558594, -42.30564880371094, 48.893394470214844, 14.082164764404297, 137.16726684570312, 142.7738494873047, 37.54304504394531, 86.18128967285156, -11.465408325195312, -16.485273361206055, -12.59309196472168, 38.174156188964844, 177.52862548828125, -9.943702697753906, -11.759126663208008, 80.38343048095703, -10.373970031738281, 1.3521690368652344, 239.4927520751953, 185.08709716796875, 170.56503295898438, 163.43894958496094, 226.6585693359375, 3.455982208251953, 17.6080322265625, 49.019371032714844, -115.13015747070312, -4.965080261230469, 5.321399688720703, 87.01296997070312, 108.5869140625, 122.9451675415039, 1.0194625854492188, -4.013256072998047, 105.79191589355469, 74.41789245605469, 156.61996459960938, 26.104869842529297, 131.72100830078125, -24.96381378173828, -150.12399291992188, 55.5465087890625, 189.6407012939453, -51.825008392333984, 76.1128158569336, 47.74139404296875, 52.978416442871094, 19.773170471191406, -21.557754516601562, 92.02821350097656, -14.288980484008789, 26.257049560546875, 3.4881839752197266, 207.59523010253906, 188.1885528564453, 69.52479553222656, 0.6273689270019531, 160.18402099609375, -22.51226806640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000501.npy"}
|
|
{"epoch": 0.7573696145124716, "step": 502, "batch_size": 64, "mean": 66.10803985595703, "std": 101.0274429321289, "min": -191.24635314941406, "p10": -46.89643478393554, "median": 65.47760009765625, "p90": 207.30366516113284, "max": 289.37060546875, "pos_frac": 0.734375, "sample": [-32.66211700439453, 7.417545318603516, 19.69195556640625, -48.86933898925781, 210.95199584960938, 130.63795471191406, 32.79405975341797, 73.13201904296875, -108.58568572998047, 37.86737060546875, 35.4802360534668, 198.7908935546875, -13.61529541015625, -68.5265121459961, 191.67013549804688, 289.37060546875, 67.5928955078125, -191.24635314941406, -42.292991638183594, 104.89546203613281, 35.52799987792969, -21.903972625732422, 39.720298767089844, 176.78662109375, 212.9005126953125, 99.4075927734375, 138.9971160888672, -15.337299346923828, -1.9478511810302734, 63.3623046875, 35.15833282470703, 11.408735275268555, 81.90799713134766, 10.315397262573242, 117.38804626464844, 84.17213439941406, 195.0272674560547, 227.4236602783203, -90.19058227539062, 0.4353981018066406, -4.090812683105469, 27.563270568847656, 77.22909545898438, 7.428466796875, 104.67550659179688, -15.69219970703125, 45.60968017578125, 213.20010375976562, 90.14653778076172, -104.38423156738281, 77.636962890625, 113.1556396484375, 179.48959350585938, 132.53768920898438, 258.1601867675781, -127.66845703125, 75.23558044433594, 192.89755249023438, -28.631576538085938, 191.87387084960938, -0.9999351501464844, 113.86620330810547, 220.478759765625, 96.14269256591797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000502.npy"}
|
|
{"epoch": 0.7588813303099018, "step": 503, "batch_size": 64, "mean": 66.83392333984375, "std": 109.58280181884766, "min": -247.07864379882812, "p10": -37.58009796142578, "median": 48.29543685913086, "p90": 197.74463806152346, "max": 248.44761657714844, "pos_frac": 0.75, "sample": [174.8060302734375, 8.622066497802734, -35.563934326171875, 69.2811279296875, 142.944091796875, 49.92017364501953, 234.7335205078125, -247.07864379882812, -4.4712982177734375, 145.11428833007812, 163.114990234375, -28.646133422851562, 23.06507110595703, 192.07305908203125, -38.44416809082031, 99.72653198242188, -29.546110153198242, 16.7193603515625, 248.44761657714844, -50.77526092529297, 34.424217224121094, 144.10491943359375, 187.73948669433594, 46.67070007324219, -0.09062957763671875, 58.91072082519531, 158.92776489257812, 212.05657958984375, -12.941936492919922, 144.53814697265625, -20.207759857177734, 85.43343353271484, 206.38804626464844, 195.18310546875, 199.41036987304688, -23.966537475585938, 135.47955322265625, 7.874595642089844, -162.4976348876953, 23.56592559814453, 1.0543937683105469, 29.40740966796875, 5.624284744262695, 112.44648742675781, -3.061115264892578, 19.456504821777344, 43.94367980957031, 211.96701049804688, 140.23350524902344, 13.550277709960938, 193.43341064453125, 176.18499755859375, -183.19700622558594, 9.87216567993164, 173.41404724121094, 165.51669311523438, 5.388284683227539, -85.89685821533203, 162.93386840820312, 198.84243774414062, 13.453784942626953, 159.00534057617188, 133.5734405517578, -174.82106018066406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000503.npy"}
|
|
{"epoch": 0.7603930461073318, "step": 504, "batch_size": 64, "mean": 88.09020233154297, "std": 97.39571380615234, "min": -160.4856414794922, "p10": -11.437057113647459, "median": 87.90879821777344, "p90": 213.64789428710938, "max": 310.5306091308594, "pos_frac": 0.8125, "sample": [22.312774658203125, 187.877197265625, 12.688125610351562, 130.70204162597656, 260.1910400390625, -19.080726623535156, 121.52255249023438, 26.25629234313965, 210.4022979736328, 17.90947914123535, 78.30487060546875, 73.52044677734375, -1.8647613525390625, 215.0388641357422, 41.84657669067383, 180.1853485107422, 174.5508270263672, -118.6786117553711, 120.95401000976562, 144.39529418945312, -15.499210357666016, 104.16217041015625, 3.3938827514648438, 186.02220153808594, 215.76829528808594, -25.1837158203125, 74.31339263916016, 208.1085662841797, 100.14192962646484, 310.5306091308594, 266.2251281738281, 263.8523254394531, -8.65219497680664, 63.9274787902832, 88.83810424804688, -12.630569458007812, 7.768165588378906, -135.27386474609375, 180.29678344726562, 83.96578979492188, 175.72500610351562, -1.4944610595703125, -3.024616241455078, 49.259368896484375, 88.18251037597656, 169.00172424316406, 120.5439453125, 64.98949432373047, -7.110504150390625, 142.30921936035156, 113.70046997070312, 98.94169616699219, -160.4856414794922, 168.08187866210938, 128.1470489501953, 33.820091247558594, 14.627561569213867, 87.63508605957031, 122.46438598632812, 10.44114875793457, 3.713083267211914, 222.23818969726562, 123.29286193847656, 33.66425323486328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000504.npy"}
|
|
{"epoch": 0.7619047619047619, "step": 505, "batch_size": 64, "mean": 58.554752349853516, "std": 123.0129165649414, "min": -265.3995056152344, "p10": -67.87525405883788, "median": 34.5292854309082, "p90": 203.13399810791014, "max": 411.213623046875, "pos_frac": 0.734375, "sample": [135.7371826171875, 1.2002220153808594, 98.97615051269531, 19.127742767333984, 2.3211402893066406, 411.213623046875, -150.29583740234375, 289.87847900390625, 175.4066619873047, 195.18687438964844, 200.01473999023438, -10.844680786132812, 60.973960876464844, 2.2358932495117188, 23.900657653808594, -50.012451171875, -41.47381591796875, -20.283405303955078, 145.5940399169922, 8.92849349975586, -92.73246765136719, 185.8019561767578, 168.366943359375, 37.46554946899414, 62.97332763671875, 275.4093933105469, 7.843650817871094, -68.85795593261719, 32.738006591796875, 0.6475410461425781, -64.73553466796875, 17.14842987060547, 203.32955932617188, -173.04083251953125, 214.81536865234375, 172.75814819335938, 134.974365234375, 202.6776885986328, -8.994352340698242, -23.3902587890625, 48.198822021484375, 36.32056427001953, -15.458778381347656, -138.66748046875, 39.52777099609375, 65.89913940429688, 104.50543975830078, -17.215028762817383, 233.09535217285156, -151.2792205810547, 2.9036483764648438, 183.41342163085938, 190.33155822753906, 47.09168243408203, 180.72396850585938, 55.33235168457031, 163.58364868164062, 30.302047729492188, 15.188392639160156, 214.92333984375, 5.96705436706543, 0.813995361328125, -65.58228302001953, -265.3995056152344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000505.npy"}
|
|
{"epoch": 0.763416477702192, "step": 506, "batch_size": 64, "mean": 64.28105163574219, "std": 121.10800170898438, "min": -305.2364196777344, "p10": -51.27606391906736, "median": 52.677738189697266, "p90": 198.6438751220703, "max": 300.2073974609375, "pos_frac": 0.703125, "sample": [21.386754989624023, 157.9447479248047, 9.253665924072266, -30.7030029296875, 186.2330780029297, 193.5278778076172, 93.21757507324219, -60.09309005737305, 162.4051055908203, -30.19354248046875, 4.303945541381836, 93.17290496826172, 54.733604431152344, -4.539436340332031, 143.0346221923828, 122.61099243164062, -194.25982666015625, 251.1394805908203, 261.11651611328125, 24.20960235595703, 199.27783203125, -155.812255859375, -15.518722534179688, -305.2364196777344, -150.0302734375, -190.2160186767578, 50.62187194824219, 211.18797302246094, 293.7232360839844, 195.50555419921875, 187.37442016601562, -14.776763916015625, 300.2073974609375, -16.58050537109375, 191.21786499023438, 16.36046600341797, 180.47572326660156, 103.32947540283203, 134.63818359375, 27.821868896484375, -129.21133422851562, 19.990066528320312, 14.638687133789062, 11.230033874511719, 118.25872802734375, 105.86326599121094, -6.063488006591797, 75.96639251708984, 132.1175537109375, 61.529212951660156, 37.007896423339844, 197.16464233398438, -7.616706848144531, 158.70465087890625, 131.76914978027344, -3.7402420043945312, 17.47112274169922, 142.35977172851562, -27.57837677001953, -8.144824981689453, 42.868865966796875, 211.602783203125, 123.05728149414062, -7.3304443359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000506.npy"}
|
|
{"epoch": 0.764928193499622, "step": 507, "batch_size": 64, "mean": 70.91971588134766, "std": 130.473876953125, "min": -264.5625, "p10": -79.39933013916016, "median": 50.775596618652344, "p90": 237.90662536621102, "max": 425.91326904296875, "pos_frac": 0.671875, "sample": [185.24050903320312, 17.58203125, -6.5386199951171875, 69.438720703125, 395.555419921875, -18.58429718017578, 335.273193359375, -264.5625, -81.79350280761719, 135.43960571289062, 94.72273254394531, 48.79698181152344, -73.81292724609375, 130.9049072265625, -46.990318298339844, -118.51202392578125, 78.12028503417969, -31.397239685058594, 194.1787567138672, 31.171031951904297, 70.78846740722656, 52.75421142578125, 184.8795623779297, -24.61881446838379, 218.79827880859375, -8.076974868774414, 64.7177505493164, 282.80804443359375, 113.54618835449219, 8.59625244140625, 77.95925903320312, -0.401458740234375, 320.5631408691406, -44.24999237060547, 210.88665771484375, 149.15916442871094, -83.28079223632812, 214.9937286376953, -97.57632446289062, -1.20770263671875, -43.550933837890625, 6.058807373046875, -6.605476379394531, 159.69952392578125, 137.8975830078125, 10.427474975585938, 45.22808074951172, 25.185882568359375, 425.91326904296875, 252.16793823242188, -44.70391845703125, 105.69387817382812, -51.097511291503906, 8.035011291503906, 129.4208984375, 5.518745422363281, 154.9907989501953, -82.88330078125, 60.457801818847656, -118.61602783203125, 23.831329345703125, 246.09591674804688, 131.4019012451172, 173.02239990234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000507.npy"}
|
|
{"epoch": 0.7664399092970522, "step": 508, "batch_size": 64, "mean": 81.78291320800781, "std": 121.95687866210938, "min": -189.1245880126953, "p10": -64.15365829467773, "median": 63.70330047607422, "p90": 227.58724670410157, "max": 337.59588623046875, "pos_frac": 0.703125, "sample": [157.34664916992188, 36.48347473144531, 163.3229217529297, -2.4368438720703125, 50.05658721923828, -55.56831359863281, 246.98870849609375, -1.52734375, 157.4958953857422, 228.27130126953125, 279.8604736328125, 171.470947265625, 39.98506164550781, -7.3861541748046875, 59.381988525390625, 146.15390014648438, -179.46932983398438, 57.43536376953125, 24.426132202148438, -81.89662170410156, 18.04095458984375, 26.332183837890625, 160.79946899414062, -67.83309173583984, -54.87909698486328, -189.1245880126953, 85.70721435546875, -0.1964855194091797, 337.59588623046875, 101.61476135253906, -76.1015625, 327.4730224609375, 107.16999816894531, 83.81011199951172, 151.97332763671875, 157.13064575195312, 121.01106262207031, 148.39627075195312, 193.57437133789062, -7.114738464355469, 3.784942626953125, -36.56281280517578, 172.16766357421875, 215.9315185546875, -15.571502685546875, -89.80458068847656, 149.88980102539062, 292.94866943359375, 223.43972778320312, 8.402551651000977, 210.23565673828125, -3.581024169921875, 65.07185363769531, -3.2853221893310547, 7.569091796875, -159.98513793945312, -19.553573608398438, 208.20159912109375, 225.64788818359375, 274.70855712890625, 87.40190887451172, 62.334747314453125, 12.948516845703125, 225.99111938476562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000508.npy"}
|
|
{"epoch": 0.7679516250944822, "step": 509, "batch_size": 64, "mean": 63.084320068359375, "std": 114.2491683959961, "min": -214.39892578125, "p10": -97.65310592651365, "median": 66.98635482788086, "p90": 212.11127471923828, "max": 249.33041381835938, "pos_frac": 0.6875, "sample": [54.58857727050781, -30.741741180419922, 103.43841552734375, 191.55145263671875, 84.67463684082031, -3.2733592987060547, 8.789505004882812, -18.431840896606445, -106.3602066040039, 71.44154357910156, 25.479209899902344, 84.97152709960938, -183.05459594726562, 101.54344177246094, 193.53921508789062, -151.28871154785156, 224.23284912109375, 170.74090576171875, -37.91093444824219, 20.268218994140625, 167.40550231933594, -105.99433135986328, 31.7703857421875, -22.535659790039062, 249.33041381835938, 38.65873718261719, -44.510704040527344, 27.373165130615234, 76.53021240234375, -8.121856689453125, 246.08074951171875, 26.238727569580078, 212.23672485351562, 157.85301208496094, 56.9134521484375, -2.262716293334961, 181.32400512695312, 99.15907287597656, 169.53033447265625, 182.64024353027344, 20.057334899902344, -11.022445678710938, 223.70953369140625, -78.19024658203125, 146.2564697265625, 228.55455017089844, 64.65882873535156, 101.94682312011719, 218.48068237304688, 166.04681396484375, 208.87850952148438, -39.762733459472656, -58.345542907714844, -214.39892578125, -10.579925537109375, 170.6325225830078, 105.75310516357422, -143.90914916992188, -126.11251068115234, 88.77281951904297, 10.972003936767578, 69.31388092041016, 211.8185577392578, 140.0479278564453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000509.npy"}
|
|
{"epoch": 0.7694633408919124, "step": 510, "batch_size": 64, "mean": 80.7472915649414, "std": 139.20419311523438, "min": -213.3067626953125, "p10": -90.16721725463864, "median": 55.65388107299805, "p90": 257.7420989990235, "max": 458.7113952636719, "pos_frac": 0.765625, "sample": [75.76026916503906, 194.29563903808594, 201.89614868164062, 162.65245056152344, 23.479766845703125, 11.303375244140625, 23.584148406982422, 235.3883056640625, -6.854347229003906, 17.681915283203125, -55.374671936035156, 48.39482879638672, 24.712032318115234, 43.64979553222656, -169.63653564453125, -17.972877502441406, 1.8848114013671875, 62.77284240722656, -54.97747802734375, 12.33537483215332, 82.24151611328125, 195.47718811035156, 220.92771911621094, -105.07830810546875, 7.806632995605469, 197.56871032714844, 38.594871520996094, -38.16893005371094, -0.5836372375488281, 152.62509155273438, 46.00018310546875, 133.45166015625, 309.7549743652344, 82.22937774658203, 11.777339935302734, 174.24502563476562, 83.78777313232422, 281.9734191894531, 80.43293762207031, 14.043792724609375, 5.33131217956543, -165.96237182617188, 191.80377197265625, 79.37164306640625, 303.51708984375, 18.530479431152344, 159.89599609375, 261.82574462890625, 456.7504577636719, -108.52415466308594, 48.53491973876953, 88.01870727539062, 188.5074462890625, 177.75775146484375, -143.2693328857422, -1.1539421081542969, 285.1875915527344, 248.21359252929688, -12.928565979003906, 135.15695190429688, 458.7113952636719, 76.57984924316406, -174.80616760253906, -213.3067626953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000510.npy"}
|
|
{"epoch": 0.7709750566893424, "step": 511, "batch_size": 64, "mean": 91.31495666503906, "std": 109.50364685058594, "min": -149.4381103515625, "p10": -42.93028450012206, "median": 80.55558395385742, "p90": 223.65815277099614, "max": 285.81561279296875, "pos_frac": 0.78125, "sample": [227.29400634765625, 215.685302734375, 84.445556640625, 198.57167053222656, 32.52652359008789, -24.09185791015625, 48.199039459228516, -6.872947692871094, 156.22515869140625, 211.4234619140625, 112.64822387695312, -83.57727813720703, 21.662673950195312, 81.26009368896484, 191.31784057617188, 263.239501953125, 179.3805694580078, -30.169570922851562, 156.79673767089844, 129.84681701660156, 201.79330444335938, 27.84588623046875, 72.93561553955078, -109.35492706298828, -131.09353637695312, -46.076751708984375, 227.07508850097656, 137.2995147705078, 116.2163314819336, 11.5826416015625, 41.70664978027344, 201.38485717773438, 60.606842041015625, 79.85107421875, 184.18600463867188, 166.9998016357422, 174.31362915039062, 17.94961166381836, -16.735916137695312, 17.374113082885742, 65.58369445800781, 72.68904113769531, 209.24610900878906, 238.67355346679688, 285.81561279296875, 166.51663208007812, -149.4381103515625, 69.77490234375, -9.388154983520508, 193.48658752441406, 37.713218688964844, 0.178009033203125, 160.56626892089844, 183.77798461914062, 7.364601135253906, 76.97584533691406, -135.08258056640625, 253.29833984375, 239.8716583251953, 167.64920043945312, 209.75454711914062, -6.349781036376953, -60.602813720703125, -35.58852767944336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000511.npy"}
|
|
{"epoch": 0.7724867724867724, "step": 512, "batch_size": 64, "mean": 56.685821533203125, "std": 115.59736633300781, "min": -313.17926025390625, "p10": -77.70474166870117, "median": 53.52577209472656, "p90": 190.4505401611328, "max": 262.98583984375, "pos_frac": 0.6875, "sample": [190.83441162109375, 101.94964599609375, 26.23381233215332, -168.68994140625, -76.14413452148438, 189.55484008789062, 59.69887924194336, -0.48945045471191406, 70.16997528076172, 141.75962829589844, 159.3904266357422, 226.5083465576172, -313.17926025390625, -62.691585540771484, 9.842544555664062, -57.46974182128906, -16.896278381347656, 3.9128379821777344, 175.81512451171875, 7.668039321899414, 212.36569213867188, 187.67250061035156, -9.916908264160156, 193.30123901367188, -34.20298767089844, 142.65914916992188, -105.57157897949219, 171.85739135742188, -78.37357330322266, 156.4342498779297, -1.9402503967285156, 186.35711669921875, 173.75006103515625, 3.117544174194336, 148.38157653808594, 49.18821716308594, 6.505287170410156, 126.29777526855469, 6.760019302368164, 195.71963500976562, -0.5864410400390625, -159.41578674316406, -12.585329055786133, -34.87174987792969, 88.87591552734375, 262.98583984375, 56.127113342285156, 35.536251068115234, 75.6864013671875, 7.059589385986328, 174.57064819335938, -8.500946044921875, 63.846771240234375, -155.911865234375, 42.07041931152344, 176.74549865722656, 197.3009033203125, 98.39340209960938, 183.91200256347656, 165.5975341796875, 50.92443084716797, -46.82062530517578, 97.9476318359375, -129.13540649414062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000512.npy"}
|
|
{"epoch": 0.7739984882842026, "step": 513, "batch_size": 64, "mean": 74.24784851074219, "std": 102.83036804199219, "min": -202.87594604492188, "p10": -37.763645553588866, "median": 47.17817687988281, "p90": 208.55105743408205, "max": 373.1201171875, "pos_frac": 0.765625, "sample": [316.7572326660156, 162.8611297607422, 188.58139038085938, 129.0252227783203, 165.6680908203125, 56.89995193481445, 167.3289794921875, -2.467151641845703, -2.051013946533203, -14.138465881347656, 72.37532806396484, -93.58216857910156, 170.97201538085938, 134.96270751953125, 119.93276977539062, 118.48773956298828, -35.9754524230957, 11.209281921386719, 205.59854125976562, -3.9848403930664062, 28.776962280273438, 227.5618438720703, 134.17068481445312, 92.259521484375, 222.78692626953125, 6.163238525390625, -38.53001403808594, 117.86761474609375, 37.07752990722656, 107.64635467529297, 17.230628967285156, 98.8453369140625, 123.5400619506836, -202.87594604492188, 84.28963470458984, 14.768075942993164, -54.962974548339844, 183.77346801757812, 373.1201171875, 29.908008575439453, 2.1320457458496094, -1.9157543182373047, 242.60696411132812, 16.346405029296875, 154.12579345703125, 209.81642150878906, 230.64788818359375, 22.743385314941406, 162.45291137695312, 103.78530883789062, 47.125205993652344, -4.7220001220703125, 30.951499938964844, 46.25563049316406, -53.27374267578125, -70.63130187988281, 47.23114776611328, 22.488506317138672, -8.571430206298828, 24.05122947692871, 107.69667053222656, -64.21709442138672, 9.346176147460938, 3.511707305908203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000513.npy"}
|
|
{"epoch": 0.7755102040816326, "step": 514, "batch_size": 64, "mean": 83.78589630126953, "std": 117.93494415283203, "min": -197.56700134277344, "p10": -62.07444038391111, "median": 97.64902114868164, "p90": 202.0567184448242, "max": 446.0457763671875, "pos_frac": 0.734375, "sample": [5.008085250854492, 143.93643188476562, 127.70236206054688, 122.34954833984375, 97.22550964355469, 77.76970672607422, 201.92039489746094, -34.4169921875, 68.65164184570312, -12.79727554321289, 24.06686782836914, 216.33786010742188, 138.45721435546875, 102.76022338867188, -11.949609756469727, 119.89546966552734, -150.92764282226562, 157.82664489746094, 12.211959838867188, 79.49144744873047, -197.56700134277344, -0.6350936889648438, 183.01612854003906, 156.97996520996094, 219.13571166992188, 14.252487182617188, 60.318756103515625, 172.58184814453125, 133.6764678955078, 243.60617065429688, -72.66744995117188, -80.2161636352539, 160.2036590576172, 202.11514282226562, -0.7139968872070312, 98.0725326538086, 149.96392822265625, 176.83053588867188, -17.863235473632812, 154.86485290527344, 66.22472381591797, -91.58717346191406, -37.357418060302734, 14.762603759765625, 186.02613830566406, 91.89107513427734, 180.15919494628906, -22.451461791992188, 20.856857299804688, 116.0017318725586, -14.821601867675781, 70.58038330078125, -19.982826232910156, 184.51385498046875, -196.05899047851562, 135.6143798828125, 230.3398895263672, 186.61737060546875, 281.5407409667969, 7.962278366088867, 446.0457763671875, 194.55816650390625, -111.15303039550781, 200.53953552246094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000514.npy"}
|
|
{"epoch": 0.7770219198790628, "step": 515, "batch_size": 64, "mean": 66.12013244628906, "std": 112.36283874511719, "min": -279.9820861816406, "p10": -75.49588088989256, "median": 69.07321548461914, "p90": 210.8037216186524, "max": 251.96482849121094, "pos_frac": 0.6875, "sample": [-12.638690948486328, 134.76422119140625, -11.173454284667969, 35.871620178222656, 251.96482849121094, 178.48724365234375, 8.034481048583984, 192.58241271972656, 8.868118286132812, 216.03280639648438, 68.49274444580078, 38.02021026611328, 107.03733825683594, -9.368371963500977, 142.4076385498047, 101.50811767578125, -104.9447021484375, 222.6848602294922, -99.16634368896484, -84.68341064453125, 98.77579498291016, 5.906684875488281, -12.74542236328125, 58.69874572753906, -279.9820861816406, 148.2129364013672, 198.12063598632812, 69.6536865234375, -26.38689422607422, -61.88225555419922, 167.69937133789062, 168.53627014160156, -2.3702964782714844, -0.7986049652099609, 45.5703125, 179.71908569335938, -130.2364501953125, 180.6698455810547, -21.969640731811523, 145.2417449951172, -81.33029174804688, 114.37071228027344, 239.60218811035156, -161.43618774414062, 96.20402526855469, 226.3041229248047, 28.264894485473633, -21.785802841186523, 71.8800277709961, 198.60252380371094, 20.56700897216797, 153.18109130859375, 116.82278442382812, 197.6334991455078, 238.70875549316406, -47.624942779541016, 142.02487182617188, 3.7024574279785156, 220.19627380371094, -49.86646270751953, 97.59561920166016, 111.4879150390625, -2.766817092895508, 4.135124206542969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000515.npy"}
|
|
{"epoch": 0.7785336356764928, "step": 516, "batch_size": 64, "mean": 62.41304016113281, "std": 115.4272232055664, "min": -243.30691528320312, "p10": -69.98915634155273, "median": 63.10068130493164, "p90": 186.52190093994142, "max": 247.94325256347656, "pos_frac": 0.75, "sample": [142.17672729492188, 212.85948181152344, 160.63385009765625, -108.08094787597656, -69.21380615234375, 5.721012115478516, -70.32144927978516, 153.80494689941406, 182.36190795898438, 145.03260803222656, 61.477760314941406, 32.184295654296875, 237.476318359375, 240.9408721923828, 4.672496795654297, 2.9353199005126953, 104.24429321289062, 183.3135986328125, 247.94325256347656, 176.18382263183594, -65.17357635498047, -207.2075958251953, 84.53645324707031, 103.52715301513672, 159.36434936523438, 64.72360229492188, 186.67245483398438, 8.331329345703125, 182.4198455810547, 147.35018920898438, -243.30691528320312, 23.981735229492188, 0.9233493804931641, 3.6337966918945312, -19.741798400878906, 173.53231811523438, 181.50811767578125, -8.59687614440918, 3.3248252868652344, 15.783123016357422, 165.97982788085938, -225.262451171875, 146.01486206054688, 204.23171997070312, 45.219970703125, 61.39288330078125, 186.1706085205078, 44.0033073425293, 83.24491882324219, 130.99046325683594, 5.545555114746094, 156.87197875976562, 17.8494873046875, -63.805885314941406, -159.35806274414062, 130.5906524658203, -44.680477142333984, 104.19560241699219, -9.150718688964844, -0.19321823120117188, 69.48313903808594, -3.1167335510253906, -95.47008514404297, 201.75511169433594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000516.npy"}
|
|
{"epoch": 0.780045351473923, "step": 517, "batch_size": 64, "mean": 47.63670349121094, "std": 108.44947052001953, "min": -198.46343994140625, "p10": -94.68219070434569, "median": 28.18082618713379, "p90": 200.16496582031252, "max": 321.6120300292969, "pos_frac": 0.65625, "sample": [20.980953216552734, -39.89146423339844, 2.110515594482422, 116.3703384399414, -39.850196838378906, 20.039447784423828, -70.39634704589844, 112.85134887695312, 233.64678955078125, 46.113956451416016, -0.5733184814453125, -12.107772827148438, -78.91854858398438, -13.629020690917969, 6.7869415283203125, -59.36644744873047, -6.877374649047852, -13.110801696777344, 35.70817565917969, 22.790542602539062, 121.39720153808594, -137.6256103515625, 321.6120300292969, 162.71646118164062, -198.46343994140625, 62.54615020751953, -17.578338623046875, 103.4196548461914, 78.94705200195312, 173.38079833984375, 92.84932708740234, 169.42286682128906, 4.288423538208008, 96.9142837524414, -103.01095581054688, -81.64630889892578, 31.605667114257812, 204.48960876464844, -45.254150390625, -108.99553680419922, 66.6874771118164, 207.28048706054688, 274.02142333984375, 51.331851959228516, 121.07577514648438, 173.1260223388672, 6.303783416748047, 82.19430541992188, -119.5033950805664, -27.763694763183594, -112.94608306884766, 103.35090637207031, 23.621292114257812, 182.44680786132812, 231.29400634765625, 203.07431030273438, 64.23780822753906, -12.268413543701172, 21.479652404785156, 26.284671783447266, 193.37649536132812, -100.26899719238281, 146.54254150390625, 30.076980590820312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000517.npy"}
|
|
{"epoch": 0.781557067271353, "step": 518, "batch_size": 64, "mean": 68.7750015258789, "std": 112.75688934326172, "min": -184.2701873779297, "p10": -63.49792861938476, "median": 35.2451286315918, "p90": 217.809684753418, "max": 351.01837158203125, "pos_frac": 0.703125, "sample": [116.66000366210938, -2.084136962890625, -3.7726211547851562, 214.1798553466797, -33.030120849609375, 10.845695495605469, 33.782981872558594, 103.60829162597656, -49.5955924987793, 36.707275390625, 111.262939453125, -71.6273193359375, 102.30512237548828, 107.10032653808594, 106.82952117919922, -109.38066101074219, 54.34727478027344, 125.73230743408203, -96.67916107177734, 197.98313903808594, 339.1708984375, -184.2701873779297, 1.3456649780273438, 139.8843231201172, 154.34222412109375, 189.97230529785156, -70.78129577636719, 164.00030517578125, -71.53952026367188, 148.85958862304688, -3.0552024841308594, -40.96930694580078, 5.130674362182617, 119.92643737792969, 30.42279052734375, 2.804046630859375, 2.570636749267578, 1.12713623046875, 89.65898132324219, 301.98455810546875, 20.673416137695312, 48.509925842285156, -0.1863231658935547, 219.36532592773438, 131.09120178222656, 351.01837158203125, -67.3818359375, 179.71353149414062, 261.08514404296875, -54.43547821044922, -12.794412612915039, -18.857629776000977, 235.73623657226562, 0.520416259765625, 31.38507843017578, 182.94851684570312, 169.21759033203125, 54.46769714355469, -4.936861038208008, -4.9342498779296875, 5.481201171875, 8.761083602905273, 260.2351379394531, 129.15701293945312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000518.npy"}
|
|
{"epoch": 0.783068783068783, "step": 519, "batch_size": 64, "mean": 40.31169891357422, "std": 135.5418243408203, "min": -359.118896484375, "p10": -127.91051712036132, "median": 42.43497848510742, "p90": 200.63755645751954, "max": 252.3878173828125, "pos_frac": 0.609375, "sample": [-164.03793334960938, 172.88331604003906, 140.64617919921875, -206.1265106201172, -53.864322662353516, 200.02256774902344, -10.423053741455078, 252.3878173828125, 16.318069458007812, 162.56051635742188, 67.74658203125, 18.344194412231445, 215.366455078125, -62.25426483154297, 83.22449493408203, 113.88963317871094, 200.01055908203125, 22.14371681213379, -187.08029174804688, -57.88604736328125, 194.06744384765625, 191.56031799316406, -178.9630584716797, 243.5664825439453, 192.08274841308594, 200.901123046875, -96.38545227050781, -10.038673400878906, 57.74635314941406, 117.57254028320312, -67.76776123046875, 178.44937133789062, 46.03947448730469, -74.7869873046875, 96.25917053222656, -60.322662353515625, -78.67640686035156, 3.853740692138672, 236.1343231201172, -113.3641357421875, 103.60498046875, 8.485504150390625, 185.86721801757812, 68.45125579833984, 38.830482482910156, -359.118896484375, 81.23085021972656, -33.52037048339844, 50.358489990234375, -128.93386840820312, 191.33102416992188, -72.01595306396484, -141.4307098388672, 243.13156127929688, 75.81990051269531, 27.785179138183594, 232.45616149902344, 116.87264251708984, -3.486713409423828, -125.52269744873047, -61.322654724121094, -19.74784278869629, 198.9337158203125, -99.91014099121094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000519.npy"}
|
|
{"epoch": 0.7845804988662132, "step": 520, "batch_size": 64, "mean": 59.882991790771484, "std": 107.39602661132812, "min": -180.3370819091797, "p10": -57.47820510864258, "median": 46.11784362792969, "p90": 189.26777191162108, "max": 385.8900146484375, "pos_frac": 0.71875, "sample": [171.10182189941406, -12.639001846313477, -3.5682716369628906, -21.12984848022461, 189.33360290527344, 45.385009765625, 79.18046569824219, 9.466941833496094, -6.169139862060547, 385.8900146484375, 144.52365112304688, 5.672113418579102, -25.947097778320312, -56.054359436035156, 112.43319702148438, 82.38858795166016, 34.45887756347656, -58.08842468261719, 14.124706268310547, 187.6566619873047, 300.9987487792969, 96.14903259277344, 33.44194793701172, 213.2545623779297, 96.1654052734375, 52.496665954589844, 26.294357299804688, -101.24763488769531, -120.56937408447266, 155.4061279296875, 58.57005310058594, -4.098869323730469, 2.8881759643554688, 21.747352600097656, 5.7463836669921875, -1.1992683410644531, 200.1599578857422, 125.4515609741211, 33.314056396484375, -121.73078155517578, -180.3370819091797, 46.55836486816406, 58.070770263671875, -99.89237976074219, 86.18017578125, 84.41575622558594, 45.96788024902344, 150.23460388183594, 134.51849365234375, 46.26780700683594, 64.81666564941406, 206.4329376220703, 179.56288146972656, 72.5676498413086, 278.04241943359375, 34.595130920410156, 84.22219848632812, 4.770439147949219, -15.221126556396484, -0.6194610595703125, -7.512153625488281, 186.69261169433594, 189.11416625976562, -168.19532775878906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000520.npy"}
|
|
{"epoch": 0.7860922146636432, "step": 521, "batch_size": 64, "mean": 87.26436614990234, "std": 114.36663055419922, "min": -212.64266967773438, "p10": -21.768653106689452, "median": 83.08900833129883, "p90": 218.6067077636719, "max": 436.2671813964844, "pos_frac": 0.71875, "sample": [187.35147094726562, 78.3004150390625, 185.27728271484375, 79.16778564453125, 182.2332305908203, 126.2145767211914, 184.8033447265625, -7.8066864013671875, -6.3513031005859375, 34.630126953125, 61.973419189453125, -31.677989959716797, 203.5904541015625, 71.29828643798828, 169.95745849609375, 98.21451568603516, 11.60417366027832, 25.7408447265625, -10.195266723632812, 187.58383178710938, -12.615913391113281, 208.32504272460938, 53.633209228515625, 16.674346923828125, -11.50663948059082, 145.25555419921875, -109.44383239746094, 196.7861328125, -20.528533935546875, 184.99447631835938, 251.3431854248047, 436.2671813964844, -124.9421615600586, 23.929752349853516, -14.524482727050781, -22.300132751464844, -13.315437316894531, 107.80661010742188, -0.1836700439453125, 255.87802124023438, 266.32965087890625, 17.81441879272461, -101.25202178955078, 111.8138198852539, -2.9000682830810547, 27.92043113708496, 168.54100036621094, 103.99937438964844, 6.597698211669922, 127.97311401367188, 170.58358764648438, 115.8374252319336, -212.64266967773438, 298.0010681152344, 212.24932861328125, 134.81553649902344, 64.18287658691406, -49.221343994140625, 92.82844543457031, -18.424034118652344, 130.4404296875, 87.0102310180664, 221.331298828125, 227.6473388671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000521.npy"}
|
|
{"epoch": 0.7876039304610734, "step": 522, "batch_size": 64, "mean": 52.237709045410156, "std": 131.67739868164062, "min": -304.6348571777344, "p10": -94.05048599243163, "median": 43.3870964050293, "p90": 217.50940704345703, "max": 308.9533386230469, "pos_frac": 0.640625, "sample": [-68.52639770507812, 78.61145782470703, 5.711275100708008, 84.13603973388672, 123.35124969482422, 197.05223083496094, 74.49210357666016, 248.41912841796875, 200.65789794921875, 61.600547790527344, -173.1116180419922, -138.18539428710938, 88.04376220703125, 223.45626831054688, 40.55686950683594, 172.77725219726562, -59.37798309326172, 78.18388366699219, 168.6289520263672, 14.855558395385742, 217.9215545654297, -3.7411766052246094, 72.09687042236328, -271.1129150390625, 308.9533386230469, 1.9884109497070312, -1.8460655212402344, -92.38396453857422, -304.6348571777344, 32.47856140136719, -24.876144409179688, 202.44790649414062, -51.521392822265625, 234.9370574951172, 162.4564666748047, 80.34855651855469, -10.944580078125, 74.12315368652344, 8.826480865478516, 46.217323303222656, 38.26976013183594, -14.956809997558594, 8.745399475097656, -47.52008819580078, 125.96829223632812, -0.6510772705078125, 123.15780639648438, -204.18943786621094, 216.5477294921875, -70.04856872558594, -30.19133758544922, -155.38099670410156, 183.93414306640625, 306.38970947265625, -15.804367065429688, 21.631675720214844, -94.76470947265625, 271.31793212890625, 156.32350158691406, 134.79843139648438, -0.40169334411621094, -21.259662628173828, 103.76545715332031, 204.4647216796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000522.npy"}
|
|
{"epoch": 0.7891156462585034, "step": 523, "batch_size": 64, "mean": 61.7940673828125, "std": 117.31547546386719, "min": -327.47589111328125, "p10": -80.73647994995116, "median": 71.85765838623047, "p90": 191.20109252929691, "max": 270.3287353515625, "pos_frac": 0.71875, "sample": [52.548282623291016, 225.82891845703125, -7.006374359130859, -35.92626190185547, 131.43804931640625, 98.6113510131836, 116.60792541503906, 41.31175994873047, 104.95184326171875, 29.198760986328125, -0.21723365783691406, 109.36589050292969, 236.3916015625, -133.73330688476562, 46.688331604003906, 70.974853515625, 106.45672607421875, -13.006338119506836, 60.170684814453125, -162.36505126953125, 13.7640380859375, 194.16082763671875, -61.37141418457031, 60.44781494140625, -73.521484375, 170.68484497070312, -59.76496124267578, 31.246028900146484, 7.572944641113281, 174.20880126953125, 25.654111862182617, 139.1219024658203, 158.14944458007812, 131.82913208007812, 166.8502960205078, 184.2950439453125, 181.62059020996094, 72.74046325683594, 20.07811737060547, -327.47589111328125, 89.37094116210938, -173.81939697265625, 241.67822265625, 114.08049774169922, 182.64376831054688, -39.899024963378906, -8.291484832763672, 158.32952880859375, 88.79557800292969, -83.82862091064453, 244.6327667236328, 43.67214584350586, 129.27691650390625, -121.90636444091797, 166.25732421875, 201.9005126953125, 108.94965362548828, 270.3287353515625, -12.336280822753906, -172.1111297607422, -47.193519592285156, 94.85116577148438, 40.34368896484375, 150.51376342773438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000523.npy"}
|
|
{"epoch": 0.7906273620559335, "step": 524, "batch_size": 64, "mean": 66.46137237548828, "std": 102.31000518798828, "min": -193.60653686523438, "p10": -50.28665466308593, "median": 56.727638244628906, "p90": 189.08448028564453, "max": 338.2618408203125, "pos_frac": 0.78125, "sample": [180.1604766845703, 43.554473876953125, 253.18106079101562, 68.946533203125, -25.6612548828125, -5.820869445800781, -71.0337142944336, -45.811614990234375, 99.10066223144531, 93.53396606445312, 226.27362060546875, 1.2425537109375, -71.07431030273438, 66.94685363769531, 67.81101989746094, 231.74871826171875, -191.16567993164062, -52.20452880859375, 27.69076156616211, 189.72555541992188, 178.96981811523438, -193.60653686523438, 187.58863830566406, -1.5945358276367188, 23.057586669921875, 8.514572143554688, 179.11062622070312, 102.1979751586914, 123.1269760131836, 16.306915283203125, -12.330291748046875, 116.70101165771484, 42.45399475097656, 88.58617401123047, 147.21826171875, 34.835140228271484, 20.405920028686523, 150.74453735351562, 38.60484313964844, -80.85255432128906, 66.05154418945312, 7.106719970703125, 8.054801940917969, 338.2618408203125, 41.48429870605469, 158.142578125, 186.7220001220703, 6.414127349853516, 47.40373229980469, 67.38754272460938, 120.89067840576172, 47.154090881347656, 45.014671325683594, 214.15794372558594, 75.83174133300781, 144.31423950195312, 192.281494140625, 121.57279205322266, -144.37603759765625, -11.777046203613281, 165.07522583007812, -2.4188461303710938, 92.02662658691406, 9.567955017089844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000524.npy"}
|
|
{"epoch": 0.7921390778533636, "step": 525, "batch_size": 64, "mean": 53.06797790527344, "std": 118.82408905029297, "min": -259.8758850097656, "p10": -77.3021255493164, "median": 29.928451538085938, "p90": 202.10832672119145, "max": 361.1361083984375, "pos_frac": 0.625, "sample": [-26.687896728515625, -208.5024871826172, 2.21319580078125, -31.088863372802734, -117.28069305419922, 45.61872863769531, 13.032310485839844, 68.70452880859375, 261.3481140136719, -8.803153991699219, 145.9413299560547, -19.444747924804688, 160.58941650390625, 29.231048583984375, 141.61463928222656, 205.59451293945312, 110.28843688964844, -19.945945739746094, 229.59312438964844, -82.50897216796875, 67.64432525634766, -0.5037269592285156, 234.33212280273438, 182.40907287597656, -26.065284729003906, 111.94265747070312, -24.635406494140625, 100.05245971679688, 36.701820373535156, 112.24324035644531, 22.050888061523438, 234.5645751953125, 21.427200317382812, 116.19436645507812, 93.72776794433594, -33.46028137207031, 47.24343490600586, -10.933502197265625, 361.1361083984375, -18.956071853637695, -65.15281677246094, 66.52124786376953, 1.1063957214355469, -259.8758850097656, -26.716278076171875, 193.97389221191406, -1.4806098937988281, 20.492225646972656, 30.6258544921875, 145.94337463378906, -101.16109466552734, 192.12353515625, 3.3993911743164062, -32.81108093261719, 149.88491821289062, 317.0597229003906, -142.78805541992188, 73.09848022460938, 129.49151611328125, 143.5481719970703, -19.377578735351562, 186.22438049316406, -3.113157272338867, -131.2884979248047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000525.npy"}
|
|
{"epoch": 0.7936507936507936, "step": 526, "batch_size": 64, "mean": 48.03594207763672, "std": 108.19004821777344, "min": -191.18692016601562, "p10": -80.41678619384764, "median": 40.25423049926758, "p90": 182.13181152343753, "max": 328.3854064941406, "pos_frac": 0.734375, "sample": [230.85865783691406, 8.238262176513672, -35.89167022705078, 5.977293014526367, -96.45723724365234, 9.948295593261719, -1.5152244567871094, 300.2425537109375, -180.7282257080078, -152.50477600097656, 26.261749267578125, 86.52496337890625, -169.55853271484375, 19.367767333984375, 184.3013916015625, -35.162864685058594, 64.17308044433594, 157.89170837402344, 10.137580871582031, 147.72430419921875, 91.4613037109375, 219.66357421875, 40.86207580566406, 101.42825317382812, 195.67251586914062, 6.910984039306641, 81.72528839111328, 145.52374267578125, -83.98503112792969, 174.88003540039062, -4.389894485473633, 68.14712524414062, 49.74388885498047, -23.690576553344727, 177.0694580078125, 3.238828659057617, 107.78060150146484, 28.330055236816406, 28.893585205078125, 39.646385192871094, 82.47896575927734, 31.971607208251953, -36.05577087402344, 141.56564331054688, 69.14739990234375, -184.51901245117188, 0.7858924865722656, 155.23025512695312, 118.1356201171875, -42.743927001953125, 96.65875244140625, -16.578105926513672, 328.3854064941406, 37.44746780395508, 193.05191040039062, 5.0688629150390625, -72.09088134765625, 58.99357986450195, 64.4029769897461, 70.98347473144531, 90.26124572753906, -46.141082763671875, 90.30581665039062, -191.18692016601562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000526.npy"}
|
|
{"epoch": 0.7951625094482238, "step": 527, "batch_size": 64, "mean": 43.136287689208984, "std": 114.24382019042969, "min": -226.97445678710938, "p10": -92.07853164672852, "median": 19.361989974975586, "p90": 203.64987030029297, "max": 294.19354248046875, "pos_frac": 0.625, "sample": [-0.05768013000488281, 16.199600219726562, 31.517723083496094, 120.784423828125, 28.112808227539062, 2.8984603881835938, 240.56857299804688, 84.43331909179688, 183.9624786376953, 294.19354248046875, 181.57164001464844, -20.658660888671875, 207.09835815429688, 18.910804748535156, 52.032623291015625, -91.95842742919922, 235.65066528320312, -0.16179466247558594, 107.89144897460938, -92.1300048828125, -136.34368896484375, -73.11258697509766, -29.192092895507812, -119.40104675292969, 71.49742126464844, 11.24920654296875, 263.97686767578125, 94.24378967285156, 17.607067108154297, 14.9166259765625, 102.56550598144531, 18.98511505126953, -64.27841186523438, 57.690670013427734, 30.08816909790039, 204.43629455566406, 44.487327575683594, -25.1329345703125, -14.891508102416992, 190.2410888671875, 71.64575958251953, 19.73886489868164, 185.33517456054688, 1.9237785339355469, -37.806304931640625, -19.786712646484375, -155.76177978515625, 191.3733367919922, -3.7972564697265625, -2.595804214477539, -7.56732177734375, -0.2924213409423828, 201.81488037109375, 143.50082397460938, -226.97445678710938, -16.83502960205078, 104.53244018554688, -24.46263885498047, 96.59400939941406, 224.99636840820312, -161.74835205078125, -205.08851623535156, 96.65585327148438, 24.835044860839844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000527.npy"}
|
|
{"epoch": 0.7966742252456538, "step": 528, "batch_size": 64, "mean": 49.24766540527344, "std": 126.35791015625, "min": -266.1196594238281, "p10": -73.92042236328125, "median": 24.748342514038086, "p90": 216.35011749267582, "max": 332.38079833984375, "pos_frac": 0.71875, "sample": [123.86396789550781, -266.1196594238281, 203.77606201171875, -14.825929641723633, 47.37572479248047, 145.23931884765625, 77.4169921875, 4.0341339111328125, 9.319602966308594, -8.701126098632812, -8.305206298828125, -49.17828369140625, 219.9146270751953, -186.5665283203125, 6.123605728149414, -3.1599960327148438, 182.5003662109375, 13.211812973022461, 16.525510787963867, -69.16902160644531, -75.46793365478516, 325.51025390625, -0.7844924926757812, -249.34408569335938, -7.643943786621094, 38.925933837890625, -70.30956268310547, -8.369621276855469, 17.14539909362793, 177.70013427734375, 68.88139343261719, 31.778549194335938, 38.06978988647461, 238.9195556640625, 131.83692932128906, 18.71016502380371, 237.6887969970703, 7.958641052246094, 107.53312683105469, 121.96347045898438, 208.03292846679688, 225.00706481933594, 28.287961959838867, 21.208723068237305, -186.58775329589844, 74.99900817871094, 12.830799102783203, 17.863677978515625, 171.63525390625, 0.19598388671875, 59.89579772949219, -170.04354858398438, -193.3190155029297, 332.38079833984375, 293.8319396972656, 56.540653228759766, 5.737644195556641, -9.245475769042969, 180.13528442382812, 8.094842910766602, 153.86334228515625, 61.50041198730469, 130.58322143554688, 74.44252014160156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000528.npy"}
|
|
{"epoch": 0.7981859410430839, "step": 529, "batch_size": 64, "mean": 97.94773864746094, "std": 104.52436065673828, "min": -166.07073974609375, "p10": -13.553533172607416, "median": 99.01433181762695, "p90": 216.61251831054688, "max": 296.9750061035156, "pos_frac": 0.828125, "sample": [124.61650085449219, -80.80491638183594, 87.44932556152344, -7.943626403808594, 49.60420608520508, -4.8744964599609375, 296.9750061035156, 206.60971069335938, -74.73443603515625, 20.60772705078125, 15.785070419311523, 0.33391380310058594, 14.646728515625, 105.256591796875, 175.05653381347656, 143.92971801757812, -91.31572723388672, 195.14991760253906, -5.10491943359375, 168.52047729492188, 217.91009521484375, -15.957778930664062, -28.753509521484375, 210.49008178710938, 92.7720718383789, 186.33380126953125, 21.011817932128906, 15.405113220214844, 189.84800720214844, 49.179443359375, 238.67767333984375, 178.99752807617188, 115.76008605957031, 191.8800048828125, 122.39335632324219, 115.71024322509766, 191.9910888671875, 207.91282653808594, 47.356258392333984, 240.46426391601562, 221.2825927734375, 194.68821716308594, 257.22271728515625, 31.166458129882812, 163.5352783203125, 190.8577880859375, 70.62553405761719, -2.6792335510253906, 129.79852294921875, 37.256103515625, 37.315452575683594, 78.2434310913086, 42.02912139892578, 209.3220672607422, -144.99720764160156, 213.5848388671875, 84.75423431396484, 205.42347717285156, 134.449462890625, 83.97798156738281, -166.07073974609375, 6.196754455566406, 15.075546264648438, 246.4510498046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000529.npy"}
|
|
{"epoch": 0.799697656840514, "step": 530, "batch_size": 64, "mean": 61.64900207519531, "std": 107.83966827392578, "min": -204.822509765625, "p10": -35.082127761840816, "median": 39.14255142211914, "p90": 193.022314453125, "max": 353.4507141113281, "pos_frac": 0.703125, "sample": [335.60723876953125, 62.390201568603516, 19.960847854614258, -18.472850799560547, 6.180946350097656, -10.93603515625, 293.2364501953125, -12.816200256347656, 162.98316955566406, 156.09974670410156, 16.541156768798828, 227.48910522460938, -9.954401016235352, 40.628265380859375, 124.81758117675781, 61.99303436279297, 76.89134216308594, 102.26496887207031, 152.47230529785156, 37.656837463378906, 1.7361946105957031, 19.336807250976562, -204.822509765625, 169.138427734375, 146.2371826171875, -0.5356502532958984, 114.16535949707031, 61.03437423706055, 18.95752716064453, 193.20135498046875, 13.553611755371094, 25.185020446777344, 249.5211181640625, 353.4507141113281, 65.50630187988281, 60.583473205566406, 53.708560943603516, 95.20726776123047, -66.22557067871094, 11.812732696533203, -37.06398010253906, 109.75602722167969, -24.02391815185547, 65.54672241210938, -100.19075775146484, -24.97289276123047, 185.69015502929688, -23.366607666015625, -171.439208984375, -75.10214233398438, -10.334863662719727, -60.49993896484375, 192.60455322265625, -30.457805633544922, 206.5870819091797, 92.89103698730469, 145.1623077392578, -27.303939819335938, -13.909255981445312, 25.107559204101562, 186.2675323486328, 95.22871398925781, 1.1284446716308594, 32.44537353515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000530.npy"}
|
|
{"epoch": 0.8012093726379441, "step": 531, "batch_size": 64, "mean": 65.27767944335938, "std": 109.45316314697266, "min": -218.82749938964844, "p10": -49.87463455200195, "median": 40.8701171875, "p90": 209.33787078857426, "max": 323.4396057128906, "pos_frac": 0.703125, "sample": [-47.34510803222656, 133.12330627441406, 261.1688232421875, 142.78492736816406, 35.64430236816406, 102.36215209960938, 2.6581039428710938, 323.4396057128906, 66.09312438964844, -16.563594818115234, 47.698875427246094, -0.8174152374267578, 284.5290222167969, 5.1430206298828125, 150.104736328125, 173.86669921875, -152.47125244140625, -50.958717346191406, 27.334510803222656, 199.86746215820312, 115.96363830566406, 250.6304168701172, -77.37862396240234, -39.277244567871094, -46.802337646484375, 13.202417373657227, 97.23113250732422, 78.48648071289062, 23.28270721435547, 0.675018310546875, 65.65155792236328, -14.906618118286133, -5.97541618347168, 46.09593200683594, -218.82749938964844, -6.695125579833984, 131.628662109375, 157.44351196289062, 0.17803955078125, -21.364181518554688, 15.77191162109375, 104.04415130615234, 193.2933807373047, 195.43719482421875, -93.39753723144531, -98.10863494873047, 190.9266815185547, 201.6258544921875, 111.63600158691406, 34.291473388671875, -23.05553436279297, 34.345027923583984, 83.28012084960938, 212.6430206298828, 106.39639282226562, -22.195423126220703, 113.27328491210938, -60.44428634643555, 191.21929931640625, 218.13217163085938, -3.9374771118164062, 1.357595443725586, 212.79696655273438, 21.534881591796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000531.npy"}
|
|
{"epoch": 0.8027210884353742, "step": 532, "batch_size": 64, "mean": 70.3089599609375, "std": 111.254150390625, "min": -178.459716796875, "p10": -81.2648223876953, "median": 61.062204360961914, "p90": 212.46119842529296, "max": 270.10406494140625, "pos_frac": 0.71875, "sample": [123.85214233398438, 66.78837585449219, 229.61788940429688, 204.452392578125, -38.88153076171875, -35.47228240966797, 163.34677124023438, 197.5562744140625, -4.157520294189453, 181.29042053222656, -89.8074722290039, 154.25914001464844, 154.9710235595703, 51.963470458984375, 61.50822448730469, 37.5540771484375, 60.61618423461914, 113.50711059570312, 1.7620372772216797, -129.7183837890625, -45.24017333984375, 11.174280166625977, 194.2974853515625, 59.03388977050781, -18.729461669921875, 73.47987365722656, 120.59005737304688, 270.10406494140625, 134.04135131835938, 254.16403198242188, 199.06715393066406, 16.602108001708984, -78.41592407226562, 212.935302734375, 31.428817749023438, 90.96888732910156, 158.56167602539062, -82.48577880859375, 31.868804931640625, 27.07567024230957, -127.40692138671875, -22.139511108398438, 32.618831634521484, -40.09318161010742, 212.92922973632812, 50.371604919433594, 188.77536010742188, -73.86975860595703, 50.66603088378906, 211.36912536621094, 171.62384033203125, 107.95233917236328, -25.93970489501953, 215.1013641357422, -178.459716796875, 102.67900085449219, -134.0414581298828, 169.94711303710938, 188.05715942382812, -102.49646759033203, 5.291345596313477, -11.126771926879883, 123.46620178222656, 218.96817016601562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000532.npy"}
|
|
{"epoch": 0.8042328042328042, "step": 533, "batch_size": 64, "mean": 73.95171356201172, "std": 109.42864227294922, "min": -192.24742126464844, "p10": -46.32496681213379, "median": 75.58877944946289, "p90": 197.82415924072265, "max": 354.39801025390625, "pos_frac": 0.703125, "sample": [89.4495620727539, 187.12644958496094, 26.078773498535156, 28.370079040527344, 121.3963623046875, 83.67957305908203, 20.583709716796875, -52.02949905395508, 190.06214904785156, 143.06912231445312, -90.07980346679688, -3.7124595642089844, -6.03948974609375, -0.8327617645263672, 103.07647705078125, 354.39801025390625, 13.810983657836914, 132.944580078125, -80.07168579101562, -4.440818786621094, 13.367683410644531, -3.6773128509521484, 130.14141845703125, 12.88836669921875, -41.710060119628906, 142.26416015625, 173.4295196533203, 74.17742919921875, -2.2025909423828125, -179.26348876953125, -40.45527648925781, -0.006046295166015625, 191.85232543945312, 155.916015625, -2.865968704223633, 249.25169372558594, 89.40643310546875, 303.2554016113281, 42.491973876953125, -48.30278396606445, 25.872604370117188, 172.24337768554688, 145.7250213623047, 149.98226928710938, 206.4178466796875, 198.65158081054688, 89.27488708496094, -95.1512451171875, 9.240829467773438, -0.57257080078125, 11.017316818237305, 190.34364318847656, 166.28488159179688, -35.75794982910156, 0.27446937561035156, 77.00012969970703, 195.8935089111328, 148.59579467773438, 24.675071716308594, 193.67611694335938, 202.03695678710938, 235.67689514160156, -192.24742126464844, 96.95753479003906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000533.npy"}
|
|
{"epoch": 0.8057445200302343, "step": 534, "batch_size": 64, "mean": 63.71331787109375, "std": 120.54720306396484, "min": -221.6282958984375, "p10": -83.79522247314453, "median": 55.31227111816406, "p90": 212.07357177734377, "max": 358.85528564453125, "pos_frac": 0.65625, "sample": [56.009361267089844, 5.938871383666992, 227.17886352539062, 21.924617767333984, -31.71345329284668, 76.71012115478516, 2.2442474365234375, -221.6282958984375, 123.6048583984375, -98.76756286621094, -9.307222366333008, 47.29278564453125, 195.44691467285156, 175.48648071289062, -112.00846099853516, 358.85528564453125, 129.06402587890625, 30.17636489868164, 57.91755676269531, -30.96014404296875, 148.57171630859375, 277.19659423828125, 125.00608825683594, 78.14248657226562, -13.080154418945312, -203.10726928710938, -166.19549560546875, -7.319662094116211, 231.60012817382812, 10.124557495117188, 132.55075073242188, -84.70970153808594, 232.67623901367188, 194.95550537109375, 201.96006774902344, 104.60409545898438, -93.17239379882812, 54.61518096923828, 295.99334716796875, 194.68441772460938, 78.27311706542969, 208.04519653320312, -52.28260040283203, -6.575225830078125, 188.3730926513672, 127.638916015625, -4.6107635498046875, 118.35432434082031, 25.800819396972656, -29.788223266601562, -9.37396240234375, 213.80001831054688, 106.7355728149414, -12.551776885986328, -32.04925537109375, 16.180511474609375, 168.065185546875, -81.66143798828125, 108.91929626464844, -45.03150939941406, 62.84473419189453, 40.50202178955078, 191.4408416748047, -21.95834732055664], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000534.npy"}
|
|
{"epoch": 0.8072562358276644, "step": 535, "batch_size": 64, "mean": 58.104820251464844, "std": 113.05477142333984, "min": -211.6009063720703, "p10": -55.48249893188476, "median": 38.40397262573242, "p90": 211.77032775878914, "max": 281.38482666015625, "pos_frac": 0.640625, "sample": [38.9359130859375, -55.16971206665039, 137.7088165283203, 48.4461784362793, 22.83263397216797, 30.68020248413086, -197.13180541992188, -55.61655044555664, -120.50130462646484, -9.8812255859375, 192.20538330078125, -126.79400634765625, 46.810264587402344, 162.365234375, 4.190269470214844, -10.848625183105469, -15.7789306640625, 63.325477600097656, -0.9849071502685547, -30.079025268554688, 32.64668273925781, 86.75135803222656, 180.57278442382812, -86.32186889648438, 163.40980529785156, 26.353370666503906, 226.51480102539062, -136.87521362304688, -53.56671905517578, 2.800050735473633, 145.59568786621094, 230.75918579101562, 110.77349853515625, -3.7992477416992188, 101.59262084960938, 185.9822540283203, 37.872032165527344, 281.38482666015625, -3.39556884765625, 147.49703979492188, 48.39264678955078, -3.3267669677734375, 272.4271240234375, 182.92123413085938, -21.221115112304688, -211.6009063720703, 11.506103515625, 159.00552368164062, -14.86307144165039, 85.6503677368164, 100.91863250732422, -9.014427185058594, 43.0181999206543, 220.15530395507812, 5.143013000488281, 180.9008331298828, -7.34515380859375, 154.61648559570312, 181.58303833007812, -53.5772705078125, 117.02177429199219, -22.84722137451172, 268.2098083496094, 229.7728271484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000535.npy"}
|
|
{"epoch": 0.8087679516250945, "step": 536, "batch_size": 64, "mean": 91.31442260742188, "std": 100.74017333984375, "min": -89.65083312988281, "p10": -18.282357978820794, "median": 73.94134140014648, "p90": 227.5754486083985, "max": 314.8973083496094, "pos_frac": 0.8125, "sample": [81.56379699707031, 29.342796325683594, -46.39923858642578, 196.0395965576172, 202.97573852539062, 28.5780029296875, 20.900283813476562, -33.33576965332031, 122.79379272460938, 9.913553237915039, -80.92729187011719, 148.438232421875, 107.9807357788086, 237.57159423828125, 181.53587341308594, 157.57485961914062, 119.3134765625, 203.8348388671875, 295.01904296875, 68.74665069580078, 3.9735851287841797, 7.311433792114258, 201.74130249023438, 211.015869140625, 47.02369689941406, 217.17181396484375, 180.84744262695312, 6.34831428527832, 101.5586166381836, 140.57142639160156, 232.03414916992188, -0.8489265441894531, 3.758808135986328, 98.56822204589844, -27.434722900390625, -30.64373779296875, 204.0155487060547, 7.4534759521484375, 182.09828186035156, 2.728322982788086, 236.8905029296875, 215.93026733398438, 31.566518783569336, -4.194429397583008, 17.334049224853516, 9.761627197265625, 12.329750061035156, 65.75435638427734, 82.53501892089844, 314.8973083496094, -3.0766963958740234, 189.7012481689453, 79.13603210449219, 247.42697143554688, 96.42024230957031, 56.31858825683594, 19.295204162597656, 269.3306884765625, -11.825124740600586, -89.65083312988281, 38.67833709716797, 152.0807342529297, -21.04974365234375, -2.2213592529296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000536.npy"}
|
|
{"epoch": 0.8102796674225246, "step": 537, "batch_size": 64, "mean": 75.2706069946289, "std": 119.19561767578125, "min": -161.2834930419922, "p10": -73.71214523315427, "median": 68.46341323852539, "p90": 209.22779235839846, "max": 354.333740234375, "pos_frac": 0.671875, "sample": [160.8641357421875, 102.1218490600586, -13.110931396484375, 175.80841064453125, 276.77349853515625, 193.38812255859375, 122.482421875, -0.09034538269042969, 186.8604736328125, 83.07154846191406, 102.93330383300781, -6.959333419799805, 203.21180725097656, 225.89007568359375, 148.42369079589844, 182.75921630859375, 200.54269409179688, 3.6870555877685547, 0.6956863403320312, -8.697835922241211, -0.6570549011230469, 91.03726196289062, 17.015884399414062, 7.399589538574219, 2.0818614959716797, 257.01458740234375, 148.01397705078125, 25.93354034423828, -115.82318878173828, -8.719709396362305, 53.85527801513672, 32.82437515258789, 154.7316436767578, -55.82768249511719, -125.18656921386719, 27.707942962646484, 314.6533203125, 100.7001953125, -8.628753662109375, -21.121288299560547, 17.0145263671875, 200.9699249267578, -161.2834930419922, 124.67414855957031, -36.508575439453125, -16.27327537536621, 168.35635375976562, -30.402862548828125, 140.99774169921875, 99.73921203613281, -14.35565185546875, 136.4071044921875, -28.396278381347656, 180.08958435058594, 205.77267456054688, 354.333740234375, -120.63685607910156, 172.30906677246094, -99.39312744140625, 39.43553161621094, 210.70855712890625, 271.57037353515625, -154.09339904785156, -81.37691497802734], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000537.npy"}
|
|
{"epoch": 0.8117913832199547, "step": 538, "batch_size": 64, "mean": 77.04296875, "std": 105.49772644042969, "min": -190.87640380859375, "p10": -37.74515075683593, "median": 65.0468978881836, "p90": 192.057373046875, "max": 379.74774169921875, "pos_frac": 0.78125, "sample": [-10.052757263183594, 188.954833984375, 29.249351501464844, 17.071468353271484, 105.131591796875, -190.87640380859375, 76.38545227050781, -40.881500244140625, 124.52334594726562, 177.25869750976562, 77.96485137939453, 192.82000732421875, 22.429977416992188, 163.8681640625, 10.025426864624023, 47.777618408203125, 174.43707275390625, 169.35707092285156, 24.13951873779297, 29.331890106201172, 50.89263916015625, 2.932464599609375, 201.67361450195312, 174.33328247070312, 47.060890197753906, -75.60649108886719, 164.9972686767578, -60.91468048095703, 233.99887084960938, -4.136146545410156, -30.427001953125, 190.27789306640625, -21.468109130859375, 21.73095703125, 7.207099914550781, 52.78297424316406, 48.571746826171875, 65.56503295898438, 6.2732391357421875, 42.66267395019531, 302.5865173339844, -1.139251708984375, 243.02792358398438, 101.96844482421875, -121.05255889892578, 148.5574188232422, -63.65715789794922, 187.27822875976562, 170.79322814941406, 379.74774169921875, 187.94195556640625, 64.52876281738281, 104.67454528808594, -146.78457641601562, 130.50582885742188, 122.56938171386719, 226.78289794921875, -1.9846687316894531, 125.24360656738281, 43.068145751953125, 68.08808135986328, 69.1260986328125, 100.0905532836914, -18.534893035888672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000538.npy"}
|
|
{"epoch": 0.8133030990173847, "step": 539, "batch_size": 64, "mean": 58.27008819580078, "std": 107.07814025878906, "min": -166.4160919189453, "p10": -32.719846343994135, "median": 22.33016014099121, "p90": 207.94600219726564, "max": 386.3973388671875, "pos_frac": 0.703125, "sample": [57.615478515625, 97.369140625, -0.9948501586914062, 231.22080993652344, 11.441244125366211, -33.88190460205078, 208.43661499023438, 190.982177734375, 129.74191284179688, 14.357170104980469, 43.91804504394531, 3.31451416015625, 96.8802490234375, 1.9259490966796875, -23.87451171875, -17.57160186767578, -6.1476287841796875, -3.5481719970703125, 216.1204376220703, 133.3682861328125, -0.8509197235107422, -46.01325607299805, 53.98309326171875, -104.11923217773438, 11.489761352539062, -166.4160919189453, -14.977453231811523, 165.3154296875, 279.3895263671875, 24.196044921875, 30.136009216308594, -15.262664794921875, -30.008377075195312, 12.983726501464844, 312.2055358886719, 63.99053192138672, 10.238533020019531, 80.30142211914062, 206.80123901367188, 12.846414566040039, -17.412887573242188, 64.70526123046875, 125.44154357910156, 24.87761688232422, 96.01123046875, 91.11000061035156, -101.35047149658203, 147.2635498046875, 8.577537536621094, -26.520355224609375, 195.50636291503906, 43.42845916748047, 20.464275360107422, -62.240501403808594, 177.43093872070312, 77.585205078125, -6.6760406494140625, 17.325115203857422, -157.0041046142578, 94.62482452392578, 9.099214553833008, 264.5629577636719, 386.3973388671875, 19.175704956054688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000539.npy"}
|
|
{"epoch": 0.8148148148148148, "step": 540, "batch_size": 64, "mean": 89.25696563720703, "std": 107.19074249267578, "min": -155.3792724609375, "p10": -39.131171417236324, "median": 70.82586288452148, "p90": 224.91548461914067, "max": 333.2434387207031, "pos_frac": 0.78125, "sample": [1.040008544921875, -5.560546875, 333.2434387207031, 198.1466522216797, 5.127836227416992, -155.3792724609375, 165.58218383789062, 155.1259002685547, 83.23534393310547, -6.632976531982422, 41.12192916870117, 193.51095581054688, 189.6356964111328, 37.652000427246094, -109.96550750732422, 58.4163818359375, 169.49725341796875, 144.8720703125, 19.566997528076172, 152.24427795410156, 29.724349975585938, -60.30192565917969, 122.9442138671875, 41.367889404296875, 252.80416870117188, 159.4168701171875, -41.91246795654297, 164.72933959960938, -91.08465576171875, 26.731796264648438, 135.12966918945312, 197.02452087402344, 237.22357177734375, 156.2167205810547, 229.07754516601562, -8.169357299804688, -32.6414794921875, 202.3275146484375, -61.30558776855469, 2.966951370239258, -1.0952339172363281, 237.7782745361328, 179.10501098632812, 215.20401000976562, 207.33950805664062, 40.23637390136719, 241.4320526123047, 30.14571762084961, 115.36517333984375, -94.46503448486328, 44.84850311279297, 146.41812133789062, 57.71551513671875, 9.764999389648438, -2.0735549926757812, 51.5335693359375, -7.1959075927734375, 257.70294189453125, 35.44554138183594, 96.20706176757812, 209.12960815429688, 16.015241622924805, 83.81246948242188, 209.32540893554688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000540.npy"}
|
|
{"epoch": 0.8163265306122449, "step": 541, "batch_size": 64, "mean": 74.39794921875, "std": 99.93413543701172, "min": -188.62216186523438, "p10": -56.28004302978514, "median": 70.85831069946289, "p90": 196.64139251708986, "max": 254.60519409179688, "pos_frac": 0.71875, "sample": [181.14901733398438, 67.31092834472656, 104.58146667480469, 192.35293579101562, -16.896629333496094, 139.4390869140625, -64.70681762695312, 196.90469360351562, 182.69093322753906, 13.003814697265625, 16.318954467773438, -61.884246826171875, -18.912460327148438, 150.84715270996094, -10.371467590332031, -188.62216186523438, 182.8903350830078, 178.46585083007812, 69.63935852050781, 196.0270233154297, -71.0051040649414, 37.75262451171875, 199.96905517578125, 203.4539794921875, 174.784912109375, 114.181640625, 248.81231689453125, -43.20356750488281, -67.42320251464844, -6.382678985595703, 126.00598907470703, 25.58672523498535, 197.04307556152344, 254.60519409179688, 13.774192810058594, 137.37429809570312, 140.3932342529297, -10.000732421875, 143.23464965820312, 2.862691879272461, -3.495115280151367, -106.37246704101562, 163.48406982421875, 178.94395446777344, 17.47574806213379, 102.83624267578125, 156.182861328125, 192.368896484375, -99.79502868652344, 41.15950012207031, 0.0071239471435546875, 173.55825805664062, 72.07726287841797, 66.75212097167969, -22.805313110351562, -5.81427001953125, 89.37992858886719, 84.64141845703125, -21.43265724182129, 37.95775604248047, 93.47547912597656, -6.235349655151367, 200.7078399658203, 24.36377716064453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000541.npy"}
|
|
{"epoch": 0.817838246409675, "step": 542, "batch_size": 64, "mean": 83.38833618164062, "std": 118.50373077392578, "min": -254.99168395996094, "p10": -73.39374847412107, "median": 74.93363571166992, "p90": 219.66357421875, "max": 313.73028564453125, "pos_frac": 0.796875, "sample": [157.33457946777344, 172.87158203125, 21.143447875976562, 296.0407409667969, 91.9614028930664, 22.3975830078125, 125.92431640625, 54.62242126464844, 187.64642333984375, 96.2267837524414, 7.208354949951172, -22.809051513671875, -100.79460906982422, 219.4610595703125, 24.24164390563965, 193.64242553710938, 64.87792205810547, 313.73028564453125, -163.52703857421875, 193.4416046142578, 193.89398193359375, 260.8572692871094, 26.86688995361328, 1.5139389038085938, -132.94422912597656, -254.99168395996094, 156.2463836669922, 34.62548828125, 149.99908447265625, -53.353759765625, 193.989990234375, 203.7084197998047, 0.9654293060302734, 219.7503662109375, -112.55684661865234, -11.601551055908203, -5.10137939453125, 143.21652221679688, 3.1544342041015625, 138.45785522460938, 6.12384033203125, 23.967300415039062, 204.86929321289062, -95.188720703125, 83.05735778808594, -1.3171615600585938, 204.164794921875, 36.169273376464844, 66.8099136352539, 227.30859375, 146.31634521484375, 223.1014404296875, -81.98231506347656, 41.32074737548828, -8.340600967407227, 3.184467315673828, 26.499771118164062, 198.08935546875, 171.99905395507812, 58.20489501953125, 83.64311981201172, 251.82855224609375, 150.21566772460938, 204.47006225585938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000542.npy"}
|
|
{"epoch": 0.8193499622071051, "step": 543, "batch_size": 64, "mean": 82.76486206054688, "std": 110.51863098144531, "min": -221.91726684570312, "p10": -32.66131668090819, "median": 84.27839279174805, "p90": 220.8239929199219, "max": 290.86614990234375, "pos_frac": 0.796875, "sample": [1.5292510986328125, 30.392467498779297, 82.62159729003906, 0.7055721282958984, 133.32533264160156, 6.884593963623047, 172.231201171875, 210.92056274414062, -191.88682556152344, 277.819580078125, 87.34538269042969, 181.98593139648438, 171.63575744628906, 222.3473663330078, 248.28872680664062, 139.6215057373047, 186.90005493164062, -116.96469116210938, 81.51734161376953, 217.2694549560547, 13.358602523803711, -14.149269104003906, 0.4507789611816406, 61.38861083984375, 7.262889862060547, 190.54281616210938, -221.91726684570312, 1.5866622924804688, -57.97507858276367, -59.85943603515625, 25.646591186523438, 171.189697265625, 251.45440673828125, -1.5640373229980469, 182.52072143554688, 110.95994567871094, 196.99887084960938, -5.950311660766602, -23.993972778320312, -8.831024169921875, 92.50105285644531, 139.70945739746094, 247.28610229492188, 35.35687255859375, 190.46725463867188, 23.80188751220703, 107.15267181396484, 290.86614990234375, 23.400371551513672, 124.89955139160156, -14.160167694091797, 85.93518829345703, 119.75003051757812, -36.375892639160156, 150.0465545654297, 60.933197021484375, 122.97238159179688, 41.091705322265625, 12.00677490234375, -66.7459716796875, 186.00144958496094, 254.18289184570312, 6.487819671630859, 135.77345275878906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000543.npy"}
|
|
{"epoch": 0.8208616780045351, "step": 544, "batch_size": 64, "mean": 91.50540924072266, "std": 124.33623504638672, "min": -169.7861785888672, "p10": -58.8451202392578, "median": 91.19115829467773, "p90": 264.87761840820315, "max": 328.42718505859375, "pos_frac": 0.765625, "sample": [12.79193115234375, -167.30398559570312, 73.0545654296875, 27.505571365356445, 139.69485473632812, 92.27764892578125, -62.169647216796875, 32.774024963378906, -51.087890625, -34.38439178466797, 49.230133056640625, 86.04873657226562, -45.721954345703125, 269.3119201660156, 123.33597564697266, -1.839426040649414, -162.5635986328125, 150.77279663085938, 131.28109741210938, 96.0985107421875, 140.5550537109375, 159.63400268554688, 181.9710693359375, 182.04205322265625, 241.07061767578125, 19.321725845336914, 11.848220825195312, -12.730392456054688, 1.4024410247802734, -169.7861785888672, 328.42718505859375, -147.1966552734375, 191.10513305664062, -9.0635986328125, 202.8874969482422, 200.51295471191406, 293.1997375488281, -39.93182373046875, 62.565330505371094, 90.10466766357422, 180.17115783691406, 49.44670867919922, 273.0290832519531, 77.72651672363281, 79.01549530029297, 101.92916870117188, 151.8852081298828, 158.1935577392578, 254.53091430664062, -64.66036987304688, 28.93613624572754, 172.35479736328125, 317.2138977050781, 299.6007080078125, -77.9081802368164, -26.058975219726562, 241.03512573242188, 28.8494873046875, 203.42724609375, 158.39791870117188, 3.438140869140625, 99.81784057617188, 142.1856689453125, 316.74310302734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000544.npy"}
|
|
{"epoch": 0.8223733938019653, "step": 545, "batch_size": 64, "mean": 52.23409652709961, "std": 110.82625579833984, "min": -216.61978149414062, "p10": -80.35747680664062, "median": 33.44884490966797, "p90": 201.9886444091797, "max": 257.7542724609375, "pos_frac": 0.671875, "sample": [137.3497314453125, 111.85982513427734, -78.37667846679688, 219.42359924316406, 22.638195037841797, -11.378944396972656, -41.297950744628906, -88.80956268310547, -52.252296447753906, 36.22135543823242, 172.14312744140625, -3.6805419921875, 194.12879943847656, 41.437042236328125, 64.49832916259766, -1.0778732299804688, 68.12419128417969, 210.7559051513672, -66.10743713378906, 105.2940673828125, -32.514015197753906, 200.06951904296875, -29.745956420898438, 202.81112670898438, 118.2369384765625, -38.604576110839844, 105.9615707397461, 27.80338478088379, -37.61073303222656, 6.023710250854492, -5.438961029052734, -81.20639038085938, -141.9117431640625, 157.96136474609375, -82.557373046875, 11.701242446899414, 191.6704864501953, 207.6461181640625, 160.9123992919922, 4.066383361816406, -12.316577911376953, 256.2368469238281, 174.0448760986328, 13.983047485351562, 29.33739471435547, 89.23882293701172, -54.232566833496094, 132.41558837890625, 94.90338134765625, -216.61978149414062, 37.29705810546875, 59.849609375, 13.70904541015625, 257.7542724609375, 102.04537963867188, -129.5834197998047, 5.543949127197266, 152.27230834960938, -202.72830200195312, 2.100343704223633, 96.65775299072266, 183.40036010742188, 240.82913208007812, 30.676334381103516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000545.npy"}
|
|
{"epoch": 0.8238851095993953, "step": 546, "batch_size": 64, "mean": 60.09575653076172, "std": 121.90217590332031, "min": -257.3849182128906, "p10": -128.0470977783203, "median": 52.003318786621094, "p90": 194.93643646240236, "max": 341.74761962890625, "pos_frac": 0.765625, "sample": [201.98733520507812, -62.240234375, 188.46585083007812, 76.0327377319336, 100.16522216796875, 169.56544494628906, -24.221405029296875, -65.60466003417969, 52.531097412109375, -135.3548583984375, -257.3849182128906, 171.26976013183594, 7.367954254150391, 33.12351989746094, -182.91348266601562, 268.6470642089844, 11.82630729675293, -14.298759460449219, -63.42925262451172, 27.021812438964844, -171.25152587890625, 31.781156539916992, 123.1815185546875, 196.26651000976562, 12.079978942871094, 80.85324096679688, 168.1544189453125, -26.516876220703125, 162.54945373535156, 218.77789306640625, 166.17279052734375, 17.39063262939453, 172.13449096679688, 51.47554016113281, 29.12057876586914, -131.49002075195312, 39.920509338378906, 341.74761962890625, 142.42466735839844, 155.69131469726562, 2.3209266662597656, 31.600805282592773, -120.01361083984375, 134.58865356445312, 22.612451553344727, 125.25116729736328, -18.88238525390625, 22.187503814697266, 101.32489776611328, 120.5284423828125, 165.3881072998047, 37.50910949707031, 239.83001708984375, -172.6368408203125, 18.926353454589844, 78.7181396484375, 171.03611755371094, 108.58125305175781, 191.8329315185547, 66.9128646850586, 209.91513061523438, -171.2718963623047, 186.97012329101562, 9.877988815307617], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000546.npy"}
|
|
{"epoch": 0.8253968253968254, "step": 547, "batch_size": 64, "mean": 85.8291244506836, "std": 116.36243438720703, "min": -234.43850708007812, "p10": -11.057892608642575, "median": 76.2388687133789, "p90": 225.42056732177736, "max": 381.4462890625, "pos_frac": 0.828125, "sample": [3.8206024169921875, 37.294647216796875, 10.507314682006836, 15.944717407226562, 278.341064453125, 99.03089904785156, -104.42960357666016, 251.10821533203125, 127.20205688476562, 158.6208038330078, 217.63067626953125, 160.77731323242188, 36.58832550048828, 83.97294616699219, 90.74613952636719, -11.963356018066406, 130.48463439941406, 187.61758422851562, 38.897701263427734, 28.0703125, 236.25291442871094, 196.33216857910156, 190.25726318359375, 9.79339599609375, 277.8407287597656, 169.31118774414062, -8.945144653320312, 101.5259017944336, 90.99398803710938, 8.62176513671875, 125.76155090332031, -3.9467105865478516, -225.11346435546875, 111.77949523925781, 17.6712646484375, 77.97389221191406, 207.0528564453125, 9.305057525634766, 220.0350341796875, 64.06185913085938, 126.16094207763672, 190.70272827148438, 153.0420379638672, 165.9083251953125, -164.77606201171875, 74.50384521484375, 38.766822814941406, -234.43850708007812, 54.278533935546875, 23.085372924804688, 227.72865295410156, 205.3112030029297, 381.4462890625, -1.1001205444335938, 68.99878692626953, -29.180389404296875, 16.38785171508789, 214.64138793945312, -3.785463333129883, 50.293907165527344, 10.142255783081055, 10.000579833984375, 246.25146484375, -48.134071350097656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000547.npy"}
|
|
{"epoch": 0.8269085411942555, "step": 548, "batch_size": 64, "mean": 53.29730224609375, "std": 108.64143371582031, "min": -184.13275146484375, "p10": -79.65626983642578, "median": 42.54033660888672, "p90": 204.19227142333986, "max": 275.0751953125, "pos_frac": 0.671875, "sample": [155.23829650878906, 169.86282348632812, -52.7950439453125, -70.38015747070312, -151.467529296875, 87.67704772949219, -7.8463897705078125, 245.3170166015625, 178.0559539794922, 46.10875701904297, -40.938987731933594, 201.48020935058594, 0.4971199035644531, 17.97069549560547, 65.6419677734375, -6.683828353881836, -88.72232055664062, 43.51561737060547, -11.276031494140625, 139.65643310546875, 257.17767333984375, 56.704132080078125, -81.86978149414062, 19.728782653808594, 155.489990234375, 47.88386154174805, 235.4619140625, 208.1497802734375, 17.778289794921875, 118.199951171875, -77.64117431640625, 123.06919860839844, -20.822463989257812, 152.9310760498047, 39.36006164550781, -65.53898620605469, 221.29408264160156, 39.97918701171875, -45.56177520751953, 136.7447509765625, 41.56505584716797, -6.208215713500977, 44.03630828857422, -184.13275146484375, 275.0751953125, 0.885650634765625, -19.29389190673828, 87.09492492675781, -131.67063903808594, 127.39839935302734, 77.97708129882812, -131.59210205078125, 47.06299591064453, 148.1120147705078, 119.5514144897461, 7.459495544433594, 170.88446044921875, -11.56414794921875, 24.922027587890625, -75.72004699707031, 205.35458374023438, 188.55560302734375, -80.51988220214844, 26.36351776123047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000548.npy"}
|
|
{"epoch": 0.8284202569916855, "step": 549, "batch_size": 64, "mean": 66.16154479980469, "std": 128.8261260986328, "min": -274.1339416503906, "p10": -85.72416305541991, "median": 60.13515090942383, "p90": 205.69386291503906, "max": 402.398681640625, "pos_frac": 0.6875, "sample": [-22.297332763671875, 26.18561553955078, -54.16565704345703, 110.99909210205078, 12.683845520019531, -92.56986999511719, 89.92517852783203, 5.048547744750977, 46.589622497558594, 225.26748657226562, 240.14537048339844, 1.7314071655273438, 94.42411041259766, 205.01870727539062, -4.0388641357421875, -56.200172424316406, -131.27845764160156, 82.44601440429688, 156.9396514892578, 166.0138702392578, 193.62356567382812, 156.1802978515625, 319.1073913574219, -122.25373840332031, 74.00631713867188, 19.30816650390625, 41.8745231628418, 205.98321533203125, 168.57467651367188, -69.75084686279297, 204.75753784179688, 191.96231079101562, 402.398681640625, -30.948183059692383, 172.5487518310547, 296.1644287109375, -16.242406845092773, -202.49710083007812, 5.5090789794921875, 162.45510864257812, -274.1339416503906, 39.03883361816406, -13.368797302246094, 79.38507080078125, 46.12360763549805, 161.16360473632812, -31.844764709472656, 145.40036010742188, 262.3771667480469, 73.68067932128906, -1.1259307861328125, 153.5428924560547, 174.84573364257812, 108.43479919433594, 84.67704772949219, -18.423919677734375, 5.286924362182617, -5.725635528564453, -28.88180923461914, 87.32750701904297, 193.2745361328125, -207.30372619628906, -94.61952209472656, 19.578536987304688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000549.npy"}
|
|
{"epoch": 0.8299319727891157, "step": 550, "batch_size": 64, "mean": 44.2348518371582, "std": 109.08250427246094, "min": -214.12574768066406, "p10": -94.53095245361327, "median": 25.156248092651367, "p90": 194.38953399658206, "max": 313.1095886230469, "pos_frac": 0.71875, "sample": [89.22978973388672, 222.97479248046875, 198.43348693847656, 17.71710205078125, -124.15864562988281, 13.874488830566406, 1.1010684967041016, -21.595657348632812, -22.984275817871094, 92.6956787109375, 184.95364379882812, 58.795143127441406, 7.772483825683594, 0.36904144287109375, 17.259057998657227, 232.61209106445312, 218.78839111328125, 90.47843170166016, 29.736968994140625, 20.57552719116211, 166.50079345703125, -20.49032211303711, 146.0363006591797, 112.09060668945312, 176.97938537597656, -51.28669738769531, 313.1095886230469, -12.953960418701172, 15.110084533691406, -129.6614227294922, -137.59115600585938, 35.05747985839844, -75.19766235351562, 121.69932556152344, 5.628387451171875, 62.50602722167969, 89.03789520263672, 33.058074951171875, -55.427711486816406, 15.043891906738281, -97.77899169921875, 102.21138000488281, 32.46182632446289, 89.41502380371094, -25.220748901367188, 139.457763671875, 13.165863037109375, 9.304216384887695, 62.92570495605469, 139.2050018310547, -137.21298217773438, 107.73135375976562, 232.853515625, 47.173606872558594, 178.6053924560547, 18.300012588500977, -214.12574768066406, 13.635824203491211, 198.9636688232422, -183.42657470703125, 149.0297393798828, -71.05931854248047, -25.510372161865234, -86.95219421386719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000550.npy"}
|
|
{"epoch": 0.8314436885865457, "step": 551, "batch_size": 64, "mean": 57.685813903808594, "std": 104.24187469482422, "min": -239.09490966796875, "p10": -22.85417652130127, "median": 27.452186584472656, "p90": 193.9668395996094, "max": 484.55517578125, "pos_frac": 0.75, "sample": [-22.602317810058594, 76.55630493164062, 103.05343627929688, 47.902618408203125, 113.18876647949219, 38.194679260253906, 484.55517578125, 8.780994415283203, 44.14088439941406, 24.80272674560547, -17.507007598876953, 33.93739318847656, 179.1405029296875, -5.78407096862793, 49.936859130859375, 13.697795867919922, 194.66119384765625, 27.78887176513672, -239.09490966796875, -22.61083984375, -16.330507278442383, 7.215629577636719, 117.20209503173828, 43.41986083984375, 99.41812896728516, 192.3466796875, -36.61073303222656, 90.44495391845703, 49.29450988769531, 169.68936157226562, 15.572565078735352, -49.203330993652344, 0.282562255859375, -32.523345947265625, 39.555633544921875, 291.30792236328125, 8.434417724609375, 148.45999145507812, -22.958463668823242, 7.4308929443359375, 11.320816040039062, 39.62486267089844, -10.485145568847656, 1.6304969787597656, 177.28274536132812, 254.87588500976562, 197.5611114501953, 15.695697784423828, 20.817459106445312, 3.9652137756347656, 74.33350372314453, 128.40939331054688, 32.432647705078125, -21.914546966552734, 27.115501403808594, -7.7686309814453125, -11.784069061279297, 234.06163024902344, 25.74005126953125, 13.698604583740234, -90.37774658203125, -27.02676010131836, 197.12840270996094, 150.3668212890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000551.npy"}
|
|
{"epoch": 0.8329554043839759, "step": 552, "batch_size": 64, "mean": 66.1895523071289, "std": 100.6671371459961, "min": -216.3938751220703, "p10": -30.003371429443355, "median": 60.87495040893555, "p90": 200.9389190673828, "max": 280.70208740234375, "pos_frac": 0.78125, "sample": [15.50303840637207, 42.0831298828125, 25.492835998535156, -32.218894958496094, 141.00567626953125, 49.76620101928711, 2.9708919525146484, 50.71958923339844, 40.56065368652344, 149.80824279785156, 256.3473205566406, 280.70208740234375, 129.955322265625, 71.52700805664062, -18.194007873535156, -81.9416732788086, 18.729225158691406, 133.45899963378906, 56.61827850341797, -24.833816528320312, 53.70452880859375, 108.28787231445312, 114.36100769042969, 160.83346557617188, 171.26327514648438, -22.54931640625, 68.80086517333984, 143.00624084472656, 216.52146911621094, -215.27511596679688, -0.23510169982910156, -5.319305419921875, 59.966583251953125, -195.15646362304688, 84.51590728759766, -0.040283203125, 201.37777709960938, 40.741546630859375, 10.018714904785156, -10.155738830566406, 206.26272583007812, 25.72723388671875, -56.32544708251953, 111.90855407714844, 199.9149169921875, 16.683441162109375, 49.9923095703125, 77.40206909179688, -216.3938751220703, 212.69285583496094, 204.11207580566406, 116.53836822509766, 11.092939376831055, -47.38941192626953, 121.77542114257812, 85.93230438232422, 75.64781951904297, 92.76305389404297, 195.49461364746094, 116.32490539550781, 61.78331756591797, 24.891197204589844, 176.5340118408203, 80.03790283203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000552.npy"}
|
|
{"epoch": 0.8344671201814059, "step": 553, "batch_size": 64, "mean": 66.2669677734375, "std": 121.91097259521484, "min": -212.33355712890625, "p10": -68.78916778564452, "median": 43.96969985961914, "p90": 239.02127685546878, "max": 353.40313720703125, "pos_frac": 0.671875, "sample": [265.74774169921875, -29.35620880126953, -5.7177276611328125, 101.67969512939453, 7.791023254394531, -3.751932144165039, 170.89146423339844, 299.13861083984375, -10.714942932128906, -18.080078125, 34.07897186279297, 156.22525024414062, 233.46865844726562, 297.1275634765625, -39.56598663330078, 137.10791015625, -16.394407272338867, 50.965370178222656, 94.74069213867188, -212.33355712890625, -3.220174789428711, 150.9214630126953, -26.68213653564453, 154.99842834472656, 202.06564331054688, 38.131065368652344, 40.48249053955078, 47.4569091796875, 4.622894287109375, -60.990997314453125, 289.4413146972656, 193.95928955078125, 95.20439910888672, 353.40313720703125, 35.765785217285156, 134.97665405273438, 195.982421875, 168.62818908691406, 116.11471557617188, 83.40300750732422, 28.60681915283203, 68.63710021972656, 23.961606979370117, -8.449146270751953, -73.7481689453125, 82.97603607177734, -1.1190986633300781, 241.40097045898438, -185.10589599609375, -117.26632690429688, 65.907470703125, 178.18556213378906, 129.232177734375, -72.13124084472656, -57.76929473876953, 20.844955444335938, 6.629001617431641, -192.3015594482422, 130.21533203125, 76.68460845947266, -44.456703186035156, -78.23896789550781, 266.1247253417969, 24.553726196289062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000553.npy"}
|
|
{"epoch": 0.8359788359788359, "step": 554, "batch_size": 64, "mean": 77.0845947265625, "std": 108.83079528808594, "min": -236.19468688964844, "p10": -33.445901870727525, "median": 80.07657623291016, "p90": 202.92339324951172, "max": 284.86785888671875, "pos_frac": 0.75, "sample": [93.97416687011719, 243.26780700683594, 199.5226593017578, 270.30865478515625, -12.500129699707031, 99.79679870605469, 9.43614387512207, 9.510822296142578, 86.07673645019531, 167.78997802734375, -14.538894653320312, -8.472419738769531, -5.3250885009765625, 159.27560424804688, -201.54586791992188, 33.87269592285156, 110.98318481445312, -17.051422119140625, 145.96871948242188, 30.49847984313965, -4.286994934082031, 176.74996948242188, 49.029571533203125, -79.000244140625, 175.71372985839844, 171.38980102539062, 204.51211547851562, 48.17038345336914, -1.8812599182128906, 50.07946014404297, 199.58206176757812, -8.200241088867188, 154.04605102539062, 89.5467529296875, 7.1330108642578125, -95.95774841308594, 191.38917541503906, 36.687713623046875, 150.37774658203125, 1.22259521484375, -236.19468688964844, 204.3553924560547, 74.076416015625, 116.70222473144531, -58.64936828613281, 140.71641540527344, 22.754552841186523, 131.96376037597656, 59.26844787597656, 17.47320556640625, 166.64215087890625, -20.59766387939453, -38.95228958129883, 171.27413940429688, 213.28712463378906, 22.015846252441406, 193.03829956054688, 186.68991088867188, 93.37181854248047, 205.7532958984375, 284.86785888671875, -130.12588500976562, 181.13629150390625, 15.394882202148438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000554.npy"}
|
|
{"epoch": 0.8374905517762661, "step": 555, "batch_size": 64, "mean": 61.125946044921875, "std": 123.02202606201172, "min": -279.846435546875, "p10": -95.16076278686522, "median": 61.674232482910156, "p90": 211.12746887207035, "max": 264.436279296875, "pos_frac": 0.734375, "sample": [-1.0089359283447266, -224.09573364257812, 86.50422668457031, 75.85545349121094, 228.96145629882812, 5.554832458496094, -279.846435546875, -12.744873046875, -139.32632446289062, 130.6063690185547, 220.736328125, -57.45539855957031, 136.35357666015625, 47.2957878112793, 150.82078552246094, 237.48013305664062, 39.39490509033203, -4.736946105957031, 5.5257415771484375, 222.96136474609375, 116.76746368408203, 6.8548431396484375, 218.29261779785156, 62.35859680175781, 192.04449462890625, 50.23793029785156, 111.46548461914062, 194.55117797851562, 45.52528381347656, 199.38531494140625, 175.63072204589844, 175.44403076171875, 66.71097564697266, 161.74453735351562, -134.38082885742188, 216.15982055664062, -224.115234375, 175.39320373535156, -86.4726333618164, 39.6254768371582, 60.9898681640625, 50.37309265136719, -36.40093231201172, 37.982765197753906, -85.75605773925781, -14.305133819580078, 113.02154541015625, 111.29581451416016, 179.35855102539062, -26.251068115234375, 169.58627319335938, 64.2070083618164, -218.73060607910156, -2.4663162231445312, 182.3751678466797, 60.79158020019531, 96.83322143554688, 56.243255615234375, 118.46980285644531, -98.88424682617188, 264.436279296875, 18.901670455932617, 175.60498046875, 2.324361801147461], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000555.npy"}
|
|
{"epoch": 0.8390022675736961, "step": 556, "batch_size": 64, "mean": 56.72555923461914, "std": 119.89167022705078, "min": -224.3889617919922, "p10": -93.47297058105468, "median": 32.16741752624512, "p90": 214.7489685058594, "max": 333.13995361328125, "pos_frac": 0.71875, "sample": [182.15887451171875, 21.622039794921875, 52.7957763671875, -9.964237213134766, 16.723922729492188, 186.58883666992188, 217.30517578125, 63.75050735473633, -12.268564224243164, -91.57559204101562, 20.68040657043457, 3.0021514892578125, 95.88030242919922, 221.023681640625, 23.420909881591797, 6.5871124267578125, 41.79229736328125, -7.422172546386719, 116.25111389160156, 175.130859375, -32.667274475097656, 168.95530700683594, 208.78448486328125, 49.483436584472656, -46.93297576904297, 31.32416534423828, -224.3889617919922, 22.8859806060791, 143.16424560546875, -149.76544189453125, 1.627389907836914, -193.0574951171875, 101.10832214355469, 38.31346893310547, 43.61524963378906, 2.639730453491211, 174.9582977294922, 180.46006774902344, 11.413978576660156, 7.654111862182617, 191.3473663330078, 333.13995361328125, 32.515132904052734, -3.0666885375976562, 261.8118896484375, 254.01235961914062, -57.572784423828125, 31.8197021484375, -154.33639526367188, -3.8719024658203125, 219.7119598388672, -98.76573181152344, -74.17542266845703, 243.6072998046875, 6.853672027587891, -97.0982666015625, 77.57603454589844, -94.2861328125, 90.91990661621094, 191.96939086914062, 206.62203979492188, 203.63429260253906, 92.0751724243164, -87.066650390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000556.npy"}
|
|
{"epoch": 0.8405139833711263, "step": 557, "batch_size": 64, "mean": 84.66233825683594, "std": 100.8117904663086, "min": -106.69363403320312, "p10": -30.54860763549804, "median": 73.65000534057617, "p90": 222.37172851562502, "max": 288.49200439453125, "pos_frac": 0.75, "sample": [214.36212158203125, 138.57481384277344, 111.50407409667969, 44.2447509765625, -9.870220184326172, -39.29571533203125, 204.1873016357422, 45.66545867919922, 204.94696044921875, 115.29415893554688, -5.1501312255859375, -13.343887329101562, -5.292350769042969, 77.39806365966797, 0.3083381652832031, 98.74774169921875, 51.397891998291016, 0.043212890625, 197.2705078125, 108.32218170166016, 14.781047821044922, 16.547576904296875, 228.19931030273438, 107.06271362304688, -63.36177062988281, -72.2882080078125, 182.17910766601562, 233.21832275390625, 40.22404098510742, -70.65882110595703, 279.6827087402344, 142.72784423828125, 179.09100341796875, -10.938037872314453, -25.660202026367188, 205.04052734375, 116.85308074951172, 143.79696655273438, 42.73017883300781, 224.49212646484375, -24.198562622070312, 22.708803176879883, -32.643638610839844, 5.81011962890625, -1.041910171508789, -106.69363403320312, 288.49200439453125, 254.6136474609375, 190.1300048828125, 7.242713928222656, 17.401954650878906, 84.06104278564453, 31.038192749023438, 182.0006103515625, 167.0951385498047, 98.65789794921875, 217.42413330078125, 89.59295654296875, 49.39327621459961, 69.90194702148438, -60.29156494140625, -4.6060943603515625, 232.11302185058594, 187.15260314941406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000557.npy"}
|
|
{"epoch": 0.8420256991685563, "step": 558, "batch_size": 64, "mean": 92.00345611572266, "std": 113.40168762207031, "min": -147.10873413085938, "p10": -37.62586364746093, "median": 80.2795181274414, "p90": 233.19063262939457, "max": 407.2151184082031, "pos_frac": 0.765625, "sample": [189.27835083007812, 236.8209686279297, 224.7198486328125, -20.949417114257812, 201.31898498535156, 10.667335510253906, 194.46701049804688, -52.809940338134766, 2.8124008178710938, 125.36808776855469, -30.811256408691406, 171.73040771484375, -63.42787170410156, 29.357078552246094, -2.2293701171875, 162.57232666015625, 196.7723846435547, 0.019775390625, 133.41162109375, 81.81423950195312, 407.2151184082031, 176.4054412841797, -3.366260528564453, 193.93128967285156, 148.80665588378906, -99.42056274414062, 219.8134765625, 32.576744079589844, 239.30059814453125, 73.0455551147461, 180.502685546875, 46.00047302246094, 113.44076538085938, 238.4999237060547, 8.586845397949219, 3.823659896850586, 120.49586486816406, 13.85791015625, 78.74479675292969, 273.15899658203125, 162.7177276611328, -65.447265625, 298.4591369628906, 30.660598754882812, 201.5049285888672, 198.28863525390625, -19.100303649902344, -40.546409606933594, 265.2196044921875, -12.499267578125, 155.0395965576172, -147.10873413085938, 43.9969482421875, -70.0472412109375, 99.45608520507812, -12.2940673828125, 132.94293212890625, 9.330482482910156, -27.33099365234375, 8.27862548828125, 221.90802001953125, 50.877662658691406, 120.01299285888672, 27.5784912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000558.npy"}
|
|
{"epoch": 0.8435374149659864, "step": 559, "batch_size": 64, "mean": 81.60662841796875, "std": 129.54037475585938, "min": -182.05323791503906, "p10": -51.906917572021484, "median": 66.9769515991211, "p90": 226.31678161621096, "max": 510.8939208984375, "pos_frac": 0.6875, "sample": [26.356277465820312, -51.167808532714844, -16.917316436767578, -20.253944396972656, 119.80098724365234, 510.8939208984375, 274.5352783203125, 322.6995849609375, 46.41234588623047, -8.65481185913086, -180.1231689453125, 233.48776245117188, 77.96599578857422, 153.22425842285156, 7.092008590698242, 204.74911499023438, 196.68313598632812, 85.78656768798828, 169.70974731445312, 95.73941040039062, 211.39431762695312, 12.316604614257812, -51.61796569824219, 167.66384887695312, -19.742637634277344, 84.98143005371094, -125.69178771972656, 194.32327270507812, 85.99560546875, 196.58175659179688, 55.98790740966797, 97.40345764160156, -38.54248809814453, 317.78778076171875, 223.1901092529297, -52.03075408935547, 114.21621704101562, 13.980207443237305, -55.495567321777344, -21.562889099121094, -12.852783203125, 11.405952453613281, -24.29531478881836, 19.830810546875, 198.79690551757812, 217.9246826171875, 172.3249053955078, -44.70056915283203, 166.8700714111328, -20.454818725585938, 23.130239486694336, 227.6567840576172, -118.676513671875, 26.479324340820312, 170.17401123046875, 237.06900024414062, 26.139263153076172, 160.72286987304688, -1.9254989624023438, 16.02885627746582, 129.89096069335938, 220.8692169189453, -56.68848419189453, -182.05323791503906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000559.npy"}
|
|
{"epoch": 0.8450491307634165, "step": 560, "batch_size": 64, "mean": 72.34988403320312, "std": 99.82637023925781, "min": -156.44705200195312, "p10": -43.24795379638672, "median": 53.43750190734863, "p90": 206.5373550415039, "max": 280.40789794921875, "pos_frac": 0.796875, "sample": [49.58930206298828, 54.550384521484375, 213.78309631347656, 41.62714385986328, -156.44705200195312, -32.154518127441406, 206.40850830078125, 18.328231811523438, 238.64547729492188, -1.4525203704833984, 92.51600646972656, 6.174568176269531, 129.02279663085938, 35.23974609375, 1.65576171875, 9.16937255859375, -1.6007843017578125, 5.426477432250977, 30.411808013916016, 132.13978576660156, -83.48175048828125, 159.71844482421875, 73.67656707763672, 6.570335388183594, 194.208251953125, 150.0335693359375, 1.9627914428710938, 199.32923889160156, 201.68394470214844, 0.43912315368652344, 16.23565673828125, -19.19324493408203, -43.958091735839844, 230.38803100585938, 168.98731994628906, -79.63214111328125, 51.67906951904297, -64.47474670410156, 195.51573181152344, -41.590965270996094, 205.79193115234375, 149.55836486816406, 59.56096649169922, 110.19297790527344, 15.080726623535156, 108.35535430908203, 5.941749572753906, 198.52508544921875, 40.173885345458984, -33.98078918457031, 55.42356872558594, 280.40789794921875, -46.78124237060547, 1.6203765869140625, 224.7923583984375, 217.72259521484375, -109.77293395996094, 69.64933776855469, 188.2657012939453, 88.44206237792969, 206.5925750732422, 84.23983764648438, 52.32461929321289, 67.13478088378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000560.npy"}
|
|
{"epoch": 0.8465608465608465, "step": 561, "batch_size": 64, "mean": 69.26907348632812, "std": 110.48786163330078, "min": -167.11399841308594, "p10": -51.51099700927733, "median": 36.39494323730469, "p90": 216.7552154541016, "max": 293.12762451171875, "pos_frac": 0.71875, "sample": [3.8303756713867188, 15.933971405029297, 10.517669677734375, -10.744516372680664, 28.064760208129883, 199.17770385742188, 109.59791564941406, -19.562149047851562, 188.378173828125, 43.69847106933594, 28.91656494140625, 154.80592346191406, -0.7984733581542969, 125.07231140136719, 24.06573486328125, -92.74606323242188, 40.90232849121094, 173.98739624023438, 210.30023193359375, -10.721420288085938, -43.68586730957031, 242.8114013671875, 158.2634735107422, -73.13221740722656, 94.33139038085938, -19.993667602539062, 147.96490478515625, -39.26033401489258, -167.11399841308594, 293.12762451171875, -0.16454696655273438, 28.98426628112793, 219.52163696289062, 230.2977752685547, 43.24937438964844, 34.56404113769531, 52.34290313720703, 38.22584533691406, 8.96809196472168, 292.09368896484375, 289.14599609375, 254.62217712402344, 206.40628051757812, 209.46348571777344, 168.5292510986328, -124.8655014038086, -85.16287994384766, 7.74481201171875, 108.45500183105469, 9.23017692565918, -54.8646240234375, 83.4647445678711, 11.230445861816406, -13.73194694519043, 93.73947143554688, 202.49142456054688, 0.8181514739990234, 149.05348205566406, 115.06015014648438, -107.90432739257812, -2.3668956756591797, 11.586015701293945, 178.86471557617188, -41.86175537109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000561.npy"}
|
|
{"epoch": 0.8480725623582767, "step": 562, "batch_size": 64, "mean": 88.87841033935547, "std": 116.1578369140625, "min": -139.45382690429688, "p10": -29.660414123535155, "median": 75.32349395751953, "p90": 223.00035095214844, "max": 419.03778076171875, "pos_frac": 0.734375, "sample": [17.64385223388672, 201.0494842529297, -88.04811096191406, 292.4809875488281, 234.23072814941406, 213.75567626953125, 62.990806579589844, 382.76251220703125, 172.94837951660156, -11.613578796386719, 110.39967346191406, 81.38873291015625, -139.45382690429688, 16.451427459716797, 64.12005615234375, -4.873439788818359, 74.26432800292969, 33.589385986328125, 124.58740234375, -88.87672424316406, 157.50396728515625, 196.32000732421875, -20.147979736328125, -26.446815490722656, 15.158760070800781, 193.54168701171875, -7.620260238647461, 32.98931121826172, 57.706787109375, -3.239788055419922, 190.48098754882812, -49.002716064453125, 182.01602172851562, 220.118896484375, 86.83609008789062, 419.03778076171875, -13.51608657836914, -30.546951293945312, 3.0143470764160156, 248.29385375976562, -6.371162414550781, 24.753623962402344, 76.38265991210938, 128.95806884765625, 206.79000854492188, 203.35635375976562, 224.23526000976562, 93.42132568359375, 149.7662353515625, -27.591827392578125, 184.47622680664062, 64.03964233398438, -23.35368537902832, 87.91057586669922, 123.64201354980469, 88.7563705444336, 190.05970764160156, 258.0899658203125, 161.57229614257812, -103.88511657714844, 15.527069091796875, -87.18045043945312, 9.113548278808594, 43.453887939453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000562.npy"}
|
|
{"epoch": 0.8495842781557067, "step": 563, "batch_size": 64, "mean": 76.26375579833984, "std": 117.79057312011719, "min": -217.7953643798828, "p10": -65.7395248413086, "median": 70.69063568115234, "p90": 213.5291473388672, "max": 263.49908447265625, "pos_frac": 0.796875, "sample": [94.86449432373047, 179.62815856933594, 49.35842514038086, 0.15655517578125, 204.9056854248047, 171.4696044921875, -217.7953643798828, 227.69302368164062, 203.18971252441406, 7.388572692871094, 20.217609405517578, 199.48573303222656, 27.87230682373047, -67.1201171875, 196.06610107421875, 65.01876831054688, -172.32960510253906, 103.56331634521484, -41.046348571777344, 263.49908447265625, 60.20661926269531, 250.21221923828125, -62.51814270019531, 229.24826049804688, 172.92611694335938, 112.30196380615234, 136.54029846191406, 1.3083114624023438, 74.98666381835938, -201.16397094726562, 2.291961669921875, -27.87887191772461, 15.826606750488281, 29.703445434570312, -2.803386688232422, 203.63119506835938, 43.6927375793457, -79.32656860351562, 121.49456787109375, 40.892269134521484, 206.86126708984375, 216.38681030273438, 194.78201293945312, 16.16790771484375, -142.11639404296875, 81.90754699707031, 66.39460754394531, -33.339134216308594, 204.9586181640625, 129.604736328125, -155.09799194335938, 86.94793701171875, 23.373790740966797, 156.11508178710938, 157.64105224609375, 62.388519287109375, 216.69700622558594, 260.3653564453125, 46.90638732910156, 199.484130859375, 140.13560485839844, -56.9365234375, 150.75778198242188, 12.836273193359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000563.npy"}
|
|
{"epoch": 0.8510959939531368, "step": 564, "batch_size": 64, "mean": 96.79098510742188, "std": 119.0526351928711, "min": -233.91810607910156, "p10": -55.08726730346679, "median": 92.01725387573242, "p90": 235.070751953125, "max": 320.14520263671875, "pos_frac": 0.8125, "sample": [193.36312866210938, 17.10456085205078, 231.00973510742188, 212.63128662109375, 91.6625747680664, 267.52313232421875, 119.11564636230469, -205.9517364501953, -45.90489959716797, 16.49365234375, 218.95156860351562, 181.10870361328125, 57.30326461791992, -27.007471084594727, 12.586578369140625, 31.579269409179688, -18.370582580566406, 176.2933349609375, 207.70635986328125, 123.91873931884766, 85.39192199707031, 0.34319496154785156, 120.07919311523438, 170.79818725585938, 213.1945037841797, 184.51559448242188, 259.7803649902344, -59.02256774902344, -6.55616569519043, 131.1494140625, 263.67547607421875, 267.3912353515625, 236.16162109375, 126.73793029785156, 39.815643310546875, -148.85501098632812, 186.65699768066406, 71.05802917480469, 92.37193298339844, 10.821678161621094, 191.46168518066406, 58.368675231933594, 90.29393005371094, -28.864091873168945, -93.02334594726562, 88.68460083007812, 62.988731384277344, 320.14520263671875, 81.7121353149414, -69.42427062988281, 82.40165710449219, 152.31869506835938, 244.45184326171875, 19.047487258911133, 183.1903076171875, 52.346839904785156, 188.31484985351562, 232.525390625, -233.91810607910156, 206.23583984375, -69.1612548828125, 17.891223907470703, 123.02803039550781, 186.98094177246094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000564.npy"}
|
|
{"epoch": 0.8526077097505669, "step": 565, "batch_size": 64, "mean": 68.25919342041016, "std": 103.5545883178711, "min": -181.486328125, "p10": -59.70692558288574, "median": 65.04922103881836, "p90": 186.1691787719727, "max": 325.9161376953125, "pos_frac": 0.796875, "sample": [7.123023986816406, 28.704837799072266, 3.3382186889648438, 31.847412109375, 81.27252960205078, 63.5850830078125, 157.1602783203125, 66.51335906982422, 23.88482666015625, 39.03607940673828, 58.48101043701172, 125.65473937988281, 302.3544006347656, -181.486328125, -32.289154052734375, 174.4654083251953, 82.35592651367188, 106.22543334960938, 325.9161376953125, 230.8577880859375, 130.20492553710938, -89.69618225097656, 10.737930297851562, 25.49419403076172, 50.297393798828125, 144.72470092773438, 75.59273529052734, 15.830482482910156, 101.94947052001953, 100.89859771728516, 191.733154296875, 8.30284309387207, -79.8670654296875, -39.899234771728516, 123.96098327636719, -54.75471878051758, 8.79960823059082, 175.42388916015625, -180.41998291015625, 86.29098510742188, 167.71902465820312, -2.6040992736816406, -61.82929992675781, 99.23153686523438, -126.52252197265625, 138.97048950195312, 47.31535339355469, 175.64622497558594, 247.7440948486328, 33.412513732910156, 174.56011962890625, -15.148513793945312, -90.93121337890625, 0.8400840759277344, 27.13705062866211, 41.92526626586914, -29.191253662109375, 165.89755249023438, 114.82469940185547, 103.9664306640625, 199.03285217285156, 150.08612060546875, 115.22095489501953, 190.67901611328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000565.npy"}
|
|
{"epoch": 0.854119425547997, "step": 566, "batch_size": 64, "mean": 63.429542541503906, "std": 134.62738037109375, "min": -319.77642822265625, "p10": -84.4240249633789, "median": 45.52089309692383, "p90": 220.77859802246095, "max": 384.83148193359375, "pos_frac": 0.703125, "sample": [108.9100341796875, 53.672996520996094, -70.29633331298828, 132.28237915039062, -41.775787353515625, 34.33369064331055, 199.0525360107422, 221.28033447265625, 1.6533489227294922, 146.23208618164062, 87.05294799804688, 132.24513244628906, 36.19160461425781, 26.489055633544922, 14.530448913574219, 384.83148193359375, -319.77642822265625, 144.25717163085938, 223.97543334960938, 166.0482177734375, 233.8199005126953, -22.228012084960938, -43.96151351928711, 95.71778106689453, 2.801523208618164, -125.28269958496094, 106.19282531738281, 174.64891052246094, 209.11041259765625, 0.4686431884765625, 222.2646942138672, 181.8079071044922, 180.6224365234375, 15.997100830078125, -53.06121826171875, -195.10604858398438, -66.5545883178711, 34.18301010131836, 334.57659912109375, -70.55622863769531, 37.36878967285156, -90.19993591308594, 302.61163330078125, -30.07672119140625, 194.13754272460938, 20.05139923095703, -127.16940307617188, -177.38336181640625, 71.54483795166016, -20.14276885986328, -70.9468994140625, 10.634422302246094, -51.93013000488281, 196.33673095703125, 2.6151695251464844, 91.77896118164062, -59.938453674316406, 219.60787963867188, 91.5584945678711, -131.67083740234375, 165.0670623779297, 147.69378662109375, 159.45187377929688, 211.83877563476562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000566.npy"}
|
|
{"epoch": 0.8556311413454271, "step": 567, "batch_size": 64, "mean": 80.01945495605469, "std": 109.96278381347656, "min": -152.70333862304688, "p10": -86.89714584350584, "median": 75.88079071044922, "p90": 206.3765670776367, "max": 310.7426452636719, "pos_frac": 0.796875, "sample": [142.5294952392578, -5.522274017333984, 2.3875656127929688, 186.80372619628906, -112.81721496582031, 185.5744171142578, 216.77505493164062, 175.5308837890625, 185.66519165039062, 57.34593963623047, 202.61260986328125, 61.149505615234375, 129.73617553710938, -24.201004028320312, 139.30606079101562, -30.348182678222656, -7.863761901855469, 48.511322021484375, 33.16419219970703, 39.08639144897461, 200.136474609375, 12.571617126464844, 5.175642013549805, 208.54803466796875, -95.05692291259766, 0.1944103240966797, -152.70333862304688, -115.34783172607422, 126.3061294555664, -133.24978637695312, 165.14697265625, 68.79045104980469, 204.9805908203125, 310.7426452636719, 238.38519287109375, 0.8953971862792969, 138.69537353515625, 53.88471984863281, 144.78135681152344, 206.11647033691406, -132.61114501953125, 122.52518463134766, 82.97113037109375, 112.70600891113281, 187.99855041503906, 168.65867614746094, 6.00933837890625, 2.9419403076171875, 219.891357421875, 188.10360717773438, 56.445838928222656, 43.849308013916016, 206.488037109375, -0.531982421875, 26.447708129882812, -144.40548706054688, -67.857666015625, 113.47222900390625, 234.99148559570312, 53.89923095703125, 167.8662567138672, 85.05235290527344, 26.060640335083008, 145.8526611328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000567.npy"}
|
|
{"epoch": 0.8571428571428571, "step": 568, "batch_size": 64, "mean": 99.56451416015625, "std": 111.56114196777344, "min": -97.8143310546875, "p10": -37.151942443847645, "median": 79.35431289672852, "p90": 236.76211853027348, "max": 429.38018798828125, "pos_frac": 0.84375, "sample": [174.85147094726562, 82.2301025390625, 29.18326187133789, 48.810935974121094, 112.92437744140625, 162.2000732421875, 179.04547119140625, -69.23920440673828, 27.470829010009766, 14.791885375976562, -48.77687072753906, 196.0713348388672, 70.94365692138672, 287.5906677246094, 10.060379028320312, -67.16429138183594, 54.718894958496094, -17.82758331298828, 50.73253631591797, 154.92218017578125, -97.8143310546875, -26.603118896484375, 76.47852325439453, 189.14198303222656, 242.22496032714844, 225.28067016601562, 168.87661743164062, 287.73016357421875, -68.50408935546875, 62.566864013671875, 159.46461486816406, 125.07553100585938, 92.09441375732422, -23.179908752441406, 171.06942749023438, 12.60496711730957, 10.820068359375, 135.969482421875, 0.2438030242919922, 178.76712036132812, 3.5379180908203125, 223.95086669921875, 127.1793212890625, 102.9361572265625, 178.8543701171875, 63.628395080566406, 40.84889221191406, 6.051788330078125, 13.169525146484375, 65.81881713867188, 109.97817993164062, 204.97113037109375, -68.2977294921875, 165.29428100585938, 212.54171752929688, 46.69024658203125, 0.67633056640625, 360.2015686035156, 241.6827392578125, 257.12054443359375, -41.67286682128906, 40.872886657714844, 210.86550903320312, 429.38018798828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000568.npy"}
|
|
{"epoch": 0.8586545729402872, "step": 569, "batch_size": 64, "mean": 47.89672088623047, "std": 110.10760498046875, "min": -209.17889404296875, "p10": -99.56154022216792, "median": 30.9890718460083, "p90": 191.92330322265627, "max": 311.02783203125, "pos_frac": 0.703125, "sample": [173.48658752441406, -145.05393981933594, 2.243803024291992, 175.85552978515625, 32.77091979980469, 200.23416137695312, 34.50105285644531, 311.02783203125, 61.132080078125, 192.83245849609375, 80.84825134277344, -48.883018493652344, 20.596675872802734, 27.147382736206055, -209.17889404296875, 58.20802307128906, 13.734203338623047, 290.420166015625, -41.367591857910156, 88.0250244140625, 149.22682189941406, 188.65988159179688, -3.7694664001464844, 235.41598510742188, -24.46185302734375, 188.17465209960938, 96.17378234863281, 12.453311920166016, 60.754058837890625, -154.84146118164062, -154.58889770507812, 91.28404235839844, 8.62774658203125, 18.924121856689453, -36.480079650878906, -22.151473999023438, 204.19784545898438, -2.3793773651123047, 58.61261749267578, 20.492074966430664, 75.88772583007812, -39.790626525878906, -140.8975067138672, -22.06769561767578, 140.2286834716797, -152.836669921875, 66.30476379394531, 11.775650024414062, 24.685894012451172, 116.10810089111328, 58.099246978759766, 92.31867980957031, 189.80194091796875, -9.647361755371094, -26.467641830444336, 30.444223403930664, 102.41778564453125, 31.533920288085938, 210.50485229492188, -38.491546630859375, -121.2809066772461, 16.952613830566406, 177.1203155517578, 19.78063201904297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000569.npy"}
|
|
{"epoch": 0.8601662887377173, "step": 570, "batch_size": 64, "mean": 94.40391540527344, "std": 112.80229949951172, "min": -140.54283142089844, "p10": -38.238444709777816, "median": 96.09739303588867, "p90": 216.77617187500002, "max": 435.3050231933594, "pos_frac": 0.78125, "sample": [214.88394165039062, -102.3400650024414, 183.1234130859375, 78.11660766601562, 143.40545654296875, 35.79181671142578, -91.13983154296875, -1.8586044311523438, 56.10765075683594, -20.755722045898438, 98.96143341064453, 93.23335266113281, -2.6690921783447266, 179.97052001953125, -15.217918395996094, 182.8348846435547, 0.8328990936279297, 2.7111854553222656, 107.51747131347656, 119.08322143554688, 114.16416931152344, -140.09292602539062, 41.106040954589844, 189.70936584472656, 213.09713745117188, 3.287374496459961, 25.344890594482422, 181.39561462402344, 237.76345825195312, -45.05492401123047, 6.686195373535156, 125.7532730102539, 187.47805786132812, -99.90501403808594, -0.6076316833496094, 143.23397827148438, 210.10552978515625, 59.23344421386719, 198.68055725097656, 30.664003372192383, 252.47402954101562, 160.5247802734375, 272.4871520996094, -22.33332633972168, 48.258018493652344, 179.76036071777344, 191.76092529296875, 239.12538146972656, 167.26864624023438, -140.54283142089844, 203.07989501953125, 3.7849960327148438, 82.6911849975586, 168.8273162841797, 224.69004821777344, 54.993560791015625, 63.42316818237305, -5.64002799987793, 137.65896606445312, 178.40744018554688, 435.3050231933594, 39.182621002197266, 217.58712768554688, -55.558929443359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000570.npy"}
|
|
{"epoch": 0.8616780045351474, "step": 571, "batch_size": 64, "mean": 53.015411376953125, "std": 116.28044128417969, "min": -204.17977905273438, "p10": -110.88075866699218, "median": 61.66129112243652, "p90": 181.64605560302735, "max": 368.8466796875, "pos_frac": 0.703125, "sample": [129.19200134277344, 140.815185546875, 78.41877746582031, 11.238515853881836, -2.2587738037109375, 105.18620300292969, 117.8123779296875, 101.71212005615234, 153.58071899414062, 368.8466796875, 112.98434448242188, 11.297447204589844, -146.40341186523438, -3.184856414794922, -3.2484359741210938, 158.73939514160156, 21.348358154296875, 179.867431640625, 33.83686828613281, 120.8146743774414, 181.90838623046875, 23.70026397705078, 68.35523986816406, 91.34068298339844, 225.01797485351562, 82.22169494628906, 98.863037109375, 155.85855102539062, -204.17977905273438, 18.710044860839844, 3.370027542114258, -115.26399230957031, -15.988670349121094, -175.03262329101562, 161.29490661621094, -3.017690658569336, -13.817634582519531, 2.834430694580078, -100.65321350097656, -60.98384094238281, 29.158897399902344, 60.63519287109375, 184.83523559570312, 150.47030639648438, 62.6873893737793, 195.24911499023438, 225.53408813476562, -196.52098083496094, 71.31319427490234, -48.344268798828125, -51.09678649902344, -183.50515747070312, 123.88077545166016, 30.179031372070312, 177.32464599609375, -41.01519012451172, 138.56402587890625, 153.67762756347656, 26.232505798339844, 181.03395080566406, -44.24813461303711, 9.627143859863281, -176.58956909179688, 198.77035522460938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000571.npy"}
|
|
{"epoch": 0.8631897203325775, "step": 572, "batch_size": 64, "mean": 61.39219665527344, "std": 105.3299560546875, "min": -189.9368896484375, "p10": -45.54056243896484, "median": 39.370487213134766, "p90": 205.59699096679688, "max": 230.79071044921875, "pos_frac": 0.703125, "sample": [6.343238830566406, -43.98358154296875, 1.9285659790039062, 97.63943481445312, 208.41517639160156, 42.89491271972656, 185.26849365234375, 33.721534729003906, -7.027378082275391, 206.89453125, -26.051742553710938, 168.54725646972656, 229.43563842773438, 207.18115234375, -38.110347747802734, 202.56939697265625, -79.14041137695312, 9.213106155395508, 35.84606170654297, 146.61183166503906, 45.195804595947266, -27.888153076171875, 132.71775817871094, 190.5255889892578, 207.09677124023438, 94.46560668945312, -35.159324645996094, 132.62503051757812, -6.537681579589844, 30.693771362304688, 230.79071044921875, 189.26678466796875, 180.9486846923828, -189.9368896484375, 140.53634643554688, 13.565906524658203, 63.915283203125, -63.37284851074219, 10.152454376220703, 116.52918243408203, 124.02526092529297, -16.337697982788086, 129.4637451171875, 171.6309814453125, 34.70061492919922, 161.5438232421875, 6.7202301025390625, -143.67247009277344, 219.16476440429688, -7.4428253173828125, 96.73431396484375, 151.16319274902344, -36.70256042480469, -13.298851013183594, -158.33526611328125, -46.20783996582031, 44.31543731689453, 118.20819091796875, -23.386497497558594, -138.02938842773438, 193.24237060546875, 0.5365123748779297, 10.183858871459961, 6.55302619934082], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000572.npy"}
|
|
{"epoch": 0.8647014361300076, "step": 573, "batch_size": 64, "mean": 68.35444641113281, "std": 132.92703247070312, "min": -244.27847290039062, "p10": -88.83261108398438, "median": 66.3289680480957, "p90": 252.02437438964844, "max": 408.1785888671875, "pos_frac": 0.71875, "sample": [41.37127685546875, 133.1084442138672, -190.26206970214844, 193.58749389648438, -98.93976593017578, 66.7198486328125, -5.520326614379883, 21.35802459716797, 112.87564086914062, -84.35464477539062, -192.41427612304688, -188.62161254882812, -10.602506637573242, -64.58180236816406, -42.24224853515625, 214.04415893554688, -32.45628356933594, 23.065200805664062, 202.0169677734375, -244.27847290039062, 65.9380874633789, 82.11168670654297, 247.79525756835938, 110.07076263427734, -6.927978515625, 22.770893096923828, 408.1785888671875, 0.3392219543457031, 170.84521484375, 96.81153106689453, 3.132293701171875, 30.941162109375, 79.4623031616211, 53.22279357910156, 117.25537109375, -23.441852569580078, 92.19346618652344, 84.21356201171875, 25.99793243408203, 290.1958312988281, 178.59970092773438, 182.8446502685547, 184.71636962890625, -5.449556350708008, -58.16349792480469, 113.4340591430664, 267.10064697265625, 6.9329071044921875, 42.58385467529297, 106.21660614013672, 253.83685302734375, 302.58502197265625, 82.09689331054688, 125.76629638671875, 0.36138916015625, -108.80685424804688, 370.38958740234375, -50.90886688232422, 100.06419372558594, 93.81377410888672, 130.0172882080078, 54.085182189941406, -90.75173950195312, 288.3408508300781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000573.npy"}
|
|
{"epoch": 0.8662131519274376, "step": 574, "batch_size": 64, "mean": 87.66343688964844, "std": 97.78755950927734, "min": -110.8290023803711, "p10": -15.674074554443358, "median": 78.55154037475586, "p90": 203.1748779296875, "max": 459.5191955566406, "pos_frac": 0.78125, "sample": [88.38219451904297, -54.444435119628906, -17.09088897705078, -110.8290023803711, 118.83778381347656, 136.87477111816406, 22.461349487304688, 36.794647216796875, 10.151849746704102, -25.79650115966797, -11.729568481445312, 122.89216613769531, 8.908309936523438, 43.170066833496094, -5.19976806640625, 4.575248718261719, -5.041378021240234, 72.30036926269531, 43.93639373779297, 120.89549255371094, 152.6975555419922, 39.68468475341797, 12.5660400390625, 171.89227294921875, -14.653175354003906, 221.3040008544922, 76.12372589111328, 203.4812469482422, -1.1496868133544922, 212.29019165039062, 144.0093994140625, -0.22198486328125, 80.97935485839844, 55.92523193359375, 171.174560546875, 145.07730102539062, -86.96761322021484, 146.8840789794922, 202.98556518554688, 10.583681106567383, 123.64361572265625, 31.594879150390625, 186.47662353515625, 234.51644897460938, 148.49014282226562, 203.25601196289062, 128.08424377441406, 127.42901611328125, 83.60643005371094, 17.50811004638672, -9.60858154296875, 199.34991455078125, -16.111602783203125, 145.9188690185547, 116.65648651123047, 34.43030548095703, 59.840145111083984, 184.97467041015625, 192.34307861328125, 236.61131286621094, -24.30561065673828, 172.56361389160156, 459.5191955566406, 28.957138061523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000574.npy"}
|
|
{"epoch": 0.8677248677248677, "step": 575, "batch_size": 64, "mean": 63.039207458496094, "std": 132.0177764892578, "min": -227.87689208984375, "p10": -95.59893417358398, "median": 40.89686584472656, "p90": 234.24727935791014, "max": 449.173828125, "pos_frac": 0.65625, "sample": [118.38762664794922, -165.74391174316406, -227.87689208984375, -6.125139236450195, 449.173828125, 127.10943603515625, -35.695335388183594, 130.04855346679688, 38.66474914550781, -23.92238998413086, 115.49061584472656, -90.3211898803711, 232.2313690185547, -31.140399932861328, -156.64132690429688, 101.76548767089844, -142.14883422851562, 121.18238830566406, -2.9664688110351562, 7.914222717285156, 55.505897521972656, 234.52615356445312, 177.01620483398438, 6.821022033691406, -4.849037170410156, 25.648727416992188, -166.0057830810547, 154.60302734375, 280.9353942871094, -8.410179138183594, 14.345380783081055, 75.97571563720703, 208.39356994628906, 311.915283203125, 187.60552978515625, -65.35517120361328, 236.81494140625, -9.13336181640625, 260.65887451171875, 176.50575256347656, -26.353775024414062, 41.04345703125, 87.93631744384766, 40.750274658203125, 9.301162719726562, -18.31867218017578, 54.159217834472656, -186.02523803710938, 124.55934143066406, 325.6177062988281, 233.59657287597656, 106.75953674316406, 28.120101928710938, 52.009395599365234, -97.86082458496094, 33.623374938964844, 86.09141540527344, -4.430095672607422, 13.825037002563477, 109.90182495117188, -21.10851287841797, 191.221435546875, 147.02381896972656, -9.83810806274414], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000575.npy"}
|
|
{"epoch": 0.8692365835222978, "step": 576, "batch_size": 64, "mean": 90.69012451171875, "std": 118.3714599609375, "min": -241.9419403076172, "p10": -47.734458923339844, "median": 79.53715515136719, "p90": 230.85740356445316, "max": 354.22772216796875, "pos_frac": 0.765625, "sample": [15.430328369140625, 178.02642822265625, -16.735755920410156, 77.36016845703125, 272.51617431640625, 36.15538024902344, 201.4862823486328, 12.868310928344727, 81.71414184570312, 148.600830078125, -65.94939422607422, -92.51304626464844, 208.3519287109375, -59.5523681640625, 112.17093658447266, -152.41954040527344, -45.741355895996094, 182.0624542236328, 354.22772216796875, 235.60150146484375, 203.05186462402344, 237.31393432617188, 136.62393188476562, 8.482892990112305, 150.7779541015625, 99.57740783691406, 13.785554885864258, -68.55497741699219, 295.9205017089844, -48.588645935058594, 47.85675048828125, 49.28699493408203, 83.26930236816406, 195.08135986328125, 75.9322509765625, 61.90415954589844, 272.5400390625, -12.441024780273438, -4.849578857421875, 15.426559448242188, 214.52159118652344, -27.213603973388672, 186.5369415283203, 25.471311569213867, 219.60264587402344, 4.3610382080078125, 217.71400451660156, 202.27859497070312, -5.553762435913086, 219.787841796875, 215.56614685058594, -241.9419403076172, 247.91123962402344, 179.23191833496094, 82.1836929321289, 206.57135009765625, -2.6131534576416016, -3.2713851928710938, 27.016761779785156, 100.55118560791016, 0.6110134124755859, 57.862266540527344, 43.05675506591797, 137.86729431152344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000576.npy"}
|
|
{"epoch": 0.8707482993197279, "step": 577, "batch_size": 64, "mean": 59.095054626464844, "std": 114.10347747802734, "min": -264.23223876953125, "p10": -71.8713493347168, "median": 47.250288009643555, "p90": 202.78093719482422, "max": 295.5339660644531, "pos_frac": 0.75, "sample": [19.799354553222656, 13.489089965820312, -16.245933532714844, 43.712745666503906, 9.45904541015625, 160.50796508789062, 17.978641510009766, -17.58147621154785, 248.7391357421875, -5.332725524902344, -68.98831939697266, 203.88687133789062, 169.4167938232422, -55.4700927734375, -106.30079650878906, 213.23028564453125, 237.13638305664062, 184.3707275390625, -158.83229064941406, 138.27435302734375, 216.4208526611328, 94.03058624267578, 14.46950912475586, 167.05104064941406, 33.759002685546875, -77.91791534423828, -132.82070922851562, 139.01376342773438, 163.1846160888672, 142.452880859375, -34.63714599609375, 183.0561065673828, -43.59271240234375, 49.041229248046875, 56.698333740234375, 19.266448974609375, 45.459346771240234, 18.347442626953125, 8.611106872558594, 10.543529510498047, 49.25603485107422, 20.2598876953125, 216.12083435058594, 4.404958724975586, 200.20042419433594, 295.5339660644531, -241.04244995117188, 146.79544067382812, 55.56095886230469, 61.798065185546875, 33.93690490722656, 146.56842041015625, 180.126220703125, 190.02413940429688, 4.3417205810546875, 60.37819290161133, -264.23223876953125, -1.6801166534423828, -73.10693359375, 138.02554321289062, -1.6950759887695312, 104.13548278808594, 98.44465637207031, 54.24137878417969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000577.npy"}
|
|
{"epoch": 0.872260015117158, "step": 578, "batch_size": 64, "mean": 60.51626968383789, "std": 122.33454132080078, "min": -233.31944274902344, "p10": -88.2426643371582, "median": 39.77635192871094, "p90": 200.39102478027345, "max": 415.71807861328125, "pos_frac": 0.671875, "sample": [134.33216857910156, -2.7181549072265625, 93.91615295410156, 57.80859375, 82.24779510498047, -87.7739486694336, 100.53324890136719, 22.160552978515625, 172.12811279296875, 1.6318988800048828, 16.989456176757812, 415.71807861328125, 8.819999694824219, 146.61228942871094, 187.024658203125, -233.31944274902344, 163.94024658203125, 196.96878051757812, 166.15493774414062, 104.1490478515625, 176.1807861328125, 115.08296203613281, 0.6585025787353516, -5.754199981689453, 13.615324020385742, 80.0478286743164, 235.20770263671875, 2.1880035400390625, 102.84358215332031, -5.988746643066406, 286.68341064453125, 193.9820556640625, 201.39108276367188, -99.06015014648438, -8.029701232910156, 37.70448303222656, -6.449094772338867, 107.87391662597656, 68.32099151611328, 91.24761962890625, 198.05755615234375, -174.6617431640625, 41.84822082519531, -40.02496337890625, -26.935104370117188, 15.82354736328125, -54.109092712402344, 185.73353576660156, -108.98431396484375, 224.3605499267578, 113.68570709228516, -104.26947784423828, 324.877685546875, -18.581453323364258, -67.3316650390625, -88.44354248046875, 29.411056518554688, 81.63175201416016, -40.552085876464844, 11.22163200378418, -27.006954193115234, -6.012763977050781, -163.00726318359375, 231.2394561767578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000578.npy"}
|
|
{"epoch": 0.873771730914588, "step": 579, "batch_size": 64, "mean": 86.57614135742188, "std": 101.84280395507812, "min": -194.95985412597656, "p10": -48.493403625488256, "median": 90.96808624267578, "p90": 206.25039672851562, "max": 314.53173828125, "pos_frac": 0.796875, "sample": [128.90744018554688, 97.38117980957031, 73.71395111083984, 42.83814239501953, 63.44451141357422, -150.77316284179688, 218.5111541748047, 92.29289245605469, 145.32635498046875, 314.53173828125, 110.73785400390625, 182.4607696533203, 148.32144165039062, 206.3460693359375, 142.6580047607422, 20.202369689941406, 180.43385314941406, -57.85380554199219, -9.510459899902344, 166.1586151123047, 240.0897216796875, -80.33589172363281, 192.2118377685547, -5.070592880249023, 80.00172424316406, 171.1327667236328, 213.7248992919922, 16.675125122070312, 200.9108428955078, 193.06822204589844, -65.43810272216797, 21.12347412109375, 150.09146118164062, -2.232633590698242, 127.21207427978516, 131.57521057128906, 42.562835693359375, -14.551511764526367, -61.43074035644531, 191.91136169433594, 16.201873779296875, 5.372278213500977, -13.79443359375, 24.949180603027344, 83.10067749023438, 4.422517776489258, -26.6524658203125, -194.95985412597656, 206.02716064453125, 162.74099731445312, 108.16473388671875, 218.98388671875, 78.44070434570312, 230.99632263183594, 141.16835021972656, 5.925985336303711, 153.9308319091797, 152.216552734375, 42.43854522705078, 89.64328002929688, -73.98179626464844, 28.74386978149414, 156.4193115234375, 81.01359558105469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000579.npy"}
|
|
{"epoch": 0.8752834467120182, "step": 580, "batch_size": 64, "mean": 74.09309387207031, "std": 127.99964141845703, "min": -208.28054809570312, "p10": -55.95884628295898, "median": 61.8408088684082, "p90": 244.1212844848633, "max": 379.76605224609375, "pos_frac": 0.671875, "sample": [-93.23652648925781, -52.77555847167969, -38.612823486328125, 13.424482345581055, -43.449127197265625, 13.15086555480957, 186.76661682128906, 89.31062316894531, 89.324951171875, 39.25959014892578, 236.99156188964844, 85.72270965576172, 74.76222229003906, -12.533382415771484, -40.67633819580078, 74.46196746826172, 176.39097595214844, 137.81912231445312, 4.246122360229492, 44.92390441894531, -3.8617820739746094, 140.98240661621094, 268.065673828125, 61.227256774902344, 150.98387145996094, -42.51200866699219, 315.98089599609375, -8.722320556640625, -12.76228141784668, 247.1768798828125, 233.971923828125, 206.24319458007812, -8.005592346191406, 150.66461181640625, -41.77766418457031, 193.4595947265625, -17.078380584716797, -208.28054809570312, 11.661922454833984, 379.76605224609375, -188.49778747558594, 44.483428955078125, -80.13174438476562, 144.28195190429688, 314.64959716796875, -15.225648880004883, 59.304603576660156, 69.93575286865234, 279.3568420410156, 157.55441284179688, 155.051025390625, 154.99090576171875, -57.32311248779297, -156.3147430419922, 12.179447174072266, -45.888427734375, 62.45436096191406, 149.4925994873047, 133.506103515625, 350.3404235839844, 47.16459655761719, -124.27339172363281, 123.54866027832031, 148.8623504638672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000580.npy"}
|
|
{"epoch": 0.8767951625094482, "step": 581, "batch_size": 64, "mean": 69.28839874267578, "std": 122.08566284179688, "min": -159.46902465820312, "p10": -98.24797210693359, "median": 47.52997970581055, "p90": 207.87549591064453, "max": 450.7839050292969, "pos_frac": 0.703125, "sample": [141.38291931152344, 211.39691162109375, 44.28807830810547, 2.8331832885742188, 86.95356750488281, -2.5429534912109375, 13.156600952148438, -97.0645751953125, 108.22073364257812, 105.78595733642578, -114.446044921875, 17.701213836669922, 10.509122848510742, 25.22662925720215, 173.95046997070312, -90.70048522949219, 172.78012084960938, 99.96666717529297, 139.2559356689453, 30.53358268737793, 187.82887268066406, 32.375091552734375, 206.93838500976562, -35.5543212890625, 256.33154296875, 208.33203125, 186.37489318847656, -48.83879089355469, 47.798500061035156, 84.44679260253906, -59.40203857421875, 190.23672485351562, 63.26542282104492, 163.13632202148438, 142.46212768554688, 126.06906127929688, 47.26145935058594, -159.46902465820312, 279.1942138671875, 263.98895263671875, -51.55479431152344, -158.43881225585938, 132.459228515625, 43.33534240722656, 124.498779296875, -98.75514221191406, 450.7839050292969, -5.5540771484375, -18.692134857177734, -124.53073120117188, 198.26580810546875, -37.104270935058594, 124.40638732910156, 191.11532592773438, 21.616230010986328, 169.3391876220703, -3.0349578857421875, -116.24356079101562, -116.8741455078125, 32.634220123291016, 43.44390869140625, 190.13540649414062, 208.27711486816406, -27.03453826904297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000581.npy"}
|
|
{"epoch": 0.8783068783068783, "step": 582, "batch_size": 64, "mean": 65.6463394165039, "std": 112.81784057617188, "min": -196.03927612304688, "p10": -61.12399902343749, "median": 54.007856369018555, "p90": 200.93421478271486, "max": 340.221435546875, "pos_frac": 0.71875, "sample": [113.68573760986328, 185.52456665039062, 58.80803680419922, 103.830322265625, 44.76759338378906, 42.567134857177734, 241.4183807373047, 55.183109283447266, 186.23300170898438, 148.94390869140625, 92.46453857421875, 52.832603454589844, 287.87646484375, 340.221435546875, -52.49969482421875, 203.07110595703125, -64.82012939453125, -6.3047637939453125, 123.24235534667969, 125.80138397216797, -6.018821716308594, 141.77053833007812, 15.119800567626953, 237.18795776367188, 179.42120361328125, 260.4459228515625, -15.837728500366211, 1.643472671508789, -140.70742797851562, -187.5312957763672, -8.078603744506836, 6.387603759765625, 113.79943084716797, -29.81121826171875, -25.880178451538086, 173.30592346191406, 153.27630615234375, 59.945858001708984, 217.78106689453125, 7.361660003662109, 96.91970825195312, 115.19410705566406, 83.11302185058594, 9.753267288208008, 7.113126754760742, -12.757308959960938, -68.60230255126953, 171.14923095703125, 9.593040466308594, -196.03927612304688, 3.330770492553711, 24.823394775390625, 0.7424736022949219, -0.4215545654296875, -116.29653930664062, 65.77064514160156, 173.50006103515625, -42.823890686035156, 186.13739013671875, -36.131317138671875, -93.37911987304688, 13.89776611328125, 174.40237426757812, 195.94813537597656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000582.npy"}
|
|
{"epoch": 0.8798185941043084, "step": 583, "batch_size": 64, "mean": 83.887939453125, "std": 125.07829284667969, "min": -230.25364685058594, "p10": -47.25640487670898, "median": 88.61177825927734, "p90": 232.49913024902344, "max": 322.97381591796875, "pos_frac": 0.6875, "sample": [92.29296875, -24.041229248046875, -8.764518737792969, -2.682567596435547, 179.37899780273438, 84.93058776855469, -16.83165740966797, -17.28155517578125, -156.14453125, -10.931716918945312, 218.0639190673828, -13.343868255615234, -60.65145492553711, 168.3517303466797, 105.87753295898438, -51.11259460449219, 67.88079833984375, 2.3358631134033203, 213.87857055664062, 189.0669708251953, 33.98091125488281, -165.28286743164062, 185.1327362060547, 11.045372009277344, 167.7722930908203, 142.19625854492188, 305.7401428222656, -230.25364685058594, 31.765613555908203, 196.00051879882812, 42.29081344604492, 56.01197052001953, -23.510482788085938, 27.566482543945312, 251.49954223632812, 239.3544158935547, -12.127593994140625, -38.258628845214844, 222.46392822265625, 179.4629364013672, 109.06758117675781, 235.20010375976562, -206.09524536132812, 322.97381591796875, 2.735483169555664, 189.0242919921875, 155.50689697265625, 162.57049560546875, 143.10107421875, -13.829093933105469, 283.7845458984375, 107.18692779541016, 206.82521057128906, 17.094072341918945, 128.32406616210938, 230.5123291015625, -89.46995544433594, 115.33785247802734, -1.5809326171875, 194.89028930664062, 222.90855407714844, 233.35061645507812, 43.16468811035156, -6.878196716308594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000583.npy"}
|
|
{"epoch": 0.8813303099017384, "step": 584, "batch_size": 64, "mean": 70.59056091308594, "std": 131.31869506835938, "min": -196.6083221435547, "p10": -79.7304084777832, "median": 38.76611137390137, "p90": 257.48529663085947, "max": 371.8002624511719, "pos_frac": 0.640625, "sample": [219.40957641601562, 282.1044006347656, 41.97954177856445, -4.488929748535156, 76.37496948242188, -3.173585891723633, 276.51153564453125, 233.83197021484375, 171.87054443359375, 197.9391326904297, 109.76631164550781, -2.559297561645508, -16.579439163208008, -81.0483169555664, -159.15878295898438, 117.6210708618164, 124.38663482666016, 210.64077758789062, 190.9063720703125, -9.35623550415039, 192.3955078125, 267.6224365234375, -2.9673385620117188, 184.18946838378906, 371.8002624511719, 184.9955596923828, 109.75177764892578, -14.212356567382812, 51.08882141113281, 24.3480224609375, 199.88714599609375, -64.37660217285156, 149.0911407470703, 20.167022705078125, -135.89129638671875, 196.78704833984375, -16.205886840820312, -18.281890869140625, -21.86913299560547, -52.88655090332031, 4.357646942138672, 104.18412017822266, -76.65528869628906, 35.55268096923828, 101.17719268798828, 311.694091796875, -2.3714046478271484, 69.90733337402344, 86.60872650146484, -131.69937133789062, -196.6083221435547, 33.87809371948242, -49.18694305419922, 183.91571044921875, 1.35040283203125, 313.8721008300781, 272.0483093261719, 12.713655471801758, 12.960752487182617, 34.95597839355469, -22.86707305908203, 132.21176147460938, -133.2965850830078, -183.3192596435547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000584.npy"}
|
|
{"epoch": 0.8828420256991686, "step": 585, "batch_size": 64, "mean": 51.45219802856445, "std": 112.80396270751953, "min": -259.77105712890625, "p10": -46.385207366943355, "median": 40.0779914855957, "p90": 198.93442687988284, "max": 379.4570617675781, "pos_frac": 0.703125, "sample": [27.813373565673828, 165.2118377685547, 100.02236938476562, -5.0217132568359375, 61.06586456298828, 68.2523193359375, 62.20880126953125, 221.6588134765625, -14.383583068847656, 8.831794738769531, -186.2385711669922, 26.453372955322266, 162.22604370117188, 38.34495544433594, -1.0262107849121094, 379.4570617675781, -148.29718017578125, -5.466423034667969, 77.22651672363281, 119.57627868652344, 119.779052734375, 5.98402214050293, -8.14314079284668, -141.84593200683594, -11.23084831237793, 4.632467269897461, -47.989501953125, 9.635185241699219, 250.36094665527344, 155.39602661132812, 9.701709747314453, 22.184539794921875, 21.334156036376953, -8.849010467529297, -232.23240661621094, 6.77618408203125, 12.372589111328125, 162.99383544921875, 210.66964721679688, 126.33467102050781, 45.65700149536133, 166.86563110351562, 85.44950866699219, -42.64185333251953, -259.77105712890625, 210.29397583007812, 214.47398376464844, 199.83753967285156, -59.225563049316406, -21.06096649169922, 43.31831359863281, 41.81102752685547, -37.666656494140625, 3.8560352325439453, 62.56951904296875, 111.9740219116211, 74.3486328125, 196.82716369628906, 196.67108154296875, 104.69329833984375, 82.5088882446289, -1.2834014892578125, 71.20284271240234, -23.548011779785156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000585.npy"}
|
|
{"epoch": 0.8843537414965986, "step": 586, "batch_size": 64, "mean": 69.48262023925781, "std": 86.9936294555664, "min": -102.50166320800781, "p10": -15.689715003967283, "median": 45.20894241333008, "p90": 198.73119354248047, "max": 289.518310546875, "pos_frac": 0.78125, "sample": [74.055908203125, 23.84278106689453, 170.20904541015625, 43.592323303222656, 30.099761962890625, 137.16586303710938, 24.066709518432617, 8.537933349609375, -13.021003723144531, -16.83344841003418, 141.27452087402344, -19.63359832763672, 3.3825225830078125, 247.05294799804688, 211.60284423828125, -7.790781021118164, 125.019287109375, 114.48722839355469, 78.25601196289062, 134.86123657226562, 23.93563461303711, 68.77131652832031, 24.160114288330078, 195.94239807128906, 31.32928466796875, -3.940420150756836, 15.954719543457031, 18.978424072265625, 166.42349243164062, 26.12936019897461, 173.612548828125, 171.7156219482422, 70.3296127319336, 61.15653610229492, 256.06854248046875, -0.5254364013671875, -102.50166320800781, 238.5277099609375, -3.8644676208496094, 7.577238082885742, 2.418947219848633, 20.33843994140625, -42.465545654296875, 138.4475860595703, 86.59153747558594, -8.888912200927734, 1.6955795288085938, 73.00032806396484, -5.2236328125, 125.03221130371094, 289.518310546875, -36.29118347167969, 15.205245971679688, 46.8255615234375, 199.9263916015625, 51.883399963378906, -60.905052185058594, 203.27294921875, 50.90293884277344, -36.15953063964844, 183.34814453125, 10.652690887451172, 103.94818115234375, 83.80476379394531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000586.npy"}
|
|
{"epoch": 0.8858654572940288, "step": 587, "batch_size": 64, "mean": 67.91419982910156, "std": 147.01974487304688, "min": -592.3355712890625, "p10": -83.76937866210936, "median": 73.691162109375, "p90": 216.44420776367193, "max": 358.5184020996094, "pos_frac": 0.78125, "sample": [32.94508743286133, -135.9304656982422, -4.130531311035156, 151.074462890625, 169.1881103515625, -158.68988037109375, 46.33115005493164, -60.764801025390625, 184.10385131835938, 166.60260009765625, 20.317890167236328, 77.22509765625, -87.01423645019531, 3.5444183349609375, -6.569091796875, 95.0884017944336, 164.03634643554688, -5.344982147216797, -96.29804992675781, 232.03761291503906, 221.4481964111328, 154.03787231445312, 0.142120361328125, 204.7682342529297, 120.74661254882812, 26.31812286376953, 221.56011962890625, 9.876144409179688, -592.3355712890625, 23.731388092041016, 127.05525970458984, 154.39585876464844, 113.25279235839844, 142.6708984375, 10.902400970458984, 70.1572265625, 107.81025695800781, -317.121826171875, -73.93344116210938, 358.5184020996094, 170.3177032470703, 7.324253082275391, 198.53097534179688, 146.8405303955078, 1.4758281707763672, 167.82904052734375, 14.509017944335938, 188.73828125, 183.63674926757812, 123.93400573730469, 234.94407653808594, -99.73158264160156, 168.60964965820312, 15.249923706054688, 59.9615478515625, 281.5087890625, 22.38024139404297, -18.64117431640625, -76.19804382324219, 58.981956481933594, 83.50275421142578, 333.3471374511719, 196.58267211914062, 11.120353698730469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000587.npy"}
|
|
{"epoch": 0.8873771730914588, "step": 588, "batch_size": 64, "mean": 78.0347671508789, "std": 131.193115234375, "min": -236.71783447265625, "p10": -96.3377914428711, "median": 65.48341369628906, "p90": 223.01445312500002, "max": 391.406982421875, "pos_frac": 0.78125, "sample": [72.56318664550781, -104.64404296875, -59.401344299316406, 216.67037963867188, 20.699508666992188, 152.57305908203125, 184.9327392578125, 35.01988220214844, 223.44711303710938, 148.7672119140625, 376.76153564453125, 53.26380920410156, 12.42750358581543, 152.2550811767578, 59.687110900878906, 137.39198303222656, 222.00491333007812, 368.5271911621094, 23.707181930541992, 391.406982421875, 99.78055572509766, 175.8629608154297, -31.14718246459961, 194.48133850097656, 2.328195571899414, 69.25103759765625, 27.218048095703125, -12.575874328613281, 5.638099670410156, -117.56845092773438, 4.911386489868164, 113.19192504882812, -149.18505859375, 153.73838806152344, 137.65872192382812, 251.51885986328125, 24.709184646606445, 134.52740478515625, 88.07589721679688, -98.48662567138672, -36.684837341308594, -49.978240966796875, 90.13724517822266, 231.1143341064453, 152.7045440673828, 62.92982482910156, 184.50979614257812, -10.281597137451172, 38.30854797363281, 184.18833923339844, 41.26176452636719, -194.58468627929688, 55.22209167480469, 47.85785675048828, 140.2163543701172, 63.56083679199219, -91.32384490966797, 25.10387420654297, 67.40599060058594, -194.04922485351562, 182.55599975585938, 268.2944030761719, 210.48382568359375, -236.71783447265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000588.npy"}
|
|
{"epoch": 0.8888888888888888, "step": 589, "batch_size": 64, "mean": 78.2611312866211, "std": 96.94042205810547, "min": -144.99627685546875, "p10": -41.7246223449707, "median": 82.95345306396484, "p90": 191.52336425781252, "max": 326.26220703125, "pos_frac": 0.8125, "sample": [45.59088134765625, 37.470252990722656, -135.19712829589844, 203.63275146484375, 109.68399810791016, 0.28086090087890625, -87.9880142211914, -12.779144287109375, 50.16748809814453, -7.7663726806640625, -55.99864196777344, 35.48835754394531, 326.26220703125, 1.9904670715332031, 183.7039031982422, 81.6871109008789, 99.7751235961914, 150.1266326904297, 152.36904907226562, 179.78009033203125, 88.29400634765625, 106.12985229492188, 185.17086791992188, -112.04965209960938, 252.8942413330078, 128.8663330078125, -45.10033416748047, 120.59859466552734, 151.9322967529297, 157.86871337890625, 49.68836212158203, 0.9045448303222656, 192.23123168945312, 85.86689758300781, -33.84796142578125, 91.7300033569336, 85.35143280029297, 40.592796325683594, 84.21979522705078, 198.50733947753906, 46.52415466308594, 80.01588439941406, 205.11532592773438, 122.12793731689453, 61.37553405761719, -74.59347534179688, 156.98709106445312, -23.684986114501953, -5.843692779541016, 33.3388671875, -144.99627685546875, 183.41094970703125, 173.95252990722656, 20.653907775878906, 64.23291778564453, 189.87167358398438, 123.09577941894531, 155.20115661621094, 28.430015563964844, 33.341087341308594, 101.48391723632812, 31.347759246826172, 257.57342529296875, 1.621927261352539], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000589.npy"}
|
|
{"epoch": 0.890400604686319, "step": 590, "batch_size": 64, "mean": 72.46513366699219, "std": 107.32760620117188, "min": -191.5050811767578, "p10": -52.17394409179687, "median": 69.54734420776367, "p90": 208.2011672973633, "max": 268.12152099609375, "pos_frac": 0.71875, "sample": [122.2117691040039, 44.54248809814453, 135.76895141601562, 6.370994567871094, 198.16632080078125, 182.28738403320312, 130.09707641601562, -186.7623291015625, 75.01155090332031, -28.747276306152344, 46.35768127441406, 0.8912849426269531, 12.483192443847656, 37.77565002441406, -191.5050811767578, 190.69918823242188, 173.7222137451172, 44.434173583984375, -17.344066619873047, -0.6360626220703125, -61.94627380371094, 118.44866943359375, 9.063056945800781, 211.47671508789062, 205.78005981445312, -24.86603546142578, -46.4609375, 113.41523742675781, 268.12152099609375, 95.65509033203125, -38.263832092285156, 265.6563415527344, 14.308244705200195, -12.365478515625, 92.85345458984375, 74.38430786132812, 86.38422393798828, 15.911514282226562, 74.22456359863281, 8.406967163085938, 224.12344360351562, 125.84968566894531, 262.1623840332031, 199.00823974609375, 191.2105712890625, 119.86027526855469, 94.6253662109375, 232.18092346191406, -64.2874755859375, 27.289718627929688, -63.51427459716797, 209.23878479003906, 137.46261596679688, -33.85606384277344, -0.2937774658203125, 196.26083374023438, -2.5992298126220703, 64.87012481689453, 197.8977508544922, -4.133815765380859, 48.69278335571289, -54.62237548828125, 191.55348205566406, -107.22444152832031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000590.npy"}
|
|
{"epoch": 0.891912320483749, "step": 591, "batch_size": 64, "mean": 91.94720458984375, "std": 141.78790283203125, "min": -213.2427520751953, "p10": -83.64671783447264, "median": 78.25871276855469, "p90": 240.82086944580078, "max": 503.00543212890625, "pos_frac": 0.765625, "sample": [3.130067825317383, 209.87347412109375, 181.90541076660156, 187.4720916748047, 195.05816650390625, 47.561676025390625, 201.7196044921875, 110.03492736816406, 55.94240951538086, 95.87644958496094, 63.55555725097656, -153.68753051757812, 219.6167449951172, -96.2231674194336, 110.7742691040039, -15.066146850585938, 23.194671630859375, 55.77117919921875, 107.1180419921875, 4.373790740966797, 242.19107055664062, 20.51152992248535, 363.1421203613281, -186.11448669433594, 191.6599884033203, -178.64247131347656, 222.5533447265625, 110.71484375, -8.610748291015625, 6.506561279296875, -213.2427520751953, -181.44810485839844, 301.1616516113281, 237.6237335205078, -4.127555847167969, 218.9197235107422, -8.78818130493164, 91.2158203125, 196.30014038085938, 20.846576690673828, 9.215057373046875, -9.396453857421875, 65.30160522460938, 172.18031311035156, 1.0361347198486328, 277.5076904296875, -16.097177505493164, -91.97911071777344, -8.986030578613281, 503.00543212890625, -64.2044677734375, 14.100532531738281, 291.03460693359375, 32.15081024169922, 12.900508880615234, 216.54006958007812, 226.5430908203125, 153.38621520996094, 176.45494079589844, 130.5636444091797, 229.8661346435547, 302.591552734375, 3.816190719604492, 206.71556091308594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000591.npy"}
|
|
{"epoch": 0.8934240362811792, "step": 592, "batch_size": 64, "mean": 70.43186950683594, "std": 112.14576721191406, "min": -122.56804656982422, "p10": -54.722562408447246, "median": 32.87695121765137, "p90": 214.3179702758789, "max": 406.1944274902344, "pos_frac": 0.734375, "sample": [78.57310485839844, 230.5767822265625, 210.9785614013672, 17.64798927307129, 193.6796875, 197.2642364501953, 251.4080810546875, 28.037715911865234, 8.353202819824219, 34.58428192138672, -38.330299377441406, 134.9510498046875, 52.35408020019531, 25.774620056152344, -66.937744140625, 96.61913299560547, 200.97434997558594, 131.17965698242188, 10.482643127441406, 30.16860580444336, -101.27595520019531, 196.13311767578125, 116.85853576660156, 284.8085021972656, 19.13046646118164, 169.0313262939453, 121.7225112915039, 189.22579956054688, 215.7491455078125, 7.7067413330078125, -111.18350982666016, 146.03292846679688, -26.925689697265625, 172.58412170410156, 39.22740936279297, 87.01445007324219, -118.47145080566406, 125.86630249023438, -122.56804656982422, -61.74781799316406, -6.709201812744141, -7.852094650268555, 36.035377502441406, -21.872467041015625, -81.13482666015625, 2.309906005859375, 53.19670104980469, 322.67059326171875, 247.28109741210938, 11.84808349609375, 406.1944274902344, -0.6498489379882812, 31.169620513916016, -0.18531036376953125, 13.902679443359375, 6.16596794128418, -14.03485107421875, -13.234001159667969, -35.68014144897461, 8.916156768798828, 60.17778396606445, 29.292747497558594, 128.94204711914062, 153.63067626953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000592.npy"}
|
|
{"epoch": 0.8949357520786092, "step": 593, "batch_size": 64, "mean": 74.90463256835938, "std": 114.67764282226562, "min": -213.51950073242188, "p10": -71.01905670166012, "median": 64.82245635986328, "p90": 221.57353973388675, "max": 278.91290283203125, "pos_frac": 0.796875, "sample": [213.6101531982422, 40.08686447143555, 213.80465698242188, 16.446014404296875, 184.59765625, -37.63226318359375, 106.5169448852539, -213.51950073242188, 224.90306091308594, 48.127166748046875, 203.77374267578125, 33.69765090942383, 278.91290283203125, 182.95384216308594, 64.79533386230469, 13.67245864868164, 130.27499389648438, 127.99813842773438, 190.93362426757812, 140.00634765625, 25.713552474975586, 33.06884765625, 244.44107055664062, 231.77993774414062, -26.792572021484375, -6.702903747558594, 10.728212356567383, 183.65060424804688, 126.43811798095703, -129.80404663085938, 78.41536712646484, -85.32768249511719, 30.482009887695312, 64.84957885742188, 109.25978088378906, 182.39456176757812, 0.7048721313476562, -6.817626953125, 138.73617553710938, -158.33462524414062, -151.0519561767578, 0.19541168212890625, 236.14695739746094, -145.41812133789062, 175.82293701171875, 169.1781005859375, 179.6848907470703, 11.796815872192383, -18.602134704589844, 258.22186279296875, 86.72201538085938, -14.540155410766602, 207.7508544921875, 116.85243225097656, 88.14730072021484, -113.68192291259766, 12.159090042114258, 229.8474578857422, 1.4739036560058594, 15.138397216796875, 24.713592529296875, 40.438026428222656, 17.2388916015625, 154.8188934326172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000593.npy"}
|
|
{"epoch": 0.8964474678760394, "step": 594, "batch_size": 64, "mean": 66.18623352050781, "std": 101.03882598876953, "min": -274.03973388671875, "p10": -59.315158081054676, "median": 60.4705924987793, "p90": 190.19012908935548, "max": 246.87213134765625, "pos_frac": 0.765625, "sample": [72.82803344726562, 180.6765899658203, -90.94181823730469, -15.409385681152344, 78.41224670410156, 141.3922119140625, 21.79145050048828, 56.612335205078125, 94.89280700683594, 8.494178771972656, -81.86943054199219, 32.0850944519043, -8.162521362304688, 172.3408966064453, 26.8659610748291, 7.808071136474609, -77.2303466796875, 3.5819664001464844, 79.20437622070312, 10.977188110351562, 208.65585327148438, 101.56806945800781, 1.211660385131836, 190.34864807128906, 209.52870178222656, 126.79499816894531, 192.65408325195312, -45.59027862548828, 184.59548950195312, -2.5758609771728516, 167.78091430664062, 158.67306518554688, 246.87213134765625, 167.55267333984375, 77.32867431640625, -50.20097351074219, -66.97547149658203, 127.47537231445312, 145.84454345703125, -63.22123718261719, 188.14923095703125, 14.00291633605957, 63.405517578125, 184.03378295898438, 2.0591812133789062, 18.858861923217773, 57.535667419433594, 199.27813720703125, 0.8183155059814453, 189.82025146484375, 161.49774169921875, -274.03973388671875, 111.5130615234375, 246.023681640625, 38.688568115234375, 120.58006286621094, 0.6250457763671875, 54.14966583251953, 116.54310607910156, -34.255897521972656, 104.57841491699219, -28.118453979492188, -26.833274841308594, -65.66624450683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000594.npy"}
|
|
{"epoch": 0.8979591836734694, "step": 595, "batch_size": 64, "mean": 86.800048828125, "std": 130.23133850097656, "min": -214.1940460205078, "p10": -37.90769653320312, "median": 77.47121810913086, "p90": 235.11702728271484, "max": 434.9447937011719, "pos_frac": 0.75, "sample": [-3.8785171508789062, 77.5704116821289, 434.9447937011719, 110.78340148925781, -9.302894592285156, 7.86656379699707, 81.9166488647461, 32.18680953979492, 12.413955688476562, 203.86184692382812, 258.1731262207031, 18.311309814453125, 90.33831787109375, -153.20799255371094, 206.52723693847656, -1.7373428344726562, -1.5641117095947266, 9.67974853515625, 212.91419982910156, 52.47547149658203, 20.867454528808594, 123.81025695800781, 249.5297393798828, 11.804100036621094, 334.55255126953125, 61.05181121826172, 120.09978485107422, 90.11849975585938, 234.8765411376953, 137.26885986328125, -2.6122970581054688, -123.50880432128906, 67.25593566894531, -39.487640380859375, 194.48681640625, 190.70863342285156, 272.69342041015625, -194.56405639648438, -203.70787048339844, 232.66619873046875, 192.172607421875, 206.7172393798828, 173.0628662109375, 40.144317626953125, -98.84658813476562, 212.8542938232422, 95.34576416015625, 158.66983032226562, -34.221160888671875, 25.173084259033203, -7.568670272827148, 250.01736450195312, 158.1439208984375, 40.402442932128906, -32.34557342529297, 39.73579406738281, 226.7443084716797, 189.860107421875, 77.37202453613281, 7.1326904296875, -0.5782718658447266, -214.1940460205078, 196.0056915283203, 235.2200927734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000595.npy"}
|
|
{"epoch": 0.8994708994708994, "step": 596, "batch_size": 64, "mean": 34.055152893066406, "std": 104.47257232666016, "min": -195.46133422851562, "p10": -85.95968856811523, "median": 15.840761184692383, "p90": 191.43274383544923, "max": 271.9449462890625, "pos_frac": 0.5625, "sample": [-93.00234985351562, -18.82433319091797, 219.0397491455078, 17.41693878173828, 35.889549255371094, -91.70487213134766, -12.074031829833984, 0.29370880126953125, -67.67780303955078, 205.01260375976562, -117.53373718261719, 58.25844192504883, -102.8548583984375, -34.0980224609375, 271.9449462890625, 190.03416442871094, 170.2924041748047, 104.85060119628906, -137.09414672851562, 61.884300231933594, -50.126708984375, 192.03213500976562, 64.157958984375, 128.42587280273438, 202.79635620117188, 179.35728454589844, 115.1438217163086, -21.341588973999023, 59.07488250732422, -155.17333984375, -72.55426025390625, 95.45088195800781, 27.488006591796875, 1.142496109008789, 99.53931427001953, 5.254322052001953, 59.31085968017578, 249.0935516357422, 14.264583587646484, -15.913352966308594, -33.50486755371094, -14.472137451171875, -22.426395416259766, -195.46133422851562, -31.554733276367188, -12.982830047607422, 27.871307373046875, -20.299304962158203, 214.38323974609375, 87.91802978515625, 166.84521484375, -3.795318603515625, -29.50452423095703, -65.90691375732422, -44.50330352783203, 174.03700256347656, 25.411415100097656, 133.30860900878906, -36.49543762207031, 38.84507751464844, 25.790843963623047, 56.19435119628906, -33.87371826171875, -63.770896911621094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000596.npy"}
|
|
{"epoch": 0.9009826152683296, "step": 597, "batch_size": 64, "mean": 67.43927001953125, "std": 103.57025909423828, "min": -157.8306884765625, "p10": -48.80931243896483, "median": 39.55415153503418, "p90": 205.56270141601564, "max": 268.40814208984375, "pos_frac": 0.734375, "sample": [-8.143817901611328, -33.46045684814453, -110.107421875, -23.679214477539062, 216.7122802734375, -24.9417724609375, 41.23076248168945, -85.17610931396484, -34.792724609375, 209.57382202148438, 8.536479949951172, 125.137451171875, 1.4913043975830078, 201.14907836914062, -54.81642150878906, 9.818700790405273, 190.90057373046875, 220.12998962402344, 190.844970703125, -9.440027236938477, 102.82093048095703, 15.388137817382812, -100.63761901855469, 35.48082733154297, 194.5220947265625, 178.51873779296875, 6.284711837768555, -78.20223999023438, 188.42547607421875, 8.01249885559082, 159.38853454589844, 50.6055908203125, 209.09036254882812, 37.877540588378906, 90.61762237548828, 108.35005187988281, -13.298538208007812, 108.44796752929688, 27.334230422973633, 135.20364379882812, 1.7587471008300781, -114.35562133789062, 13.668731689453125, -12.188100814819336, 3.327322006225586, -7.922208786010742, 12.318513870239258, 61.703521728515625, 177.04290771484375, 108.57869720458984, 207.45425415039062, 5.290927886962891, 127.99793243408203, -11.311691284179688, -157.8306884765625, 135.73226928710938, 0.6488609313964844, 157.1027374267578, 268.40814208984375, 260.3030700683594, 73.08445739746094, 188.7394256591797, 121.30402374267578, 200.05923461914062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000597.npy"}
|
|
{"epoch": 0.9024943310657596, "step": 598, "batch_size": 64, "mean": 42.597984313964844, "std": 110.22261810302734, "min": -197.776611328125, "p10": -79.82489395141602, "median": 20.95485782623291, "p90": 201.21783294677735, "max": 345.7210998535156, "pos_frac": 0.65625, "sample": [-5.024209976196289, -130.7550506591797, 14.447013854980469, 45.44792938232422, 83.83007049560547, 4.037837982177734, 9.076324462890625, 198.94961547851562, 203.3653106689453, -193.20310974121094, 76.8130111694336, 39.939697265625, 47.79434585571289, -32.31782531738281, 136.47926330566406, -27.66828155517578, 258.1725769042969, -64.38906860351562, 178.76747131347656, 36.87477493286133, -14.464929580688477, -197.776611328125, -44.85816955566406, 103.6204833984375, 84.06326293945312, 34.46588897705078, 65.42156982421875, 75.37255859375, 8.375007629394531, -78.04127502441406, -56.80651092529297, -120.19364166259766, 9.000612258911133, 139.0395050048828, 219.67660522460938, -4.124797821044922, -4.1834869384765625, 4.270458221435547, 91.13609313964844, 147.158935546875, 186.50503540039062, 27.68353271484375, -3.760560989379883, 202.18992614746094, -26.24523162841797, 137.00584411621094, 36.00056457519531, 1.8257904052734375, 161.8158416748047, 21.965499877929688, -86.90020751953125, -80.58930206298828, 11.540153503417969, 11.21026611328125, 19.944215774536133, -47.73700714111328, 166.77554321289062, 133.6626739501953, -44.52073669433594, -36.606956481933594, 345.7210998535156, 205.35238647460938, -166.697265625, 208.340576171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000598.npy"}
|
|
{"epoch": 0.9040060468631897, "step": 599, "batch_size": 64, "mean": 51.21691131591797, "std": 112.16082000732422, "min": -180.23269653320312, "p10": -77.56856536865233, "median": 42.13171195983887, "p90": 193.634196472168, "max": 314.63018798828125, "pos_frac": 0.65625, "sample": [-64.00013732910156, -31.239809036254883, 51.190834045410156, 54.618247985839844, -56.709327697753906, 106.91056060791016, 33.07258987426758, 269.6210021972656, -105.87945556640625, -180.23269653320312, -5.507987976074219, -60.08293914794922, 18.30475616455078, 68.94349670410156, 144.15725708007812, 7.391166687011719, -143.4080047607422, 1.7874107360839844, 1.5013771057128906, -54.789154052734375, 197.64869689941406, 149.67626953125, 87.16119384765625, -175.61431884765625, -0.27159881591796875, -37.77406311035156, 3.3338165283203125, 194.13169860839844, -83.38360595703125, 60.5831298828125, 233.56654357910156, 192.47335815429688, -123.67081451416016, -40.824607849121094, 61.23856735229492, 8.622848510742188, 130.98580932617188, -35.740509033203125, 86.76416778564453, 90.29818725585938, 164.43841552734375, -16.990478515625, 29.723798751831055, -54.1640625, 178.88473510742188, 76.730224609375, 229.28765869140625, -9.214706420898438, -148.63478088378906, 10.224422454833984, 166.27313232421875, 258.1004333496094, 87.418701171875, 179.87156677246094, 178.6619110107422, 26.218164443969727, 314.63018798828125, 167.84585571289062, 99.04315185546875, -5.301332473754883, -15.377029418945312, 84.63555145263672, 136.7590789794922, 83.96353149414062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000599.npy"}
|
|
{"epoch": 0.9055177626606198, "step": 600, "batch_size": 64, "mean": 62.33649444580078, "std": 118.17801666259766, "min": -200.3740692138672, "p10": -70.80405197143554, "median": 35.34097480773926, "p90": 229.04798278808596, "max": 394.4859313964844, "pos_frac": 0.703125, "sample": [135.65390014648438, 65.79375457763672, 5.70591926574707, 10.886726379394531, 230.06942749023438, -25.332916259765625, 41.061710357666016, -136.20822143554688, 175.42648315429688, 242.23548889160156, -10.410621643066406, -2.6873741149902344, 43.146095275878906, 148.74192810058594, -1.891998291015625, 23.740938186645508, -85.84259033203125, -96.45438385009766, -112.68475341796875, 224.35719299316406, 26.651485443115234, 81.46363830566406, 133.62730407714844, 114.79353332519531, -57.806419372558594, 330.99090576171875, 124.37613677978516, 190.06436157226562, -27.48883819580078, 216.093994140625, 245.51937866210938, 4.358619689941406, -200.3740692138672, -12.649986267089844, 89.52947998046875, -53.903438568115234, -29.351165771484375, 150.29019165039062, 1.5967750549316406, 394.4859313964844, -8.890846252441406, 182.91510009765625, 3.881988525390625, 238.7744140625, -3.756864547729492, 8.083995819091797, -76.37446594238281, -32.56925964355469, -151.66921997070312, 72.65888977050781, 3.2683181762695312, 108.96311950683594, 79.5987777709961, 25.853073120117188, 139.67530822753906, 8.3028564453125, 67.01252746582031, 54.55727005004883, 90.60772705078125, 226.66461181640625, 1.6474838256835938, 46.143043518066406, 29.6202392578125, 276.99273681640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000600.npy"}
|
|
{"epoch": 0.9070294784580499, "step": 601, "batch_size": 64, "mean": 84.12042236328125, "std": 127.8721923828125, "min": -231.39451599121094, "p10": -43.694178771972645, "median": 106.54754638671875, "p90": 220.18341217041018, "max": 373.01568603515625, "pos_frac": 0.71875, "sample": [175.74343872070312, 20.43584442138672, 209.04806518554688, -141.77967834472656, 217.46331787109375, 128.0893096923828, 359.5020751953125, -112.81143188476562, 97.03446960449219, -11.317573547363281, 228.9897003173828, -179.85092163085938, 197.42510986328125, 21.968141555786133, -35.59112548828125, 142.02017211914062, 184.7127685546875, 324.58831787109375, -47.16691589355469, 173.84848022460938, 221.3491668701172, 7.340797424316406, 1.1504058837890625, 226.61831665039062, 210.82666015625, 146.49855041503906, -3.406881332397461, -26.999141693115234, 140.565185546875, 29.976287841796875, 16.00244903564453, 151.59130859375, 209.74969482421875, -29.307533264160156, -5.343576431274414, 137.45620727539062, -10.993904113769531, 140.94580078125, 61.041229248046875, 14.006986618041992, -93.84687805175781, 126.52345275878906, 15.159496307373047, 201.37908935546875, 132.086669921875, 155.05641174316406, 31.512348175048828, -16.97454071044922, -28.5760440826416, 15.53619384765625, -7.372917175292969, 275.82769775390625, -0.10504341125488281, 4.757228851318359, 186.66781616210938, 210.12393188476562, 373.01568603515625, 116.06062316894531, 177.22036743164062, 2.5855178833007812, -154.84461975097656, 174.54568481445312, 127.34361267089844, -231.39451599121094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000601.npy"}
|
|
{"epoch": 0.90854119425548, "step": 602, "batch_size": 64, "mean": 77.23280334472656, "std": 105.1982421875, "min": -195.63357543945312, "p10": -12.265997314453122, "median": 56.85498046875, "p90": 218.21792449951172, "max": 355.0142517089844, "pos_frac": 0.796875, "sample": [-7.643218994140625, 161.05303955078125, 221.74484252929688, 355.0142517089844, 67.83381652832031, 43.255279541015625, 71.97940063476562, 4.878034591674805, 22.292831420898438, -8.692035675048828, 239.8199462890625, 11.970531463623047, 220.02955627441406, 183.0634765625, -117.15478515625, 213.99078369140625, -195.63357543945312, 249.30538940429688, 160.0467071533203, -13.79769515991211, -3.74090576171875, -3.8393096923828125, 20.942092895507812, -8.323284149169922, 45.87614440917969, 177.203369140625, 89.13883209228516, 184.42770385742188, 23.363971710205078, 102.65802001953125, 20.92642593383789, 170.06277465820312, 98.3827133178711, 41.18478012084961, 25.064403533935547, 25.328845977783203, 32.7704963684082, 69.96558380126953, 188.67823791503906, 89.64282989501953, 23.176311492919922, 147.3389892578125, 183.94882202148438, 172.27919006347656, 20.922775268554688, -192.35491943359375, 98.87728118896484, 77.23324584960938, 3.56768798828125, 17.74091339111328, -20.254131317138672, -1.8491668701171875, 184.5517578125, -41.39623260498047, 39.224647521972656, 203.1714324951172, 86.59212493896484, 81.60140991210938, 229.00344848632812, 222.02220153808594, -87.25994110107422, 171.60186767578125, 19.58916473388672, 30.500030517578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000602.npy"}
|
|
{"epoch": 0.91005291005291, "step": 603, "batch_size": 64, "mean": 75.89717864990234, "std": 116.25586700439453, "min": -225.31381225585938, "p10": -59.9656524658203, "median": 59.05718231201172, "p90": 194.87301330566407, "max": 352.6250915527344, "pos_frac": 0.75, "sample": [192.23760986328125, 145.45945739746094, 175.8133544921875, -31.759986877441406, -119.94071960449219, 75.22157287597656, 57.079750061035156, 127.30392456054688, -6.989044189453125, 22.842239379882812, 192.6679229736328, 78.39598083496094, -4.067024230957031, -105.13751983642578, 23.41468048095703, 311.3307189941406, -125.58491516113281, 61.03461456298828, 24.151504516601562, 352.6250915527344, 80.57134246826172, 4.55450439453125, 195.23361206054688, 72.66673278808594, -11.389728546142578, 55.46728515625, 194.0316162109375, -65.11123657226562, 175.2339324951172, -200.33653259277344, -0.6873245239257812, 24.170867919921875, 167.35025024414062, 118.31681823730469, -47.95928955078125, 176.54930114746094, 44.931941986083984, 46.83498764038086, -10.169134140014648, 196.94625854492188, 168.93887329101562, 182.11343383789062, 125.80046081542969, 54.39087677001953, 120.80147552490234, 221.81820678710938, 48.51472473144531, -73.78292846679688, 80.86247253417969, 22.858354568481445, 33.27436828613281, 56.582366943359375, 174.00604248046875, 192.9509735107422, 111.32735443115234, -21.447784423828125, 8.49044418334961, -225.31381225585938, 192.65948486328125, 191.41140747070312, 204.93798828125, 3.519777297973633, 334.14892578125, -14.749458312988281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000603.npy"}
|
|
{"epoch": 0.9115646258503401, "step": 604, "batch_size": 64, "mean": 64.27925109863281, "std": 121.63274383544922, "min": -251.42591857910156, "p10": -76.63897094726562, "median": 52.69142150878906, "p90": 208.336572265625, "max": 422.73638916015625, "pos_frac": 0.703125, "sample": [149.3594970703125, 158.233642578125, -12.962787628173828, -66.53628540039062, 2.170360565185547, 161.9324188232422, -13.790569305419922, 0.6144580841064453, 198.3292694091797, -30.72797393798828, 44.80181884765625, -9.729194641113281, 3.476522445678711, 180.11053466796875, -73.47653198242188, -39.574066162109375, 185.24131774902344, 70.10481262207031, 41.376319885253906, 239.860595703125, -190.246826171875, -76.63172912597656, 33.93186950683594, -100.63983154296875, 155.83975219726562, 13.805307388305664, 14.87529182434082, 96.62554168701172, 63.12452697753906, -251.42591857910156, 38.249393463134766, -99.0342025756836, 61.60523223876953, -120.51028442382812, 191.24806213378906, -40.03514862060547, 121.13214111328125, -61.853759765625, 207.7384033203125, 153.15081787109375, 106.25283813476562, 208.42376708984375, 422.73638916015625, 32.804344177246094, 175.40084838867188, 95.02569580078125, 30.78044891357422, 208.13311767578125, 222.19544982910156, -76.64207458496094, 230.23062133789062, -5.5147857666015625, 104.89331817626953, 61.92359161376953, 157.84226989746094, 60.581024169921875, 170.9322967529297, 41.257850646972656, 3.1436214447021484, 231.63360595703125, 173.55596923828125, -54.82948303222656, -102.67778015136719, 216.02601623535156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000604.npy"}
|
|
{"epoch": 0.9130763416477702, "step": 605, "batch_size": 64, "mean": 70.2017822265625, "std": 117.3382568359375, "min": -211.5469207763672, "p10": -63.29146118164061, "median": 65.52231216430664, "p90": 209.29546813964845, "max": 316.710693359375, "pos_frac": 0.71875, "sample": [70.72692108154297, 122.834228515625, -161.05892944335938, 316.710693359375, -19.51634979248047, 213.936767578125, 87.19325256347656, 236.89447021484375, 125.33821868896484, -30.05042266845703, 210.37942504882812, -19.332717895507812, 48.293739318847656, -28.082923889160156, 3.272480010986328, -101.59111022949219, 107.43269348144531, 180.35671997070312, 156.9793243408203, 56.93254470825195, -44.861175537109375, 9.675155639648438, 162.70840454101562, -200.9412841796875, 5.104820251464844, 149.5900115966797, 196.9838104248047, 192.27841186523438, 66.29889678955078, 64.7457275390625, 183.16009521484375, 53.71178436279297, -211.5469207763672, 176.18710327148438, -2.519134521484375, 243.30303955078125, 197.2864532470703, 148.00982666015625, 27.868011474609375, 122.49710083007812, -88.89134216308594, -17.109699249267578, 194.34634399414062, 31.946393966674805, 206.7662353515625, 24.70874786376953, 219.51034545898438, 186.7798614501953, 190.7294921875, -34.587158203125, 261.44232177734375, 67.32856750488281, 103.10414123535156, -33.73053741455078, -15.65472412109375, 13.116739273071289, 12.635408401489258, -179.26370239257812, 52.60154724121094, -71.19015502929688, 11.949281692504883, 145.22267150878906, -16.718994140625, 110.68304443359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000605.npy"}
|
|
{"epoch": 0.9145880574452003, "step": 606, "batch_size": 64, "mean": 85.02445983886719, "std": 118.90487670898438, "min": -150.7921142578125, "p10": -63.280794525146476, "median": 79.75708770751953, "p90": 234.88397216796875, "max": 384.8249206542969, "pos_frac": 0.734375, "sample": [-44.23804473876953, 235.37615966796875, 51.14068603515625, 265.1396179199219, -103.80438995361328, 32.82976531982422, 179.09815979003906, 68.60450744628906, -3.3422374725341797, 171.01881408691406, 199.62979125976562, -24.903335571289062, 123.541259765625, 233.73553466796875, 199.26535034179688, -150.7921142578125, 127.78787231445312, 77.58395385742188, 4.512372970581055, 176.77783203125, 384.8249206542969, 17.14501953125, 62.83295822143555, 177.95263671875, -51.40271759033203, -100.89310455322266, 306.15643310546875, -37.163543701171875, 135.56097412109375, 151.3907470703125, 125.97145080566406, 5.201225280761719, -119.87064361572266, 186.105712890625, 94.39387512207031, 2.603090286254883, 81.93022155761719, 0.2382488250732422, 171.30477905273438, -1.7178115844726562, 210.707763671875, 257.0986022949219, 73.4861068725586, 135.61636352539062, 6.345008850097656, 302.41485595703125, -19.080795288085938, 87.59735107421875, -100.52600860595703, -68.37139892578125, 192.4527587890625, 52.82940673828125, -124.80606842041016, 38.27581787109375, -24.076461791992188, 148.660888671875, 100.23767852783203, -22.093379974365234, 113.91659545898438, 281.2965087890625, 160.68365478515625, 174.131591796875, 53.670654296875, -0.4278545379638672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000606.npy"}
|
|
{"epoch": 0.9160997732426304, "step": 607, "batch_size": 64, "mean": 73.39010620117188, "std": 117.64994049072266, "min": -233.53070068359375, "p10": -77.7813331604004, "median": 65.69848823547363, "p90": 213.4987411499024, "max": 335.58880615234375, "pos_frac": 0.734375, "sample": [0.26464080810546875, 36.310760498046875, -160.88922119140625, 130.58798217773438, 144.3648223876953, 6.004081726074219, 157.91690063476562, 108.2792739868164, -233.53070068359375, -44.87560272216797, -6.148609161376953, -83.90528106689453, 117.13198852539062, 197.7300262451172, 44.4027099609375, -78.88604736328125, -13.918342590332031, 6.332403182983398, 44.859901428222656, 227.72340393066406, 34.921356201171875, 236.25143432617188, 154.94570922851562, 128.8700408935547, 128.3430938720703, 27.41449737548828, -18.322776794433594, 164.8661346435547, -10.433952331542969, -25.385108947753906, 228.5161895751953, 115.24337768554688, 132.2709503173828, 168.03173828125, -110.85098266601562, 218.49539184570312, 335.58880615234375, -13.25448989868164, 175.8544158935547, 22.034439086914062, -66.16226959228516, 11.504844665527344, 35.390167236328125, 176.72645568847656, 112.43841552734375, 11.274070739746094, -85.79830169677734, 158.51385498046875, 304.9955139160156, 185.87757873535156, 0.3810386657714844, 45.26860046386719, 147.33477783203125, 238.0009765625, 62.18095779418945, 201.8398895263672, -2.895702362060547, 97.67779541015625, 189.4971160888672, 199.68431091308594, 69.21601867675781, -181.193359375, -75.20366668701172, 167.26272583007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000607.npy"}
|
|
{"epoch": 0.9176114890400605, "step": 608, "batch_size": 64, "mean": 78.04742431640625, "std": 109.58146667480469, "min": -168.87548828125, "p10": -35.23138847351073, "median": 49.66997146606445, "p90": 209.67517852783203, "max": 337.4122314453125, "pos_frac": 0.765625, "sample": [194.7232666015625, -89.82405090332031, -0.6422691345214844, 197.60540771484375, 0.7253551483154297, -110.3712158203125, 163.36505126953125, 6.635223388671875, 21.446819305419922, -137.1083526611328, 147.40113830566406, 109.00804901123047, 161.2250213623047, 196.54762268066406, 80.55728149414062, 156.10354614257812, 261.7362060546875, -168.87548828125, -8.967653274536133, 20.846920013427734, 64.05104064941406, -22.83675765991211, -0.9120197296142578, 228.64939880371094, 191.10525512695312, 32.35909652709961, 2.3913040161132812, 99.73226928710938, 11.770866394042969, 210.0220489501953, 266.9793701171875, -40.543373107910156, 85.06195068359375, 152.24302673339844, 45.38494873046875, 129.720703125, 208.86581420898438, -22.38782501220703, -88.52940368652344, -7.84625244140625, -6.613746643066406, 17.50261688232422, 80.31440734863281, 45.60529327392578, -16.048294067382812, 151.42970275878906, 3.290283203125, 179.3979949951172, 337.4122314453125, 190.78221130371094, 89.72845458984375, 34.28467559814453, 53.582069396972656, 196.28524780273438, 3.7909889221191406, 157.20419311523438, -87.23921203613281, 198.9768829345703, 45.75787353515625, 7.367012023925781, 30.101219177246094, 222.98973083496094, 39.27580261230469, 272.438232421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000608.npy"}
|
|
{"epoch": 0.9191232048374905, "step": 609, "batch_size": 64, "mean": 82.73019409179688, "std": 100.71949768066406, "min": -241.71014404296875, "p10": -54.439015960693354, "median": 94.78016662597656, "p90": 196.87055053710938, "max": 370.1851806640625, "pos_frac": 0.828125, "sample": [227.07505798339844, 193.75889587402344, 94.60916900634766, 12.126701354980469, 121.02511596679688, 27.09423828125, 189.9443817138672, 124.03399658203125, 6.0296478271484375, 0.8385086059570312, -56.39772033691406, 129.39666748046875, -22.733108520507812, 15.367122650146484, 370.1851806640625, 162.89315795898438, 202.71873474121094, 107.93101501464844, 79.86248779296875, 222.33544921875, -55.417442321777344, 105.95465087890625, 103.7342529296875, 82.69723510742188, -90.03738403320312, 213.49087524414062, 170.26597595214844, 94.95116424560547, -110.91878509521484, 158.5098876953125, 69.32535552978516, -1.082590103149414, 189.30319213867188, 101.55167388916016, -25.691253662109375, 131.641357421875, 73.61561584472656, 1.0663394927978516, 184.252685546875, 142.3030548095703, 79.19510650634766, -61.85090255737305, 76.32563018798828, 204.34332275390625, 192.31373596191406, -241.71014404296875, 14.97707748413086, 96.939697265625, 100.9862060546875, 13.49580192565918, 51.27876663208008, 118.91683959960938, 119.14350891113281, 64.80826568603516, 4.020437240600586, 17.88495635986328, 198.20411682128906, 125.7459487915039, 181.48599243164062, -56.426795959472656, 165.965576171875, 87.31459045410156, 45.920379638671875, -52.15602111816406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000609.npy"}
|
|
{"epoch": 0.9206349206349206, "step": 610, "batch_size": 64, "mean": 81.54739379882812, "std": 120.92001342773438, "min": -215.51177978515625, "p10": -40.76374969482421, "median": 63.52171325683594, "p90": 239.8376647949219, "max": 354.8492126464844, "pos_frac": 0.671875, "sample": [49.028831481933594, 349.00872802734375, 277.740478515625, 164.3950958251953, 81.9909896850586, 205.4931640625, -2.348236083984375, 6.87535285949707, -149.77996826171875, 13.9080810546875, 103.36417388916016, -111.26798248291016, 116.98081970214844, 33.26478576660156, -17.224334716796875, 61.94197082519531, 192.3662872314453, -38.38444519042969, -38.533172607421875, -28.903175354003906, 57.586952209472656, -97.0731201171875, -22.5028076171875, 152.40525817871094, -41.71971130371094, -78.65626525878906, 35.72813415527344, 199.0056915283203, 62.87290954589844, 247.17340087890625, 50.50016784667969, 354.8492126464844, 238.2530517578125, 210.96456909179688, -8.533523559570312, 181.59100341796875, 197.18069458007812, -2.418783187866211, 182.22061157226562, 26.03326416015625, 19.497100830078125, 159.49630737304688, 64.54615020751953, -215.51177978515625, 262.39007568359375, -2.492359161376953, -8.396629333496094, -22.355960845947266, 190.49510192871094, 240.51678466796875, 117.73057556152344, 103.89385986328125, 64.17051696777344, -5.726076126098633, -0.10580635070800781, -28.77428436279297, -79.73313903808594, 195.36569213867188, 221.3601531982422, 121.42144775390625, 105.79643249511719, 71.69132995605469, 272.1792907714844, 156.2006072998047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000610.npy"}
|
|
{"epoch": 0.9221466364323507, "step": 611, "batch_size": 64, "mean": 64.0162582397461, "std": 97.67646026611328, "min": -175.63973999023438, "p10": -41.9758903503418, "median": 60.17994689941406, "p90": 202.56426086425785, "max": 270.37017822265625, "pos_frac": 0.71875, "sample": [117.06201171875, 130.44586181640625, 42.73004150390625, 45.07634353637695, 162.73130798339844, -3.9434280395507812, 15.61651611328125, 62.44091796875, -3.284137725830078, 82.75941467285156, 3.5418357849121094, 147.3096923828125, 75.56240844726562, 123.94673156738281, -8.121315002441406, 15.762123107910156, -103.91354370117188, 208.34217834472656, -38.76967239379883, 270.37017822265625, 177.1260986328125, 57.72907257080078, 165.67117309570312, -77.65971374511719, -74.4283447265625, -6.381568908691406, 207.870849609375, 89.0511245727539, 102.34717559814453, 250.06289672851562, -2.05670166015625, -42.06963348388672, 61.579742431640625, 207.88751220703125, -41.75715637207031, -7.1877899169921875, -175.63973999023438, 231.3841094970703, 74.1939926147461, 26.557266235351562, 147.1085205078125, 70.1727294921875, -165.53665161132812, 58.7801513671875, 44.214515686035156, 88.4712905883789, 97.71475219726562, 52.149169921875, 159.67501831054688, 184.96652221679688, 64.81378173828125, 180.165283203125, -1.7985095977783203, 87.02417755126953, 4.9673309326171875, 31.201217651367188, 207.47305297851562, -106.87549591064453, 191.11041259765625, 52.119529724121094, -27.65875816345215, 7.19580078125, -4.516706466674805, 104.15756225585938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000611.npy"}
|
|
{"epoch": 0.9236583522297808, "step": 612, "batch_size": 64, "mean": 94.00511169433594, "std": 111.64855194091797, "min": -230.08253479003906, "p10": -26.837554359436034, "median": 96.53324890136719, "p90": 238.36244354248055, "max": 316.55419921875, "pos_frac": 0.78125, "sample": [-5.162635803222656, 4.690948486328125, 140.40689086914062, 47.164817810058594, -41.27668762207031, 4.028495788574219, 186.3894500732422, 148.82278442382812, -13.149688720703125, 42.72956848144531, 163.87640380859375, 123.09931945800781, 60.56450653076172, 70.43118286132812, 174.60455322265625, 62.4468994140625, 52.50526428222656, -5.673271179199219, 97.16551208496094, 171.11734008789062, 168.46728515625, -42.36162567138672, 158.49423217773438, 38.14649963378906, 98.31343078613281, 40.85202407836914, 252.91819763183594, -18.18033218383789, -36.842647552490234, 34.065025329589844, 84.1186294555664, 209.87571716308594, 4.763599395751953, -9.10786247253418, -24.40271759033203, 219.11764526367188, 27.166297912597656, 219.115966796875, 277.2364196777344, -150.44485473632812, 255.93409729003906, 84.04151916503906, 213.86976623535156, -146.6514129638672, 284.5900573730469, 149.48390197753906, 188.1744384765625, -27.88105583190918, 177.2096405029297, 246.61021423339844, 97.57540130615234, -11.027000427246094, 294.8013610839844, 96.61466979980469, 38.453514099121094, 316.55419921875, 142.69122314453125, 82.59333038330078, 96.45182800292969, -230.08253479003906, 110.04096984863281, 163.08380126953125, 168.00953674316406, 189.0930938720703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000612.npy"}
|
|
{"epoch": 0.9251700680272109, "step": 613, "batch_size": 64, "mean": 82.72808837890625, "std": 109.99542999267578, "min": -237.36874389648438, "p10": -27.07761306762695, "median": 72.91560363769531, "p90": 210.49488983154296, "max": 266.2475280761719, "pos_frac": 0.75, "sample": [-14.349822998046875, 53.327850341796875, -11.549163818359375, 61.81944274902344, 108.83753204345703, 169.03317260742188, 155.31378173828125, 96.68959045410156, -29.33856964111328, -7.235818862915039, 55.62706756591797, 63.00477981567383, -191.37380981445312, 175.05117797851562, 173.42095947265625, 71.34016418457031, -37.88353729248047, 2.864042282104492, 262.58404541015625, 197.80758666992188, 10.106063842773438, -15.690641403198242, -237.36874389648438, 16.448015213012695, 187.71640014648438, 74.49104309082031, 180.5039520263672, 0.7165355682373047, 16.505470275878906, 22.042465209960938, 251.45407104492188, 55.36716079711914, 116.19354248046875, -3.9381752014160156, -13.803382873535156, 80.16914367675781, -80.77887725830078, 181.63232421875, 132.76573181152344, 235.50436401367188, 179.64218139648438, 210.06942749023438, 63.50775909423828, 198.0128173828125, 259.031982421875, 4.169868469238281, 266.2475280761719, -2.2423572540283203, -15.455841064453125, 210.67723083496094, -21.802047729492188, -83.463623046875, 185.012939453125, 254.9320831298828, 84.88731384277344, 162.96920776367188, 22.560688018798828, 209.5354461669922, 146.2747344970703, 199.2565460205078, 45.178245544433594, 103.66160583496094, 121.6867904663086, -74.78018188476562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000613.npy"}
|
|
{"epoch": 0.926681783824641, "step": 614, "batch_size": 64, "mean": 78.26979064941406, "std": 108.53416442871094, "min": -201.4066162109375, "p10": -60.407474517822244, "median": 87.96229934692383, "p90": 189.27186279296876, "max": 360.708984375, "pos_frac": 0.796875, "sample": [190.31008911132812, 144.84127807617188, 18.087970733642578, 143.5699462890625, 90.3894271850586, 118.8365707397461, 5.295726776123047, 0.27979278564453125, 119.20596313476562, 70.68873596191406, 224.82159423828125, 140.57763671875, 245.86109924316406, 222.1471710205078, 155.79721069335938, 175.0049285888672, 143.42466735839844, 73.4525375366211, 144.87594604492188, 154.94261169433594, -157.4138946533203, 24.440475463867188, -22.954513549804688, -201.4066162109375, 360.708984375, 17.162376403808594, -3.1150569915771484, -5.621185302734375, 179.29635620117188, -102.82131958007812, -105.48332977294922, 16.557048797607422, 8.658687591552734, 186.84933471679688, 32.37160873413086, 11.078035354614258, 154.8348846435547, 136.66842651367188, -173.84036254882812, 152.02896118164062, -3.6575469970703125, 178.33062744140625, 169.81301879882812, 152.1747589111328, 118.3842544555664, 155.13824462890625, 25.897865295410156, 93.44647216796875, 166.4403076171875, 47.31715393066406, 36.37007141113281, 77.3328628540039, -69.31813049316406, -39.615943908691406, 25.49777603149414, 227.45016479492188, 100.48597717285156, -26.073631286621094, 186.42190551757812, 17.832977294921875, 85.53517150878906, 64.7342529296875, -84.2054443359375, 213.1239013671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000614.npy"}
|
|
{"epoch": 0.9281934996220711, "step": 615, "batch_size": 64, "mean": 82.58614349365234, "std": 132.3500213623047, "min": -273.4795837402344, "p10": -71.43199157714842, "median": 63.07730484008789, "p90": 236.96203155517586, "max": 390.24017333984375, "pos_frac": 0.765625, "sample": [63.110816955566406, 101.85298156738281, 191.3898468017578, 19.75408935546875, 92.1519775390625, 191.56866455078125, 126.2027359008789, -161.2268524169922, 188.91049194335938, 50.20020294189453, -122.8714828491211, 74.77155303955078, -104.55621337890625, 160.0039520263672, 51.520103454589844, -111.81010437011719, 174.78338623046875, 253.36712646484375, 207.37216186523438, 11.367172241210938, 193.37283325195312, -35.84246063232422, 269.771240234375, -2.7981338500976562, 191.54071044921875, 62.3602294921875, -47.47077941894531, 60.59098815917969, 14.4462890625, 216.3595733642578, 204.10911560058594, 73.39399719238281, -14.385414123535156, 37.18111038208008, -4.347343444824219, 194.55087280273438, 271.9073181152344, -56.170867919921875, 27.517013549804688, 188.46495056152344, 245.79165649414062, 261.1490173339844, 188.55931091308594, 12.154861450195312, 189.01492309570312, 10.402606964111328, 49.252593994140625, -46.211669921875, 63.043792724609375, 216.1016387939453, -77.97247314453125, 390.24017333984375, 20.871826171875, 95.64472961425781, -26.398033142089844, 379.8271484375, 77.45077514648438, 167.26258850097656, 197.01666259765625, 3.26861572265625, 18.895835876464844, 40.08570861816406, -218.87350463867188, -273.4795837402344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000615.npy"}
|
|
{"epoch": 0.9297052154195011, "step": 616, "batch_size": 64, "mean": 67.09675598144531, "std": 109.11443328857422, "min": -185.42916870117188, "p10": -42.55493316650391, "median": 61.60294723510742, "p90": 208.54808502197267, "max": 276.7551574707031, "pos_frac": 0.671875, "sample": [235.97616577148438, 25.134552001953125, -42.6441650390625, -29.828872680664062, 201.97885131835938, -2.9604320526123047, 263.80401611328125, 187.56739807128906, -0.9852561950683594, 125.10155487060547, 65.8961410522461, 12.802284240722656, 73.21623992919922, -173.92355346679688, 33.248653411865234, -42.34672546386719, 64.43325805664062, -39.628177642822266, -20.368911743164062, 228.88125610351562, 58.77263641357422, 204.7640838623047, -14.840631484985352, -17.02073860168457, 210.1697998046875, 0.7770290374755859, -15.229703903198242, 15.374839782714844, -37.6602783203125, 197.1959686279297, 177.925537109375, 18.558685302734375, 85.46185302734375, 178.5692901611328, 254.4643096923828, 167.98204040527344, -13.83474349975586, 86.73185729980469, -48.465362548828125, 194.15658569335938, 189.71441650390625, 73.43521118164062, 45.04003143310547, 81.61116027832031, 178.974609375, 276.7551574707031, -83.0608901977539, 17.871082305908203, -185.42916870117188, 16.866043090820312, -23.09518051147461, -119.04634094238281, 88.67371368408203, 67.79397583007812, 26.584007263183594, 225.01394653320312, 96.37454986572266, -11.69582748413086, 139.0732421875, -29.855567932128906, 189.08383178710938, 168.62686157226562, 77.25091552734375, -81.57433319091797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000616.npy"}
|
|
{"epoch": 0.9312169312169312, "step": 617, "batch_size": 64, "mean": 61.182498931884766, "std": 113.74920654296875, "min": -181.9364013671875, "p10": -63.12220993041991, "median": 49.02549362182617, "p90": 206.20315856933595, "max": 403.7718200683594, "pos_frac": 0.71875, "sample": [45.36328887939453, -20.253528594970703, -47.47840881347656, 27.32910919189453, 280.20050048828125, 124.84757995605469, 252.33963012695312, 187.16981506347656, 10.61331558227539, -107.55107879638672, 70.77114868164062, 33.378318786621094, -50.43772888183594, 0.8491878509521484, 2.9400978088378906, 207.51019287109375, -67.68663787841797, 245.05661010742188, 24.0548095703125, 5.510658264160156, 403.7718200683594, -19.683597564697266, -13.616886138916016, 197.77383422851562, 103.87380981445312, -0.48163604736328125, -1.6745948791503906, 125.3409423828125, 75.28627014160156, -4.488483428955078, 20.832012176513672, 23.999561309814453, 79.32633972167969, 197.21273803710938, 117.2904052734375, 239.74771118164062, 50.18830108642578, -164.69168090820312, -25.012596130371094, 203.15341186523438, 154.62841796875, 47.86268615722656, -85.55659484863281, -15.937381744384766, 216.55624389648438, 190.76754760742188, -158.64385986328125, -181.9364013671875, -146.7054901123047, 96.45121765136719, 114.20875549316406, 143.74896240234375, 54.61033630371094, 12.265813827514648, 174.42498779296875, 102.03691101074219, 24.001388549804688, 16.603248596191406, 63.570770263671875, 96.6263427734375, 55.724021911621094, 86.9464340209961, 73.22283935546875, -52.47187805175781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000617.npy"}
|
|
{"epoch": 0.9327286470143613, "step": 618, "batch_size": 64, "mean": 73.2374038696289, "std": 112.28783416748047, "min": -153.99681091308594, "p10": -46.52479362487792, "median": 49.386478424072266, "p90": 218.00784912109376, "max": 375.0335388183594, "pos_frac": 0.78125, "sample": [288.11724853515625, 239.91680908203125, 29.074382781982422, 165.35281372070312, -13.987907409667969, 2.385915756225586, -49.951297760009766, 28.07830047607422, 356.13238525390625, 1.8397331237792969, 5.304176330566406, 318.68310546875, 156.25460815429688, 51.76466369628906, 199.05078125, 7.5511322021484375, 56.50530242919922, 2.428619384765625, 375.0335388183594, 120.5902099609375, 61.41571807861328, 270.36480712890625, 2.192380905151367, 41.148895263671875, 19.288856506347656, 211.07452392578125, 57.315948486328125, 77.99536895751953, 118.37053680419922, 68.77125549316406, 42.35398864746094, 16.274208068847656, 218.61489868164062, -13.090190887451172, 48.31312561035156, -79.47256469726562, 55.00773620605469, -122.45409393310547, 142.24075317382812, 40.900001525878906, -113.30499267578125, -2.441844940185547, 15.56222152709961, -153.99681091308594, 139.3870849609375, -6.5004119873046875, 50.45983123779297, -100.84028625488281, 127.57561492919922, 216.59140014648438, 119.3821029663086, 154.60911560058594, 7.6287994384765625, -3.289947509765625, 104.65960693359375, 38.55876159667969, -21.463699340820312, 82.54724884033203, 163.2901611328125, 179.72909545898438, -38.52961730957031, 137.53012084960938, -60.045989990234375, 33.345367431640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000618.npy"}
|
|
{"epoch": 0.9342403628117913, "step": 619, "batch_size": 64, "mean": 79.39472198486328, "std": 116.8470687866211, "min": -299.9303283691406, "p10": -42.61441078186034, "median": 67.63306427001953, "p90": 226.852946472168, "max": 349.01531982421875, "pos_frac": 0.765625, "sample": [139.5913848876953, 35.37739562988281, 2.8960094451904297, 48.16064453125, 191.26315307617188, 254.0670928955078, 225.79193115234375, -73.50237274169922, -110.2698974609375, 288.82745361328125, 203.20718383789062, 74.552734375, 186.9003448486328, 12.227087020874023, 94.00570678710938, -10.651420593261719, 79.94750213623047, 69.92898559570312, -7.744096755981445, -6.992584228515625, -125.77974700927734, 265.6163635253906, 35.73077392578125, -79.91006469726562, -11.748003005981445, 169.31625366210938, 176.51402282714844, 226.0786590576172, 2.940328598022461, -34.048065185546875, 5.894889831542969, -299.9303283691406, -18.67156982421875, 253.06838989257812, 148.89288330078125, 158.30990600585938, 26.51370620727539, 202.65159606933594, 227.18478393554688, -100.00424194335938, 127.23764038085938, 257.5445251464844, 125.49092102050781, 1.1173572540283203, 164.4092559814453, 136.8459014892578, 131.4168243408203, 159.75668334960938, 173.9717254638672, 25.62881851196289, 43.84461212158203, 59.58918380737305, 10.967041015625, 65.33714294433594, 349.01531982421875, 18.39965057373047, 80.71405029296875, 121.0692367553711, -7.611440658569336, 13.415163040161133, -10.60284423828125, 110.6201400756836, 43.16627502441406, -46.285701751708984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000619.npy"}
|
|
{"epoch": 0.9357520786092215, "step": 620, "batch_size": 64, "mean": 61.73576736450195, "std": 108.71211242675781, "min": -198.820068359375, "p10": -61.824773406982416, "median": 39.8568229675293, "p90": 209.87671051025393, "max": 265.0394287109375, "pos_frac": 0.703125, "sample": [22.15576934814453, -188.63848876953125, 0.039886474609375, 143.19720458984375, -91.23302459716797, -0.403350830078125, 67.71446228027344, 220.28236389160156, -19.03839874267578, 95.62992095947266, 204.62294006347656, 177.71250915527344, 141.3304443359375, 125.09109497070312, -6.829887390136719, 265.0394287109375, 121.08695983886719, 56.713043212890625, -9.865646362304688, 5.0325775146484375, -198.820068359375, 34.5764274597168, -40.72735595703125, 253.87066650390625, -65.75090026855469, 163.19882202148438, -24.47631072998047, -24.798919677734375, 212.12832641601562, -52.66381072998047, 174.59573364257812, 233.40682983398438, -15.305294036865234, -9.385374069213867, -26.070873260498047, 16.71210479736328, -0.5056953430175781, 36.304969787597656, 184.5963592529297, 5.349510192871094, 169.2564697265625, 53.13423156738281, 114.49267578125, 76.39749145507812, 12.629459381103516, -78.71270751953125, 263.98162841796875, 219.603271484375, 3.619173049926758, 159.75836181640625, 2.225881576538086, 202.93850708007812, -90.20809936523438, 14.781038284301758, -149.5679473876953, 28.567907333374023, 173.83676147460938, 43.40867614746094, 98.18408203125, 6.82550048828125, 138.51580810546875, 114.8180923461914, 128.1398162841797, 58.588279724121094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000620.npy"}
|
|
{"epoch": 0.9372637944066515, "step": 621, "batch_size": 64, "mean": 69.102294921875, "std": 124.53330993652344, "min": -241.557373046875, "p10": -67.01022720336913, "median": 65.60852813720703, "p90": 228.4440551757813, "max": 427.15631103515625, "pos_frac": 0.703125, "sample": [-0.73388671875, 30.071578979492188, 27.65612030029297, -46.93561935424805, -54.401580810546875, -172.4593048095703, 65.56658935546875, 101.43507385253906, -72.41393280029297, 23.451684951782227, 101.45095825195312, 129.31312561035156, 180.21932983398438, -10.74614143371582, 191.46327209472656, 65.65046691894531, 90.13121795654297, 141.75125122070312, 67.08425903320312, 213.06716918945312, 87.36427307128906, 88.90420532226562, 339.60595703125, -159.711669921875, 235.03414916992188, 172.96084594726562, 75.59292602539062, -241.557373046875, 136.18463134765625, -112.78620910644531, 325.8622131347656, 97.95967864990234, 122.18072509765625, 240.26318359375, 186.54013061523438, -0.609893798828125, 63.03374099731445, 18.442110061645508, 74.33993530273438, -21.438262939453125, -93.162353515625, 17.267881393432617, 238.91004943847656, 47.997894287109375, 9.356100082397461, -18.586841583251953, 37.03643798828125, -3.3680267333984375, -4.9508209228515625, -12.016193389892578, 4.523771286010742, 187.01651000976562, 79.14263916015625, 14.927316665649414, 123.65502166748047, -6.540645599365234, -18.37738800048828, 427.15631103515625, -165.54180908203125, 84.13314819335938, 281.7732238769531, 26.553115844726562, 195.14085388183594, 171.71337890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000621.npy"}
|
|
{"epoch": 0.9387755102040817, "step": 622, "batch_size": 64, "mean": 70.88835906982422, "std": 113.93553924560547, "min": -204.86814880371094, "p10": -58.76215820312499, "median": 49.663936614990234, "p90": 211.46992645263674, "max": 309.760498046875, "pos_frac": 0.71875, "sample": [171.74188232421875, 32.83518600463867, 21.88939666748047, -204.86814880371094, 19.622825622558594, 66.71436309814453, -47.170074462890625, 16.181724548339844, -86.47743225097656, 0.7659454345703125, 232.83779907226562, 132.906982421875, -0.6017017364501953, -45.845890045166016, 208.92820739746094, 90.79878997802734, 204.4101104736328, 187.55831909179688, 176.96485900878906, 203.51641845703125, 186.1313934326172, 193.35931396484375, -12.990514755249023, -28.785459518432617, 274.3542785644531, 61.290992736816406, 49.73515319824219, 178.29415893554688, -2.9019126892089844, 65.05143737792969, 224.43162536621094, -167.76943969726562, 68.41876983642578, 34.69820785522461, 49.59272003173828, -5.971120834350586, 217.6702880859375, 161.50814819335938, 79.50794982910156, 35.220008850097656, 171.51150512695312, 51.119224548339844, 166.1395721435547, -32.178062438964844, 143.8263397216797, 165.73353576660156, 309.760498046875, -162.86026000976562, 2.437152862548828, -23.02075958251953, 212.55923461914062, 24.350929260253906, 1.5799484252929688, -63.730194091796875, -9.227838516235352, -82.55531311035156, 263.52392578125, -13.001167297363281, -68.74629974365234, 25.27507781982422, 146.00706481933594, 10.922269821166992, 208.54766845703125, 45.3250732421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000622.npy"}
|
|
{"epoch": 0.9402872260015117, "step": 623, "batch_size": 64, "mean": 73.88140106201172, "std": 119.36698150634766, "min": -257.76171875, "p10": -54.98840713500975, "median": 72.26605224609375, "p90": 212.3918640136719, "max": 394.4352722167969, "pos_frac": 0.734375, "sample": [168.69863891601562, 38.36028289794922, -26.648731231689453, -172.6647491455078, 69.49667358398438, 274.9947204589844, 158.36590576171875, 182.4586639404297, 229.90408325195312, 91.04509735107422, -85.9154281616211, 54.16912078857422, 259.21246337890625, 89.81763458251953, -38.30375671386719, 249.12826538085938, 2.324321746826172, 191.6492462158203, 60.90174102783203, 126.0252914428711, 43.5819091796875, 12.734895706176758, 205.506591796875, -6.255882263183594, -25.585960388183594, 118.8722915649414, 150.17613220214844, 213.25564575195312, -257.76171875, -43.87725830078125, 130.32565307617188, 156.3350372314453, 88.951171875, 182.87771606445312, -33.36505126953125, -117.62078857421875, 185.16445922851562, -14.76495361328125, 159.14794921875, 206.02828979492188, 233.92935180664062, 210.37637329101562, 75.03543090820312, 4.236822128295898, -23.951858520507812, -20.14509391784668, -59.750328063964844, 23.435529708862305, 7.330577850341797, 147.10696411132812, 122.65676879882812, 394.4352722167969, 122.89439392089844, 115.66783142089844, 7.712654113769531, 10.187721252441406, -82.47926330566406, 9.9169921875, 51.908103942871094, 5.681371688842773, 175.2231903076172, -4.274787902832031, 76.22833251953125, -151.69833374023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000623.npy"}
|
|
{"epoch": 0.9417989417989417, "step": 624, "batch_size": 64, "mean": 81.20841979980469, "std": 120.94188690185547, "min": -206.0558624267578, "p10": -40.48638534545898, "median": 45.84852981567383, "p90": 227.5153442382813, "max": 347.5792541503906, "pos_frac": 0.703125, "sample": [-57.44630432128906, -17.006446838378906, 178.56298828125, 129.87933349609375, 201.138427734375, 188.8979949951172, -7.7161712646484375, 2.2258148193359375, -32.027191162109375, -42.992401123046875, 68.4156723022461, 19.288604736328125, 129.81369018554688, 152.98580932617188, 186.26849365234375, 176.886962890625, 16.577852249145508, 292.3793640136719, 286.589599609375, -0.32215118408203125, -33.70817947387695, -98.41171264648438, -23.21641731262207, 205.39111328125, 11.01729965209961, 190.15914916992188, 45.990882873535156, -43.180946350097656, 64.87246704101562, 45.7061767578125, 347.5792541503906, 233.49935913085938, 125.05291748046875, 344.5948181152344, 12.843891143798828, -30.34396743774414, 213.55264282226562, 171.65577697753906, 306.86663818359375, 184.6512451171875, 69.3096923828125, 209.42535400390625, 116.53077697753906, 11.081207275390625, -66.19404602050781, -29.399795532226562, 265.9210205078125, 5.080955505371094, -17.306612014770508, 159.71505737304688, 144.2041473388672, 0.20972633361816406, 7.0873565673828125, 15.804277420043945, -206.0558624267578, 151.7938232421875, 41.30409240722656, -34.639015197753906, 147.73605346679688, -12.364940643310547, -151.8248748779297, -0.8055496215820312, 42.528587341308594, 181.22531127929688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000624.npy"}
|
|
{"epoch": 0.9433106575963719, "step": 625, "batch_size": 64, "mean": 64.12037658691406, "std": 125.50875854492188, "min": -192.4095001220703, "p10": -76.07657051086424, "median": 32.023075103759766, "p90": 237.29111938476566, "max": 365.63232421875, "pos_frac": 0.6875, "sample": [68.45196533203125, 204.375, 38.614776611328125, 2.73779296875, 0.4472179412841797, 166.06321716308594, 98.64125061035156, 140.23211669921875, 5.233343124389648, 297.2282409667969, 126.15911865234375, -81.50480651855469, 200.59486389160156, -40.2171516418457, 301.1338195800781, 50.42008972167969, 261.85906982421875, 53.00025939941406, -63.41068649291992, -26.43994903564453, 170.46424865722656, 166.33929443359375, 37.517845153808594, -118.21676635742188, -4.6961822509765625, 14.826118469238281, 31.743927001953125, 32.302223205566406, 10.456371307373047, 40.29412841796875, -0.7064399719238281, 271.93359375, 365.63232421875, -25.362945556640625, 162.83627319335938, -5.017974853515625, -3.770296096801758, 331.52435302734375, -6.020225524902344, 19.701873779296875, 103.78582763671875, 177.91241455078125, 242.09967041015625, 123.94170379638672, 18.496360778808594, -14.961990356445312, -189.35467529296875, -95.73895263671875, -133.90447998046875, 191.7928009033203, 13.651885986328125, -173.31076049804688, -192.4095001220703, 225.780029296875, 159.9136505126953, -0.9438247680664062, 226.0711669921875, 21.919898986816406, 65.64624786376953, -20.543113708496094, 69.90101623535156, 10.355899810791016, 10.36787223815918, -32.16655731201172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000625.npy"}
|
|
{"epoch": 0.9448223733938019, "step": 626, "batch_size": 64, "mean": 85.56047821044922, "std": 132.42971801757812, "min": -151.6724853515625, "p10": -84.78447570800779, "median": 71.5445671081543, "p90": 213.72656707763673, "max": 543.62060546875, "pos_frac": 0.734375, "sample": [216.82809448242188, 29.907333374023438, 199.23599243164062, -36.2431640625, 407.9599609375, 200.33880615234375, 211.17965698242188, -55.46720886230469, 96.3740463256836, 197.49034118652344, -92.32474517822266, 26.149856567382812, 58.441184997558594, 5.328899383544922, -100.50785064697266, 187.8455810546875, 166.27490234375, -96.64329528808594, 271.7128601074219, 55.892173767089844, -138.6307373046875, 3.69647216796875, 107.8257827758789, 183.9847869873047, 14.548454284667969, -0.8243694305419922, 121.3492202758789, 98.77032470703125, -50.50697326660156, 543.62060546875, 122.80345916748047, -67.19051361083984, 238.37936401367188, 193.5547332763672, 79.59966278076172, -111.26901245117188, -3.408599853515625, 63.489471435546875, -2.380979537963867, 154.56146240234375, 14.550468444824219, 8.186334609985352, 210.19656372070312, 37.779788970947266, -26.52655029296875, 7.24870491027832, 119.74320983886719, 187.1477508544922, 45.26826858520508, 188.1917724609375, 161.84371948242188, 23.873332977294922, -151.6724853515625, 197.87921142578125, -124.35518646240234, 186.90725708007812, 29.55868148803711, 214.81809997558594, 174.4168701171875, 129.4844207763672, 271.6473388671875, -63.48179626464844, 179.77999877929688, -48.36101531982422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000626.npy"}
|
|
{"epoch": 0.9463340891912321, "step": 627, "batch_size": 64, "mean": 91.93545532226562, "std": 115.1560287475586, "min": -199.58139038085938, "p10": -18.70290603637695, "median": 96.0250244140625, "p90": 226.36764221191407, "max": 306.57373046875, "pos_frac": 0.734375, "sample": [141.3824920654297, -0.4871788024902344, 98.83100891113281, 173.86434936523438, 149.65176391601562, 118.41036987304688, 167.415771484375, 286.81170654296875, 306.57373046875, 154.76535034179688, -0.107757568359375, 235.05484008789062, 190.77828979492188, 183.6920928955078, 70.08721923828125, 90.30133056640625, 7.66815185546875, 41.574195861816406, 224.38037109375, 149.16616821289062, 150.63832092285156, 161.7903289794922, 44.158111572265625, -9.660400390625, -5.2017364501953125, -105.76747131347656, 204.969482421875, -61.990692138671875, 162.14454650878906, -8.202159881591797, 5.132219314575195, 213.31520080566406, 227.21932983398438, -2.4574356079101562, 142.66598510742188, -19.725433349609375, -16.31700897216797, 147.17005920410156, -77.75579833984375, 231.92001342773438, 290.2305908203125, 61.826942443847656, -174.27847290039062, -3.4725723266601562, 53.31122589111328, -6.2765350341796875, -0.6912841796875, 123.37018585205078, 44.63323974609375, 275.42608642578125, 44.550750732421875, -193.79150390625, 1.0588645935058594, 93.21903991699219, 210.312255859375, 198.992431640625, 70.31647491455078, -199.58139038085938, 80.52957153320312, 76.77628326416016, 125.4306411743164, 181.09739685058594, 144.69879150390625, 212.32049560546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000627.npy"}
|
|
{"epoch": 0.9478458049886621, "step": 628, "batch_size": 64, "mean": 87.09317016601562, "std": 128.04795837402344, "min": -273.0263671875, "p10": -67.12699432373047, "median": 89.06139373779297, "p90": 220.80382537841797, "max": 327.33197021484375, "pos_frac": 0.828125, "sample": [222.1498565673828, 115.85366821289062, 90.77639770507812, 160.89332580566406, -41.398399353027344, -187.39535522460938, 243.19363403320312, 38.057403564453125, 184.7161865234375, 201.7147674560547, 223.02279663085938, 258.28546142578125, 104.34010314941406, 46.210384368896484, 40.18189239501953, 176.629150390625, -273.0263671875, -175.43934631347656, 213.7542266845703, 62.568904876708984, 217.6630859375, 82.99198913574219, 21.953950881958008, -68.51699829101562, 204.8552703857422, 167.52890014648438, 61.382598876953125, 143.62747192382812, 87.34638977050781, 206.08795166015625, 188.42333984375, 267.02667236328125, 40.46124267578125, 212.55557250976562, 1.4496726989746094, 327.33197021484375, -32.26261901855469, 7.5749053955078125, 214.66452026367188, -183.28196716308594, 141.82215881347656, 107.0950698852539, 24.680814743041992, 173.81353759765625, 57.913841247558594, 53.62266540527344, 190.66929626464844, 83.38092041015625, 175.62826538085938, 202.6215362548828, 27.597814559936523, -63.88365173339844, 17.106788635253906, -200.9025115966797, 107.05442810058594, 315.43743896484375, 31.021636962890625, 170.04502868652344, 5.7754974365234375, 72.39509582519531, -41.11566925048828, 135.10482788085938, 0.08600997924804688, -86.96070861816406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000628.npy"}
|
|
{"epoch": 0.9493575207860923, "step": 629, "batch_size": 64, "mean": 63.783966064453125, "std": 106.31426239013672, "min": -171.75433349609375, "p10": -39.59658432006835, "median": 43.88302993774414, "p90": 218.53874359130864, "max": 320.134033203125, "pos_frac": 0.65625, "sample": [-58.294593811035156, 133.1171875, -171.75433349609375, 100.65912628173828, 81.86167907714844, 0.24767303466796875, -23.316787719726562, 83.10731506347656, -152.54603576660156, 61.46192932128906, -27.190811157226562, 38.264015197753906, 9.520151138305664, 224.3697052001953, -30.20844078063965, -18.913745880126953, 1.545278549194336, -14.629508972167969, -66.3736572265625, 65.33941650390625, 49.502044677734375, -2.1554489135742188, -7.864755630493164, 31.55672836303711, -53.98965835571289, 58.530372619628906, 320.134033203125, 265.8293151855469, -12.357290267944336, 188.47354125976562, 129.93008422851562, -34.04113006591797, 103.13856506347656, 89.14270782470703, -18.92715835571289, 120.55612182617188, 119.62013244628906, 20.224945068359375, 6.389469146728516, -0.8569355010986328, -27.980392456054688, 63.41086196899414, 263.44573974609375, -3.1495704650878906, -14.446769714355469, -53.14996337890625, 61.75421905517578, 151.8408966064453, 155.7633056640625, 185.65774536132812, -27.408950805664062, 69.16482543945312, 252.42449951171875, 16.042192459106445, 202.11968994140625, 297.851806640625, 13.421592712402344, 256.1402893066406, 165.77813720703125, 204.93316650390625, 33.317901611328125, 164.04067993164062, -41.97749328613281, 84.07825469970703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000629.npy"}
|
|
{"epoch": 0.9508692365835223, "step": 630, "batch_size": 64, "mean": 88.95985412597656, "std": 114.0672607421875, "min": -185.36322021484375, "p10": -38.62715835571287, "median": 70.75464630126953, "p90": 227.42686309814462, "max": 355.5955505371094, "pos_frac": 0.78125, "sample": [-68.608642578125, 191.55096435546875, -14.898372650146484, 21.473670959472656, 175.36083984375, 269.12408447265625, 140.25369262695312, 3.926044464111328, 255.84725952148438, 170.7481689453125, -2.6012420654296875, 196.32504272460938, -10.571876525878906, 196.19119262695312, 20.40685272216797, 197.984375, -118.05859375, 67.41790008544922, 144.4189453125, -185.36322021484375, 73.40182495117188, 6.350685119628906, 77.5409164428711, 196.4356689453125, 284.7412109375, 9.515007019042969, -46.692604064941406, 48.76970291137695, -173.45639038085938, 175.06033325195312, -9.352149963378906, 68.10746765136719, 5.981006622314453, 259.4857177734375, 185.72955322265625, 65.47105407714844, 236.58985900878906, 39.92450714111328, 73.63224792480469, 127.33089447021484, 355.5955505371094, 26.402008056640625, 206.04653930664062, -54.72255325317383, 198.93557739257812, 140.28915405273438, 171.8946990966797, 54.320709228515625, 114.42701721191406, -78.48286437988281, 67.18824005126953, 11.888961791992188, -19.379558563232422, 267.6578063964844, 193.78082275390625, 26.55457878112793, 110.5578384399414, 21.020389556884766, 194.78482055664062, 196.29360961914062, -19.807785034179688, 42.99017333984375, 112.69330596923828, -2.991527557373047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000630.npy"}
|
|
{"epoch": 0.9523809523809523, "step": 631, "batch_size": 64, "mean": 99.1514663696289, "std": 128.6322021484375, "min": -166.7552490234375, "p10": -36.67537994384765, "median": 73.44319915771484, "p90": 236.97840118408203, "max": 395.34423828125, "pos_frac": 0.71875, "sample": [190.5359649658203, 74.18266296386719, -37.9921875, 235.75946044921875, 165.77032470703125, 183.77984619140625, 236.92398071289062, 173.29229736328125, 70.17578125, -31.421234130859375, 70.04220581054688, -7.042041778564453, 179.00527954101562, 26.809829711914062, -11.42293930053711, 40.267173767089844, -4.4469757080078125, 48.66472244262695, 28.651695251464844, 308.6031494140625, 308.18243408203125, -82.33541870117188, 225.4002685546875, 237.00172424316406, 174.95382690429688, 55.40818786621094, 26.4332275390625, 343.6297912597656, -1.836385726928711, 152.7221221923828, 40.64789962768555, 26.73448944091797, -165.28001403808594, 114.39109802246094, 105.16700744628906, 343.15673828125, 189.63587951660156, -21.685455322265625, 395.34423828125, 229.9516143798828, 112.41205596923828, -33.60282897949219, -144.59510803222656, 174.4998321533203, -19.471038818359375, -166.7552490234375, -15.647161483764648, 3.414064407348633, -67.84608459472656, 195.82586669921875, -43.92121124267578, 136.6988525390625, -12.95074462890625, 228.21444702148438, -3.9425926208496094, 177.68374633789062, 12.15817642211914, 233.96588134765625, 72.7037353515625, 300.9969482421875, 144.53021240234375, 188.63568115234375, 16.102275848388672, 218.82183837890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000631.npy"}
|
|
{"epoch": 0.9538926681783825, "step": 632, "batch_size": 64, "mean": 91.64991760253906, "std": 110.84510803222656, "min": -193.36813354492188, "p10": -37.17676086425781, "median": 83.4979248046875, "p90": 223.3836410522461, "max": 319.9880676269531, "pos_frac": 0.765625, "sample": [118.15441131591797, 253.387451171875, 101.51022338867188, -2.101055145263672, 57.483028411865234, 197.53128051757812, -6.356220245361328, 132.399169921875, -29.209575653076172, 208.93173217773438, 117.04244995117188, -69.48526000976562, 87.14108276367188, -17.021800994873047, -84.52056884765625, 218.9934844970703, 233.29351806640625, 13.894660949707031, 185.54925537109375, -85.76544952392578, 51.982513427734375, 22.70096206665039, -154.62966918945312, 29.405258178710938, 60.930877685546875, 179.65325927734375, 79.85476684570312, 146.9207763671875, 76.93283081054688, -13.60171890258789, 169.18003845214844, 319.9880676269531, 100.02734375, -34.67853546142578, 265.4020080566406, 13.69297981262207, 212.06991577148438, 36.82310485839844, 96.64842987060547, 135.8319854736328, 215.9324951171875, 200.8542022705078, 29.679046630859375, -25.646995544433594, 9.181415557861328, 216.19580078125, 201.45162963867188, 122.11268615722656, 225.26513671875, 198.3355255126953, 202.23629760742188, -193.36813354492188, 73.9364013671875, 138.32904052734375, -2.6544265747070312, -38.24742889404297, 218.2244110107422, 250.2347412109375, 239.94349670410156, 47.844970703125, 73.4135513305664, 22.035037994384766, -51.033504486083984, 65.35270690917969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000632.npy"}
|
|
{"epoch": 0.9554043839758125, "step": 633, "batch_size": 64, "mean": 70.39846801757812, "std": 110.2045669555664, "min": -137.6203155517578, "p10": -59.106280899047846, "median": 50.98630332946777, "p90": 209.11713562011718, "max": 313.3438720703125, "pos_frac": 0.703125, "sample": [-4.37769889831543, 37.02191925048828, 10.066413879394531, 141.76995849609375, 197.66929626464844, -32.97486877441406, 125.09719848632812, 151.94285583496094, 53.9107666015625, 102.7140121459961, -6.508544921875, 17.46885871887207, 297.308349609375, 35.66691207885742, -43.12791061401367, 152.74081420898438, 24.001022338867188, 86.74110412597656, 91.90888214111328, -27.410255432128906, 9.182144165039062, -65.52886199951172, 102.10765838623047, 23.763648986816406, 182.57598876953125, 67.15238189697266, -30.084644317626953, -30.59918975830078, 129.4272918701172, 99.69266510009766, 193.37498474121094, -11.298576354980469, 205.69790649414062, 54.26875305175781, -56.63151931762695, 9.270282745361328, -105.58482360839844, -48.12835693359375, -60.166893005371094, -35.550315856933594, -106.87004852294922, 49.710941314697266, 1.9450340270996094, 274.45782470703125, 52.26166534423828, 80.93788146972656, 41.348289489746094, -75.86809539794922, 69.64631652832031, 199.1575469970703, 207.55856323242188, 190.59059143066406, 209.78509521484375, 203.4246826171875, 3.3470535278320312, -80.70500946044922, -0.7142448425292969, 30.948299407958984, 191.3280029296875, 234.8047332763672, 313.3438720703125, -137.6203155517578, 277.91644287109375, 230.19705200195312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000633.npy"}
|
|
{"epoch": 0.9569160997732427, "step": 634, "batch_size": 64, "mean": 30.506187438964844, "std": 115.40371704101562, "min": -213.92144775390625, "p10": -123.46476135253904, "median": 4.6519365310668945, "p90": 187.51809844970705, "max": 369.91363525390625, "pos_frac": 0.546875, "sample": [-15.352344512939453, -130.25384521484375, -6.154823303222656, 193.55026245117188, 13.313568115234375, -15.747381210327148, -19.15256118774414, -11.00477409362793, 62.603111267089844, 27.527389526367188, 16.44950294494629, 5.985191345214844, 1.380523681640625, -42.061485290527344, -14.81231689453125, 84.34820556640625, 149.1607208251953, 67.66995239257812, -60.12029266357422, -7.0887298583984375, -21.27141571044922, -2.2434959411621094, 160.53651428222656, -159.27432250976562, 207.0527801513672, 4.291149139404297, 220.4245147705078, 166.87042236328125, 369.91363525390625, -148.86412048339844, 8.311920166015625, 14.240793228149414, -8.284160614013672, -34.028507232666016, 188.41317749023438, -2.587717056274414, -100.45970916748047, -158.1481170654297, 245.8068389892578, 182.3846893310547, 163.0790252685547, -107.62356567382812, -150.03445434570312, 22.977008819580078, -194.60064697265625, -213.92144775390625, -18.48153305053711, 1.2703208923339844, 191.39739990234375, 65.53004455566406, -17.38848876953125, -59.85084533691406, 151.1309814453125, 68.18598175048828, -2.690511703491211, -8.43526840209961, 185.42958068847656, 146.24755859375, 49.25316619873047, 110.25872039794922, 35.956459045410156, 5.012723922729492, -56.983787536621094, 153.35281372070312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000634.npy"}
|
|
{"epoch": 0.9584278155706727, "step": 635, "batch_size": 64, "mean": 58.899574279785156, "std": 121.66069793701172, "min": -211.3245849609375, "p10": -104.69952621459962, "median": 50.38602828979492, "p90": 225.13340911865237, "max": 265.53680419921875, "pos_frac": 0.640625, "sample": [-105.25831604003906, 187.71664428710938, -103.39568328857422, 14.737751007080078, 70.93270874023438, 35.68108367919922, -113.96096801757812, -164.1714324951172, -113.43609619140625, 193.09768676757812, 0.790283203125, 183.2501220703125, 24.454673767089844, -4.086128234863281, 226.23773193359375, 228.59121704101562, 166.33151245117188, 12.856658935546875, 239.8536376953125, 202.89944458007812, -92.90882873535156, 21.86541748046875, -55.925392150878906, 246.56735229492188, 65.09097290039062, -76.16142272949219, 182.9381866455078, -3.0505809783935547, -13.023174285888672, 107.0401611328125, 164.74072265625, -59.15459442138672, 207.64987182617188, -10.550346374511719, 73.9582290649414, 6.005680084228516, 194.0442352294922, -17.696239471435547, 240.02610778808594, -169.9379119873047, 149.07798767089844, 262.08624267578125, -43.3414192199707, 222.55665588378906, -26.76556396484375, -43.55238342285156, 99.75709533691406, 97.49056243896484, 114.21986389160156, -40.44743347167969, 103.54256439208984, 265.53680419921875, -12.54632568359375, 85.59639739990234, -211.3245849609375, 150.6029052734375, 147.1090850830078, 28.269744873046875, -6.2833251953125, 156.4494171142578, -129.12037658691406, 114.04708862304688, 87.9743423461914, 3.9964447021484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000635.npy"}
|
|
{"epoch": 0.9599395313681028, "step": 636, "batch_size": 64, "mean": 80.24287414550781, "std": 108.712158203125, "min": -204.85186767578125, "p10": -24.691339874267573, "median": 85.54393768310547, "p90": 211.987126159668, "max": 332.18426513671875, "pos_frac": 0.796875, "sample": [199.71299743652344, 12.902961730957031, 12.259634017944336, 9.609397888183594, 93.69620513916016, 23.08728790283203, 85.92472839355469, -6.710197448730469, -36.198768615722656, 197.4759979248047, 132.14651489257812, -8.11644172668457, 192.95831298828125, 165.9810028076172, 51.867919921875, 34.39302062988281, -53.63133239746094, 232.33338928222656, 83.67119598388672, 132.9410400390625, 178.00851440429688, 6.816215515136719, 3.210744857788086, 168.9933319091797, -0.09090423583984375, 91.75846099853516, 292.6357116699219, 5.07258415222168, -0.7163219451904297, 97.72821807861328, 118.13995361328125, 113.10142517089844, 272.07928466796875, 96.48956298828125, -20.528579711914062, 173.339111328125, 118.23020935058594, 50.05858612060547, -26.475379943847656, 178.17913818359375, 120.4689712524414, 217.24746704101562, -10.0400390625, 332.18426513671875, 12.076335906982422, -192.41802978515625, 232.99949645996094, 65.98719787597656, 3.5535011291503906, 188.5906219482422, 171.1068572998047, 102.05213165283203, 161.2475128173828, 232.65298461914062, 121.23265075683594, -148.97149658203125, -204.85186767578125, 21.48856544494629, 85.16314697265625, 122.01638793945312, 39.47283935546875, -128.08523559570312, 69.75497436523438, 48.279937744140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000636.npy"}
|
|
{"epoch": 0.9614512471655329, "step": 637, "batch_size": 64, "mean": 58.14302062988281, "std": 128.00009155273438, "min": -331.9679870605469, "p10": -95.27954559326172, "median": 33.142311096191406, "p90": 232.00621948242193, "max": 407.7841491699219, "pos_frac": 0.6875, "sample": [-22.133102416992188, 262.94903564453125, 14.050849914550781, 407.7841491699219, -30.833885192871094, -30.55487060546875, -5.770362854003906, 148.2333984375, 244.00772094726562, 29.48989486694336, -38.045310974121094, 44.318641662597656, 140.632568359375, -129.75851440429688, 161.4112548828125, 56.56534194946289, 5.900810241699219, 139.19894409179688, 216.4744873046875, -27.42320442199707, 123.81040954589844, 36.43852233886719, 0.9461669921875, -2.5063858032226562, 11.283491134643555, 29.846099853515625, -99.21326446533203, 159.8887939453125, 273.0419921875, 205.67245483398438, 201.84954833984375, 10.398124694824219, -167.07618713378906, 151.58114624023438, -43.199737548828125, 141.31759643554688, -118.42453002929688, -9.280227661132812, 26.687767028808594, 46.732643127441406, 106.85997009277344, 256.0784606933594, 3.145784378051758, 17.08572769165039, 236.16192626953125, 22.966880798339844, 13.601242065429688, 109.2720947265625, -130.11915588378906, -15.374183654785156, 47.801361083984375, 254.8703155517578, 108.73674011230469, 96.1766128540039, 39.38462829589844, -169.7855224609375, 222.3095703125, 59.318138122558594, -19.38544464111328, -331.9679870605469, 111.1147232055664, 206.89218139648438, -86.10086822509766, -4.18212890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000637.npy"}
|
|
{"epoch": 0.9629629629629629, "step": 638, "batch_size": 64, "mean": 70.67045593261719, "std": 110.64733123779297, "min": -194.854248046875, "p10": -45.52099952697753, "median": 22.164705276489258, "p90": 218.16770935058594, "max": 303.21417236328125, "pos_frac": 0.75, "sample": [-194.854248046875, 175.1338653564453, 122.17073059082031, 5.031070709228516, -142.150390625, 0.0349578857421875, 220.3446502685547, 165.87875366210938, 45.216365814208984, -24.736560821533203, 9.045677185058594, -60.01132583618164, -4.9314117431640625, -49.05735778808594, -61.724853515625, 280.5826416015625, 4.0375518798828125, -52.491729736328125, 218.90997314453125, 19.219741821289062, 147.3907928466797, 9.863716125488281, 158.94522094726562, 254.39993286132812, -92.9694595336914, 251.8301544189453, 112.9862289428711, 189.2888641357422, 1.5724563598632812, 65.27626037597656, -2.6021366119384766, 223.17507934570312, 157.9053497314453, 175.49752807617188, 28.99462890625, -21.212717056274414, 22.215660095214844, 170.95126342773438, 3.4010772705078125, 212.05555725097656, 83.91507720947266, 216.43576049804688, 215.07208251953125, -4.505451202392578, 194.91395568847656, -32.33929443359375, -6.081195831298828, 188.51876831054688, 19.106521606445312, 20.30213165283203, 0.48592567443847656, 22.113750457763672, 303.21417236328125, 47.252960205078125, -37.26949691772461, 19.860610961914062, 3.7574234008789062, 207.0582733154297, 195.9689483642578, 14.024433135986328, -19.635498046875, 0.25335693359375, 84.01361846923828, 41.85863494873047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000638.npy"}
|
|
{"epoch": 0.9644746787603931, "step": 639, "batch_size": 64, "mean": 84.87150573730469, "std": 117.521728515625, "min": -141.24884033203125, "p10": -52.330354309082026, "median": 58.29057312011719, "p90": 247.06190185546876, "max": 342.2500305175781, "pos_frac": 0.734375, "sample": [206.89779663085938, 144.284912109375, 137.0338134765625, -77.03874206542969, 50.631439208984375, 203.26223754882812, 13.400588989257812, -43.201969146728516, -63.34374237060547, 146.5859832763672, 36.02656555175781, 65.94970703125, 300.77374267578125, 49.88984680175781, 183.2544708251953, 7.682830810546875, 66.27787780761719, 5.9327545166015625, -141.24884033203125, 116.69628143310547, 159.13580322265625, 203.36465454101562, -2.2222461700439453, 280.23345947265625, 12.876029968261719, 139.89089965820312, 83.34942626953125, 203.52716064453125, -65.41105651855469, 243.00889587402344, 251.60401916503906, 2.7375316619873047, 0.1336956024169922, 155.00900268554688, -4.103496551513672, 20.158004760742188, 152.564697265625, -13.288955688476562, 342.2500305175781, 187.49578857421875, 182.68722534179688, 76.06011962890625, 310.1495666503906, 126.30831146240234, -117.3619384765625, -18.008556365966797, -38.736839294433594, -56.24251937866211, 47.99266052246094, 23.38912582397461, 261.0812683105469, 185.7626495361328, 19.35326385498047, 39.030853271484375, -35.279754638671875, -127.03572082519531, 34.74395751953125, -32.34638977050781, -4.402900695800781, 248.7989044189453, -37.685386657714844, 165.88827514648438, 193.0376739501953, 222.53158569335938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000639.npy"}
|
|
{"epoch": 0.9659863945578231, "step": 640, "batch_size": 64, "mean": 77.69434356689453, "std": 114.5261459350586, "min": -205.96206665039062, "p10": -40.82801513671875, "median": 54.03532409667969, "p90": 208.62894439697266, "max": 339.0902099609375, "pos_frac": 0.796875, "sample": [192.52566528320312, 39.97270584106445, 339.0902099609375, -32.725059509277344, 24.97258758544922, 155.14523315429688, 142.8785400390625, 5.6217193603515625, 209.17562866210938, 207.3533477783203, 72.47183227539062, 21.66551971435547, 140.99436950683594, 171.44473266601562, -161.3217010498047, 174.239501953125, 57.014427185058594, 32.70903396606445, -92.2666015625, 222.84909057617188, -197.28993225097656, 169.85366821289062, 265.8502197265625, 61.06706237792969, -38.60340881347656, -14.264232635498047, 150.79403686523438, 34.906166076660156, -29.936410903930664, 18.760852813720703, 177.73463439941406, 17.234664916992188, 54.112457275390625, 210.64938354492188, 115.47216796875, -1.9023208618164062, -11.373741149902344, 24.884963989257812, -81.34593200683594, 58.42829132080078, 44.6549072265625, 201.53465270996094, 7.982635498046875, 52.218414306640625, 206.46360778808594, 145.49234008789062, 105.85367584228516, -205.96206665039062, 17.850717544555664, 193.4751434326172, 157.0169677734375, 277.996337890625, 36.697113037109375, 334.9769287109375, 18.412445068359375, -47.854827880859375, 183.40234375, -41.78141784667969, 158.641357421875, 53.95819091796875, 14.052127838134766, 29.032684326171875, 23.872610092163086, 95.60773468017578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000640.npy"}
|
|
{"epoch": 0.9674981103552532, "step": 641, "batch_size": 64, "mean": 68.43684387207031, "std": 92.93678283691406, "min": -221.8933868408203, "p10": -17.231615447998045, "median": 56.20354080200195, "p90": 187.76516265869148, "max": 331.5965270996094, "pos_frac": 0.78125, "sample": [-23.321582794189453, -124.24951934814453, 88.64041137695312, 103.99961853027344, 25.906982421875, -10.326705932617188, 158.78509521484375, -0.5159835815429688, 7.799659729003906, -15.135017395019531, 37.23802185058594, 10.717439651489258, 153.73898315429688, 163.43235778808594, 95.93374633789062, 7.20661735534668, -2.53997802734375, 224.03668212890625, 199.236572265625, 149.60560607910156, 138.43243408203125, 4.537544250488281, 36.99976348876953, 56.41514587402344, 106.12528228759766, 216.2901153564453, 8.975997924804688, 85.06295776367188, 13.0965576171875, 143.60687255859375, 8.437286376953125, 81.61700439453125, -44.95398712158203, 95.11885833740234, 15.259086608886719, -81.8719711303711, 147.28468322753906, 69.91386413574219, -21.964393615722656, 68.38789367675781, -221.8933868408203, 134.935791015625, 69.214599609375, 26.529678344726562, 109.82376098632812, 235.04678344726562, 20.721572875976562, -2.787595748901367, 172.78558349609375, 42.48301696777344, 331.5965270996094, 120.18342590332031, 231.0484161376953, 194.1849822998047, -18.130157470703125, 106.01194763183594, 135.69638061523438, 32.93803405761719, 41.32492446899414, -1.41217041015625, -14.214881896972656, 14.5023193359375, 55.99193572998047, 166.41661071777344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000641.npy"}
|
|
{"epoch": 0.9690098261526833, "step": 642, "batch_size": 64, "mean": 51.56036376953125, "std": 115.21082305908203, "min": -198.5953369140625, "p10": -84.96003494262695, "median": 45.64449501037598, "p90": 193.03021850585938, "max": 382.9346008300781, "pos_frac": 0.671875, "sample": [67.38419342041016, 14.441734313964844, 3.407135009765625, -136.62644958496094, 53.27678680419922, -8.719640731811523, 166.86672973632812, 205.14796447753906, -80.06341552734375, -162.13148498535156, 382.9346008300781, -1.5833282470703125, 110.51712036132812, -46.02766418457031, 88.76547241210938, 66.99560546875, 148.9286346435547, 249.70025634765625, -54.26390838623047, -15.793460845947266, 142.415283203125, 151.28082275390625, 190.49908447265625, -34.58694076538086, 114.79380798339844, 174.268310546875, 198.95338439941406, 39.673919677734375, 188.20980834960938, 47.330909729003906, 109.34232330322266, 86.11331176757812, 82.62118530273438, 26.37070083618164, -12.375495910644531, 157.84259033203125, -79.31156921386719, 78.08739471435547, 14.042533874511719, -95.14308166503906, 32.73849868774414, -29.625579833984375, 9.939584732055664, -23.36041259765625, 164.1586151123047, 7.511072158813477, -74.01788330078125, 165.78610229492188, 154.43231201171875, 222.92477416992188, 51.810333251953125, 43.163368225097656, 194.114990234375, 112.94869232177734, 203.53915405273438, 43.95808029174805, -87.05858612060547, 77.70751190185547, -198.5953369140625, -182.01104736328125, -5.2873077392578125, -67.08265686035156, -166.1651153564453, 14.748863220214844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000642.npy"}
|
|
{"epoch": 0.9705215419501134, "step": 643, "batch_size": 64, "mean": 78.69720458984375, "std": 120.47637939453125, "min": -202.11941528320312, "p10": -68.51931610107421, "median": 77.38785934448242, "p90": 225.2110595703125, "max": 371.2216796875, "pos_frac": 0.734375, "sample": [73.6910400390625, 83.67527770996094, 102.33529663085938, 43.01580810546875, 1.007838249206543, 254.56182861328125, 1.877389907836914, 186.7164764404297, 89.78490447998047, 32.604454040527344, 196.44818115234375, 39.052406311035156, 179.96449279785156, -0.7755889892578125, 130.92315673828125, 140.44778442382812, -17.137008666992188, 15.823724746704102, 212.50656127929688, 226.66500854492188, 174.40823364257812, 29.27953338623047, 197.77642822265625, 113.52969360351562, -64.05863189697266, 221.81851196289062, 66.73054504394531, -46.9803466796875, 209.65521240234375, 315.6215515136719, 107.09188842773438, 171.1951141357422, -52.3975830078125, 168.28897094726562, 77.74807739257812, -96.28611755371094, 53.39521789550781, -9.02484130859375, 255.83316040039062, 178.33499145507812, 156.7476348876953, -64.5816879272461, 135.8072052001953, -16.232513427734375, 163.85543823242188, -145.50511169433594, -142.892333984375, -202.11941528320312, 3.1956558227539062, 22.32149887084961, -70.20687103271484, 113.66634368896484, 245.91567993164062, 101.45429992675781, 71.67236328125, -77.6220932006836, -17.332901000976562, 250.3045654296875, 54.207908630371094, 77.02764129638672, -41.681278228759766, 371.2216796875, 137.04330444335938, -154.79428100585938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000643.npy"}
|
|
{"epoch": 0.9720332577475435, "step": 644, "batch_size": 64, "mean": 54.55887222290039, "std": 115.64922332763672, "min": -262.4553527832031, "p10": -72.31896057128904, "median": 23.68073558807373, "p90": 201.94127349853517, "max": 244.67547607421875, "pos_frac": 0.65625, "sample": [225.14703369140625, -122.11600494384766, -262.4553527832031, 167.7760467529297, -46.98484420776367, -80.75244140625, 9.604530334472656, -1.8737812042236328, 193.47900390625, 244.67547607421875, 67.21455383300781, 26.302959442138672, 193.87274169921875, 183.351806640625, 106.56951904296875, 68.21339416503906, 46.57072067260742, 32.570777893066406, -1.9485702514648438, 193.10153198242188, -5.707263946533203, 203.070068359375, -165.18838500976562, -39.14332580566406, -4.12933349609375, 123.20511627197266, 2.9378433227539062, 127.79270935058594, -3.5421905517578125, -44.44813537597656, 199.3074188232422, -25.6558837890625, -7.1375579833984375, 170.7913360595703, 174.12069702148438, -0.7282485961914062, -52.640838623046875, 237.28443908691406, 210.5929412841797, 4.140419006347656, 119.51383209228516, 209.90652465820312, 195.6886444091797, 23.48195457458496, 15.438308715820312, -214.39332580566406, -18.01519775390625, 152.48619079589844, 17.29962921142578, 163.33489990234375, -21.031906127929688, 23.8795166015625, 6.303556442260742, -122.5509262084961, 22.068679809570312, -1.234130859375, -127.386962890625, 117.27261352539062, 134.21612548828125, 4.304595947265625, 21.153518676757812, 165.26876831054688, 32.92395782470703, 224.59805297851562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000644.npy"}
|
|
{"epoch": 0.9735449735449735, "step": 645, "batch_size": 64, "mean": 66.9179458618164, "std": 100.08053588867188, "min": -208.53036499023438, "p10": -30.183965682983395, "median": 45.27858352661133, "p90": 195.84970703125003, "max": 304.86370849609375, "pos_frac": 0.78125, "sample": [34.22977066040039, 226.6181640625, 115.79288482666016, -98.78063201904297, 106.75581359863281, 271.7989196777344, 139.32321166992188, -0.11707496643066406, -23.14574432373047, 34.75440216064453, 19.32760238647461, -153.68687438964844, 41.85917663574219, 3.4943885803222656, 198.64968872070312, 14.898597717285156, 122.34425354003906, 0.2659492492675781, -10.346466064453125, 25.533811569213867, 200.78585815429688, -28.11853790283203, 0.37624359130859375, 101.87146759033203, 70.06795501708984, -18.19298553466797, 89.01964569091797, 117.91915893554688, 198.06382751464844, 121.84696197509766, 114.68575286865234, -2.682088851928711, 64.14591217041016, 11.044448852539062, 10.716384887695312, 148.62791442871094, -128.07484436035156, 34.466880798339844, -64.7781982421875, 217.7144012451172, 131.6727752685547, 179.8582763671875, 304.86370849609375, -9.552726745605469, 36.671993255615234, 126.0453109741211, 160.8994140625, 99.9728012084961, 109.57650756835938, 130.764404296875, 32.83198547363281, 149.61505126953125, 23.568546295166016, 187.7437744140625, 17.683074951171875, 2.0625228881835938, 149.275634765625, -31.069149017333984, -72.24920654296875, -208.53036499023438, 28.602684020996094, 190.6834259033203, 163.98391723632812, 48.69799041748047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000645.npy"}
|
|
{"epoch": 0.9750566893424036, "step": 646, "batch_size": 64, "mean": 62.709808349609375, "std": 117.40260314941406, "min": -207.430908203125, "p10": -78.63631973266601, "median": 43.441110610961914, "p90": 205.41538696289064, "max": 369.0198669433594, "pos_frac": 0.671875, "sample": [-74.03054809570312, 0.9974956512451172, -9.836250305175781, 156.98744201660156, 197.51829528808594, 206.65850830078125, -131.7065887451172, 269.3836975097656, 102.30989074707031, 209.1063995361328, -10.918441772460938, 22.76329803466797, 195.27865600585938, 199.5415802001953, 163.15997314453125, -4.1309814453125, 106.18643951416016, 124.99270629882812, 202.5147705078125, 127.70081329345703, 231.83282470703125, 117.67343139648438, -207.430908203125, -80.61022186279297, -86.41592407226562, 130.72103881835938, 4.553646087646484, -8.599655151367188, 19.89543914794922, 51.991973876953125, 87.27151489257812, -3.5712966918945312, -16.165611267089844, 223.24806213378906, 91.5233154296875, -0.6090164184570312, 54.58854675292969, 19.560874938964844, 55.193870544433594, 152.1168212890625, -0.8908481597900391, -59.10260009765625, 131.10330200195312, -120.08357238769531, 145.51504516601562, 369.0198669433594, 186.59368896484375, 166.0731201171875, 31.670093536376953, 288.18353271484375, 17.341672897338867, -6.778106689453125, 6.4407958984375, -37.13914489746094, -196.20887756347656, 53.05522918701172, 114.52987670898438, -151.00180053710938, -36.96379089355469, 162.31396484375, 34.8902473449707, -12.591690063476562, 9.39706802368164, 26.815006256103516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000646.npy"}
|
|
{"epoch": 0.9765684051398337, "step": 647, "batch_size": 64, "mean": 97.99229431152344, "std": 111.2345199584961, "min": -181.2357635498047, "p10": -39.12012710571289, "median": 103.48685073852539, "p90": 229.28551025390627, "max": 347.3150329589844, "pos_frac": 0.828125, "sample": [-10.330333709716797, 146.00042724609375, 199.48231506347656, 22.973922729492188, 85.7298583984375, 123.30580139160156, 38.48309326171875, 238.27178955078125, 141.07427978515625, 318.3121337890625, 97.91896057128906, 146.96453857421875, 173.329833984375, 1.8445930480957031, -39.49272155761719, 8.84100341796875, 347.3150329589844, -26.713212966918945, -76.16999816894531, 230.19287109375, 193.34120178222656, 87.93543243408203, 131.9180908203125, 105.04905700683594, 227.1683349609375, 170.09124755859375, 199.00498962402344, 2.071077346801758, 212.31031799316406, 40.849365234375, 186.81793212890625, 0.754150390625, 190.20046997070312, 10.560089111328125, 161.96951293945312, -24.220703125, 16.719989776611328, 209.17990112304688, 32.75701141357422, -83.32677459716797, 136.4757537841797, 44.734535217285156, 40.43235397338867, 154.52874755859375, -181.2357635498047, 46.56822967529297, 101.92464447021484, 28.103702545166016, 48.519927978515625, 112.21490478515625, -62.000518798828125, 256.3860168457031, 189.61766052246094, -64.60601806640625, -118.97677612304688, 185.90304565429688, 60.51127624511719, 298.9458923339844, -38.25074005126953, 232.69900512695312, 219.1696319580078, 41.545188903808594, 157.259521484375, 142.5521240234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000647.npy"}
|
|
{"epoch": 0.9780801209372638, "step": 648, "batch_size": 64, "mean": 68.55940246582031, "std": 132.4037322998047, "min": -210.72958374023438, "p10": -105.58579406738279, "median": 73.2126579284668, "p90": 217.24642333984377, "max": 356.724609375, "pos_frac": 0.671875, "sample": [219.28305053710938, 40.957542419433594, 19.93677520751953, -196.24021911621094, 211.20486450195312, -89.28669738769531, -210.72958374023438, 2.4851303100585938, 212.49429321289062, 50.615234375, -74.34060668945312, 123.91716766357422, 73.55057525634766, -36.96018981933594, -112.57112121582031, -151.31504821777344, 33.99985122680664, -19.224510192871094, 356.724609375, 187.63211059570312, 204.11599731445312, 140.33343505859375, 127.92254638671875, 160.21075439453125, 177.08395385742188, -21.73077392578125, 129.12496948242188, 220.04513549804688, 51.50189971923828, 72.87474060058594, -67.306640625, -3.5912857055664062, 337.650634765625, 331.03863525390625, 2.0897903442382812, -138.008544921875, 30.569961547851562, 108.13904571533203, 170.7527313232422, -7.568489074707031, 125.78286743164062, 206.54110717773438, 226.33126831054688, -83.66842651367188, -24.53070831298828, 9.559288024902344, 72.38075256347656, 137.55101013183594, 210.9871368408203, 227.38092041015625, 186.2602996826172, 157.993408203125, 178.86700439453125, -209.20851135253906, -122.53927612304688, 99.22898864746094, 134.6927947998047, -66.16117095947266, 89.56472778320312, -41.356075286865234, -3.471710205078125, -46.31957244873047, 131.42019653320312, 123.1336669921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000648.npy"}
|
|
{"epoch": 0.9795918367346939, "step": 649, "batch_size": 64, "mean": 72.66088104248047, "std": 116.59822082519531, "min": -207.55189514160156, "p10": -110.94443969726562, "median": 70.45387649536133, "p90": 200.27859191894532, "max": 326.36981201171875, "pos_frac": 0.703125, "sample": [171.09048461914062, 151.37298583984375, -40.18746566772461, 162.29116821289062, 193.4668731689453, 185.33474731445312, -24.539772033691406, -9.414880752563477, -19.042984008789062, 182.6806640625, -3.8623905181884766, 326.36981201171875, 147.0131378173828, 0.7573204040527344, 193.49046325683594, 192.47935485839844, 122.83879089355469, 41.12079620361328, 167.27098083496094, 214.4818115234375, 196.7932891845703, -1.8741779327392578, -125.61353302001953, -19.49138641357422, -207.55189514160156, 109.17776489257812, 11.288749694824219, 51.258522033691406, -117.23664093017578, -110.73077392578125, 70.50218200683594, 70.40557098388672, 161.9280548095703, -1.436269760131836, 111.44970703125, 180.61203002929688, 60.90111541748047, 175.49041748046875, 136.171142578125, 195.03208923339844, 9.709571838378906, 221.72735595703125, 83.1659927368164, -17.894535064697266, 29.5433349609375, 195.72866821289062, 3.9218063354492188, 24.812538146972656, 13.291885375976562, 89.44869232177734, 43.050437927246094, 276.92095947265625, -111.0360107421875, -161.37353515625, 249.2726287841797, -10.417926788330078, -114.68241882324219, -113.70515441894531, 108.80522918701172, 201.7722930908203, 209.609375, 104.08879852294922, -3.789836883544922, 16.23822021484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000649.npy"}
|
|
{"epoch": 0.981103552532124, "step": 650, "batch_size": 64, "mean": 73.41058349609375, "std": 120.68515014648438, "min": -208.3995361328125, "p10": -57.48132553100585, "median": 42.81291198730469, "p90": 207.64543304443362, "max": 283.44464111328125, "pos_frac": 0.734375, "sample": [-32.46010208129883, 281.92449951171875, 276.2290954589844, 153.73646545410156, 22.752685546875, 179.15231323242188, 193.93499755859375, 82.74270629882812, 9.743675231933594, 34.242801666259766, 169.87640380859375, 193.49923706054688, -152.36907958984375, -6.2816925048828125, 2.5546207427978516, 169.89535522460938, 165.42518615722656, 29.96129035949707, 248.48057556152344, 1.083169937133789, 184.45458984375, 128.8853302001953, -45.478065490722656, 13.379695892333984, 188.6569366455078, 210.02133178710938, 201.25575256347656, 202.10166931152344, 119.9671401977539, -116.4520492553711, 1.4529380798339844, -149.7095184326172, -10.621883392333984, 133.75729370117188, -208.3995361328125, 32.65693664550781, 200.14036560058594, 82.12338256835938, 21.33991241455078, -26.20264434814453, -73.72821044921875, 77.99972534179688, -198.7980499267578, 187.20596313476562, -62.625579833984375, 5.626365661621094, 283.44464111328125, 221.44862365722656, 136.55859375, -40.34369659423828, 166.9062957763672, 9.88494873046875, 196.51222229003906, -4.230533599853516, 7.41093635559082, 41.5465087890625, -34.73091125488281, -18.520111083984375, 190.56063842773438, 274.14166259765625, 44.079315185546875, -24.213092803955078, 105.25979614257812, 19.427291870117188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000650.npy"}
|
|
{"epoch": 0.982615268329554, "step": 651, "batch_size": 64, "mean": 63.42354202270508, "std": 106.38317108154297, "min": -163.32742309570312, "p10": -43.924932098388666, "median": 27.30001449584961, "p90": 208.08021240234376, "max": 330.7984313964844, "pos_frac": 0.703125, "sample": [-2.7888641357421875, 7.79864501953125, 112.87327575683594, -45.087677001953125, 47.201133728027344, -63.51453399658203, -41.21186065673828, 97.11780548095703, 29.76177978515625, 195.0468292236328, 0.571258544921875, -46.07061767578125, -2.7336578369140625, 44.16557312011719, 104.49247741699219, -163.32742309570312, 18.289146423339844, 325.8837585449219, 68.47496795654297, 203.521484375, 208.65426635742188, -7.633079528808594, -33.36247253417969, 146.18760681152344, 22.529830932617188, 33.728946685791016, 76.96115112304688, 110.31768798828125, -6.5340118408203125, 269.6942443847656, 7.677158355712891, 234.30953979492188, 11.923885345458984, -20.68311309814453, 45.26002502441406, 0.2584209442138672, 201.1175537109375, 7.7046966552734375, -30.413734436035156, 55.14883041381836, 217.79046630859375, 9.093368530273438, 146.60755920410156, -5.303874969482422, -16.54684829711914, 206.74075317382812, 241.57286071777344, 181.70587158203125, -71.80748748779297, 191.40255737304688, 18.562707901000977, 8.788970947265625, 2.4609222412109375, 102.2342758178711, 124.31515502929688, 114.81731414794922, 189.3343048095703, -3.9715309143066406, 55.62622833251953, -123.69760131835938, 24.83824920654297, -71.86674499511719, 330.7984313964844, -37.700279235839844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000651.npy"}
|
|
{"epoch": 0.9841269841269841, "step": 652, "batch_size": 64, "mean": 60.503578186035156, "std": 131.5577850341797, "min": -237.174072265625, "p10": -86.92575378417968, "median": 25.868249893188477, "p90": 215.88613586425782, "max": 505.85491943359375, "pos_frac": 0.6875, "sample": [14.387989044189453, 188.66650390625, 181.7686767578125, 1.5843124389648438, -40.603111267089844, 198.44036865234375, -5.238393783569336, 122.71342468261719, 175.0813446044922, 9.634330749511719, 217.05294799804688, -163.53509521484375, 5.329532623291016, 1.0344562530517578, 98.59561157226562, 213.16357421875, 31.885879516601562, 107.26549530029297, 109.49268341064453, 19.534439086914062, -64.806884765625, 241.09976196289062, 187.1881866455078, 3.3211288452148438, 20.533374786376953, -51.499534606933594, -237.174072265625, -76.2669677734375, 249.55567932128906, 141.23944091796875, 31.69873046875, -42.933868408203125, 109.0830078125, 0.33312225341796875, 145.91217041015625, -95.2789077758789, 231.13018798828125, 127.90266418457031, -10.223533630371094, -171.2850799560547, -29.5968017578125, 210.90013122558594, -10.3170166015625, 160.69317626953125, -57.0689697265625, -25.653491973876953, 225.76278686523438, 176.54879760742188, -0.09259223937988281, 207.93283081054688, 505.85491943359375, 115.55875396728516, 93.24693298339844, -14.500312805175781, 5.680820465087891, 15.402938842773438, -162.0430450439453, 4.456636428833008, -91.49380493164062, 198.8909149169922, 34.49072265625, 31.203125, -182.78228759765625, 233.37020874023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000652.npy"}
|
|
{"epoch": 0.9856386999244142, "step": 653, "batch_size": 64, "mean": 72.3629150390625, "std": 115.34034729003906, "min": -204.89016723632812, "p10": -73.6104911804199, "median": 52.4454460144043, "p90": 209.60126037597658, "max": 323.88153076171875, "pos_frac": 0.75, "sample": [3.774263381958008, 162.45834350585938, 23.180580139160156, -5.195533752441406, 90.62045288085938, 52.830291748046875, 48.619056701660156, -98.1249771118164, 204.89349365234375, 12.288238525390625, -16.00100326538086, -1.3057708740234375, 157.146240234375, -82.79469299316406, 323.88153076171875, 182.72903442382812, 142.4020233154297, 234.64898681640625, 40.52170944213867, 219.86019897460938, 147.7374267578125, 234.2364044189453, 137.01296997070312, -11.812232971191406, 67.51832580566406, 56.945655822753906, 207.43356323242188, 52.06060028076172, -4.810855865478516, -37.99369812011719, 246.17288208007812, -115.11875915527344, -52.180686950683594, 33.5456657409668, 5.870676040649414, -93.5950927734375, 126.58575439453125, 31.803741455078125, 151.32461547851562, 101.14224243164062, 37.71538543701172, -156.8708038330078, 312.8326721191406, 21.12249755859375, 156.91940307617188, -188.2899627685547, 193.82469177246094, -40.66058349609375, 162.10203552246094, 105.06207275390625, -19.212181091308594, 200.90316772460938, -204.89016723632812, 10.776321411132812, 5.633596420288086, 51.286216735839844, 185.6107177734375, 210.5302734375, 12.646221160888672, 170.46356201171875, 66.53246307373047, 132.0614776611328, 41.04644012451172, 183.76986694335938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000653.npy"}
|
|
{"epoch": 0.9871504157218443, "step": 654, "batch_size": 64, "mean": 65.32237243652344, "std": 111.6502456665039, "min": -153.31491088867188, "p10": -72.19144744873046, "median": 46.248565673828125, "p90": 214.6586380004883, "max": 371.4671630859375, "pos_frac": 0.703125, "sample": [17.214317321777344, 251.49940490722656, -153.31491088867188, 131.4575653076172, 39.41973876953125, -105.37381744384766, 217.048095703125, 84.59799194335938, 0.2944984436035156, -87.28977966308594, -12.853515625, -65.21598815917969, 138.63482666015625, 2.7874832153320312, -30.648536682128906, 31.356306076049805, 189.88442993164062, -119.49569702148438, 203.66607666015625, 37.688079833984375, 56.85826110839844, 20.484416961669922, 219.52503967285156, 247.34060668945312, -0.7795085906982422, -22.410606384277344, 12.013790130615234, -21.610939025878906, -59.895240783691406, -4.619945526123047, -6.060516357421875, 170.18844604492188, 371.4671630859375, 116.39865112304688, 19.04949188232422, 222.65133666992188, 235.76046752929688, -38.76641845703125, 56.39543151855469, 112.78414916992188, 130.9947052001953, 139.06114196777344, 160.23329162597656, -123.73641967773438, 21.28240966796875, 20.808685302734375, 209.08323669433594, 195.03944396972656, 55.64996337890625, 114.91921997070312, 118.72029113769531, 207.38543701171875, 0.3999824523925781, 87.3827896118164, 178.10397338867188, -75.01718139648438, -9.218803405761719, 117.22248840332031, 130.06407165527344, 129.87539672851562, 38.29050064086914, 53.077392578125, -65.59806823730469, -131.5233154296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000654.npy"}
|
|
{"epoch": 0.9886621315192744, "step": 655, "batch_size": 64, "mean": 100.16602325439453, "std": 103.51094055175781, "min": -109.95391082763672, "p10": -24.29073791503906, "median": 97.41352462768555, "p90": 215.77717895507814, "max": 348.00860595703125, "pos_frac": 0.78125, "sample": [162.31893920898438, 162.52169799804688, -19.799766540527344, 7.889423370361328, 41.12396240234375, 148.12588500976562, 348.00860595703125, 145.38323974609375, 123.5505142211914, 8.436767578125, -38.559356689453125, 188.36331176757812, -2.9924468994140625, -21.86644744873047, -0.32541656494140625, -29.05364990234375, 48.378936767578125, 227.93051147460938, 217.42501831054688, 5.918560028076172, 206.50762939453125, -14.008346557617188, 189.41983032226562, 316.77044677734375, 24.57201385498047, -25.32971954345703, 110.73554229736328, -63.17436599731445, 301.87701416015625, -74.00881958007812, 185.3383331298828, 115.58071899414062, 192.34796142578125, 55.24071502685547, 88.12373352050781, 28.571475982666016, 75.01811981201172, 173.66998291015625, 203.46734619140625, 258.1693115234375, 14.208084106445312, 51.30059814453125, -1.3332977294921875, 236.09896850585938, 177.04542541503906, -53.78001403808594, 33.168731689453125, 165.9856719970703, 185.67877197265625, 209.06910705566406, -11.41501235961914, 64.35580444335938, 211.93222045898438, 64.22071838378906, 55.51222229003906, 126.96980285644531, 117.68814086914062, 106.70331573486328, 193.02072143554688, 82.97195434570312, -109.95391082763672, 80.99247741699219, 132.63333129882812, 205.8846893310547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000655.npy"}
|
|
{"epoch": 0.9901738473167044, "step": 656, "batch_size": 64, "mean": 72.91627502441406, "std": 114.01313781738281, "min": -232.1086883544922, "p10": -42.57409896850585, "median": 48.95951843261719, "p90": 230.96691131591797, "max": 288.7178649902344, "pos_frac": 0.765625, "sample": [177.43594360351562, 71.97733306884766, 180.92626953125, 193.43350219726562, 9.835376739501953, 11.831130981445312, -5.136829376220703, -13.02008056640625, -0.0665283203125, -34.560089111328125, -6.719379425048828, 269.4869384765625, -21.995437622070312, 0.536346435546875, 2.2109832763671875, 177.4251251220703, -232.1086883544922, -27.901954650878906, 143.22357177734375, 176.2459716796875, 247.56344604492188, 18.64075469970703, -177.35232543945312, 40.50886917114258, 232.0175018310547, -45.65462875366211, 95.9725112915039, 238.59043884277344, 6.5024871826171875, 164.90408325195312, 158.93222045898438, 1.6167984008789062, 50.85456848144531, 43.281005859375, 57.37950897216797, 151.4385528564453, 40.604400634765625, 30.215011596679688, 219.52635192871094, 95.6253662109375, 91.42927551269531, -131.13351440429688, 35.746360778808594, 241.56382751464844, 19.60076904296875, 228.51553344726562, 105.79632568359375, 56.675376892089844, 17.641597747802734, 186.01644897460938, 195.1192626953125, -35.38619613647461, -65.88824462890625, 233.50511169433594, 215.668212890625, 18.731918334960938, 19.790740966796875, -141.87294006347656, -51.977989196777344, 47.06446838378906, 59.620208740234375, 83.69929504394531, 203.77162170410156, 288.7178649902344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000656.npy"}
|
|
{"epoch": 0.9916855631141346, "step": 657, "batch_size": 64, "mean": 88.77348327636719, "std": 124.88670349121094, "min": -219.6553955078125, "p10": -20.297632598876948, "median": 80.96869277954102, "p90": 236.28653869628906, "max": 351.8993225097656, "pos_frac": 0.78125, "sample": [251.33445739746094, 10.53558349609375, 16.636966705322266, 345.68792724609375, 138.5035400390625, 18.338682174682617, 351.8993225097656, 204.43984985351562, 42.397117614746094, -184.46331787109375, 24.204120635986328, 2.7056102752685547, 138.94882202148438, 27.5888671875, 341.6668701171875, 119.472412109375, 74.60742950439453, 187.00286865234375, -162.8035888671875, 248.17507934570312, 27.130340576171875, 148.85585021972656, -171.35675048828125, 17.1491641998291, 66.11486053466797, 188.502197265625, 138.55633544921875, -17.012542724609375, -3.823179244995117, 74.48021697998047, 176.99575805664062, 183.6079559326172, -13.395736694335938, 185.8659210205078, 120.0262680053711, 30.234222412109375, 167.22686767578125, 179.45420837402344, 236.02508544921875, 8.289911270141602, 330.9661560058594, 22.023452758789062, -9.654998779296875, 236.39859008789062, 15.917388916015625, 147.04534912109375, 231.93148803710938, -7.670305252075195, -58.788414001464844, 87.3299560546875, -21.705528259277344, -219.6553955078125, 148.63143920898438, -2.1560497283935547, 27.957014083862305, 143.279541015625, -72.26386260986328, 135.38763427734375, 96.45994567871094, 207.06297302246094, 16.029850006103516, 114.75118255615234, -15.597526550292969, 188.0174560546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000657.npy"}
|
|
{"epoch": 0.9931972789115646, "step": 658, "batch_size": 64, "mean": 72.20449829101562, "std": 136.12205505371094, "min": -290.8048400878906, "p10": -74.81546020507811, "median": 65.35357666015625, "p90": 237.2352203369141, "max": 420.58258056640625, "pos_frac": 0.75, "sample": [101.09921264648438, 4.645481109619141, 229.6293487548828, -290.8048400878906, 193.75592041015625, -7.154787063598633, -190.32632446289062, 104.50602722167969, 197.98463439941406, 250.02542114257812, -39.2276611328125, 168.54202270507812, -56.79411315917969, 68.84793090820312, 220.0756072998047, 85.38740539550781, -31.695541381835938, 142.3035430908203, 420.58258056640625, 20.390777587890625, -17.441492080688477, 46.432647705078125, 65.28520965576172, 157.867431640625, 201.82086181640625, 215.1334686279297, -200.9046630859375, 164.09054565429688, 115.41009521484375, 33.7181396484375, 12.563879013061523, -1.7604026794433594, 64.83850860595703, 128.01742553710938, 2.4737701416015625, 65.42194366455078, 267.62847900390625, -186.8628387451172, 103.9623031616211, 32.678192138671875, 199.2264404296875, 4.770774841308594, 193.38172912597656, 240.341064453125, -2.5565013885498047, 85.79149627685547, -82.53889465332031, 241.86627197265625, 7.869529724121094, 87.23907470703125, 38.59283447265625, 1.032461166381836, -46.089942932128906, -150.1783447265625, 51.754905700683594, 155.75149536132812, 8.471759796142578, -192.27456665039062, 294.9561462402344, -1.5165557861328125, 314.3727111816406, 73.20468139648438, 229.98825073242188, 5.484891891479492], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000658.npy"}
|
|
{"epoch": 0.9947089947089947, "step": 659, "batch_size": 64, "mean": 57.31332015991211, "std": 118.62980651855469, "min": -203.156494140625, "p10": -114.78535079956053, "median": 36.246782302856445, "p90": 210.12653961181644, "max": 298.5504455566406, "pos_frac": 0.671875, "sample": [34.15936279296875, 105.44392395019531, 215.35292053222656, 158.3723907470703, -5.056884765625, 298.5504455566406, 166.91796875, 123.00945281982422, 9.97429084777832, 212.6549835205078, 272.3943176269531, -9.473190307617188, 30.066009521484375, 38.33420181274414, -0.7770118713378906, -10.8438720703125, 153.396240234375, -172.0843505859375, 16.712644577026367, -34.96058654785156, 193.60977172851562, -103.8111343383789, 78.69277954101562, 82.04159545898438, 2.119779586791992, -3.1788406372070312, 14.736833572387695, 30.729398727416992, -2.912912368774414, 93.32057189941406, 52.29161071777344, 58.874229431152344, 201.4615020751953, 131.81297302246094, 124.98328399658203, -11.677543640136719, 204.22683715820312, 118.54287719726562, 197.2347412109375, 3.9288597106933594, 80.56851196289062, -203.156494140625, -119.48858642578125, 197.83360290527344, 5.902730941772461, -4.93743896484375, -10.267791748046875, 214.16900634765625, 148.66021728515625, -71.25605010986328, -1.284088134765625, 187.50772094726562, -197.91702270507812, -126.42481231689453, 224.2953643798828, 27.074920654296875, 12.224311828613281, -193.4043731689453, 238.64703369140625, 77.04000854492188, -22.44263458251953, 169.21592712402344, 98.68545532226562, -132.36367797851562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000659.npy"}
|
|
{"epoch": 0.9962207105064248, "step": 660, "batch_size": 64, "mean": 90.0988540649414, "std": 117.20579528808594, "min": -197.50265502929688, "p10": -55.36791381835937, "median": 85.80274963378906, "p90": 209.87735137939455, "max": 546.627685546875, "pos_frac": 0.78125, "sample": [205.36143493652344, 55.41553497314453, 96.59947204589844, -59.789886474609375, 77.81593322753906, -93.30547332763672, -52.89129638671875, 202.43223571777344, -197.50265502929688, -11.485198974609375, 61.3232536315918, 225.183837890625, 44.4302864074707, 128.19361877441406, 14.131271362304688, 321.6783752441406, 202.02825927734375, 141.20753479003906, 27.54902458190918, -71.11183166503906, 139.78245544433594, 121.14306640625, 33.19506072998047, 62.151084899902344, 142.38059997558594, 194.19517517089844, -5.6935272216796875, 239.98208618164062, 108.75665283203125, -56.4293212890625, 38.37274169921875, 201.881103515625, 546.627685546875, 97.83064270019531, -7.493625640869141, -87.15090942382812, 19.349220275878906, -4.188957214355469, 166.68284606933594, 102.93354797363281, 55.555152893066406, 7.851995468139648, 232.69369506835938, 167.23167419433594, 188.63827514648438, 196.18838500976562, -68.00546264648438, 42.40074920654297, 185.3695831298828, 142.31222534179688, -3.980182647705078, 265.5700988769531, 18.848365783691406, 5.068277359008789, 202.2081756591797, 82.11752319335938, 89.48797607421875, 140.56051635742188, 6.73333740234375, -9.84844970703125, 36.72944641113281, 104.84979248046875, 94.36134338378906, 211.812744140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000660.npy"}
|
|
{"epoch": 0.9977324263038548, "step": 661, "batch_size": 64, "mean": 57.130775451660156, "std": 127.08076477050781, "min": -297.761474609375, "p10": -113.25373764038085, "median": 44.08030700683594, "p90": 201.9024658203125, "max": 359.5494384765625, "pos_frac": 0.71875, "sample": [2.104400634765625, -1.873443603515625, -115.28018188476562, -168.9081573486328, 126.91316223144531, 164.57025146484375, 183.32749938964844, 9.36370849609375, -297.761474609375, 186.0807647705078, -163.1864776611328, 14.209808349609375, -66.41194152832031, 194.66505432128906, 221.4715576171875, 126.46416473388672, 219.47645568847656, 91.41456604003906, 359.5494384765625, 59.85884094238281, -188.8641815185547, 104.21455383300781, 160.90756225585938, -119.84536743164062, -6.720844268798828, 40.82637023925781, 34.638267517089844, -71.601806640625, 1.9812850952148438, 7.423866271972656, 68.35134887695312, 79.1281967163086, -175.41477966308594, 29.46497344970703, 173.78338623046875, -1.2333335876464844, 81.64756774902344, 201.344482421875, 25.565296173095703, 176.78147888183594, 38.18841552734375, 47.33424377441406, -63.95744323730469, 174.83155822753906, 194.34136962890625, 92.11039733886719, 249.83639526367188, 17.339746475219727, 5.986120223999023, 125.09363555908203, -108.5253677368164, -8.151344299316406, 34.85008239746094, -50.81438446044922, 60.71756362915039, 141.3055419921875, 115.14969635009766, 202.1416015625, 102.22854614257812, 204.6487579345703, 18.69635009765625, -18.275634765625, -32.96080780029297, 345.82843017578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000661.npy"}
|