Files

331 lines
955 KiB
Plaintext
Raw Permalink Normal View History

{"epoch": 0.0, "step": 1, "batch_size": 128, "mean": 0.012140914797782898, "std": 0.2948089838027954, "min": -1.7206573486328125, "p10": -0.24997615814208982, "median": 0.005645751953125, "p90": 0.3128910064697265, "max": 0.758941650390625, "pos_frac": 0.515625, "sample": [-0.20262718200683594, 0.047306060791015625, 0.05846214294433594, -0.003662109375, -0.10196113586425781, -0.12421798706054688, -0.09057235717773438, -0.16971969604492188, -0.17220306396484375, 0.009609222412109375, -0.012847900390625, -0.04823112487792969, 0.1595478057861328, -0.07532501220703125, 0.600555419921875, -0.14254188537597656, -0.5367832183837891, 0.09640121459960938, -0.1319103240966797, 0.00080108642578125, 0.4095001220703125, -0.12982177734375, 0.4865264892578125, 0.29351806640625, 0.21588897705078125, 0.00791168212890625, 0.11423301696777344, -0.92767333984375, 0.07368659973144531, -0.35713958740234375, 0.25925445556640625, -0.18205833435058594, 0.1769084930419922, 0.319793701171875, -0.11501312255859375, 0.130828857421875, -0.0652313232421875, -0.05571937561035156, 0.48358154296875, -0.18517303466796875, 0.19147491455078125, -0.0292816162109375, 0.1183013916015625, 0.009857177734375, -0.15608596801757812, -0.106842041015625, 0.2012939453125, 0.4439544677734375, 0.17156982421875, 0.0055999755859375, 0.131622314453125, 0.18282318115234375, 0.3688812255859375, -0.09533882141113281, 0.0516204833984375, 0.14397430419921875, -0.09282684326171875, 0.223388671875, -0.23106002807617188, -0.11232757568359375, 0.13279342651367188, -1.7206573486328125, -0.20966339111328125, -0.281219482421875, 0.277740478515625, -0.5831203460693359, 0.035003662109375, 0.416534423828125, 0.18473434448242188, -0.26101112365722656, -0.1896953582763672, -0.17513656616210938, 0.20343017578125, 0.37976837158203125, 0.02338409423828125, -0.4322509765625, 0.12488555908203125, -0.04070281982421875, 0.09434318542480469, 0.20465087890625, -0.0114593505859375, 0.30808258056640625, 0.1165313720703125, -0.3171119689941406, -0.03559112548828125, 0.08214569091796875, -0.0316314697265625, -0.2771148681640625, -0.18490219116210938, -0.3084869384765625, 0.11998558044433594, 0.2460784912109375, 0.27088165283203125, 0.06439590454101562, 0.15772628784179688, 0.20970916748046875, -0.039905548095703125, 0.12369918823242188, 0.3099327087402344, -0.221343994140625, 0.23291015625, -0.0811920166015625, -0.18535995483398438, -0.040523529052734375, -0.0564727783203125, 0.2216968536376953, -0.06262969970703125, -0.0946502685546875, -0.1340351104736328, -0.24119949340820312, 0.0056915283203125, -0.07728958129882812, -0.04604339599609375, -0.47272491455078125, -0.0768280029296875, 0.12450408935546875, -0.187164306640625, 0.22954177856445312, 0.758941650390625, 0.1110687255859375, -0.33504486083984375, 0.14794921875, 0.71685791015625, 0.4245147705078125, 0.114776611328125, -0.0409393310546875, 0.5430145263671875, -0.24524688720703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000001.npy"}
{"epoch": 0.0030303030303030303, "step": 2, "batch_size": 128, "mean": -0.027733325958251953, "std": 0.3636137843132019, "min": -1.064208984375, "p10": -0.452655029296875, "median": -0.01790332794189453, "p90": 0.3513153076171875, "max": 1.1855316162109375, "pos_frac": 0.4765625, "sample": [-0.347198486328125, 0.07550621032714844, -0.583251953125, -0.383087158203125, -0.2957763671875, 0.26902008056640625, 0.016143798828125, 0.3489341735839844, -0.26019287109375, 0.28823089599609375, -0.17009735107421875, 0.2962493896484375, 0.5048065185546875, 0.18154525756835938, -0.12908935546875, -0.6042633056640625, 0.03460693359375, 0.13628387451171875, -0.10992431640625, 0.03320503234863281, -0.7920608520507812, 0.2242431640625, -0.23288917541503906, -0.33751678466796875, 0.42723846435546875, 0.16233062744140625, -0.295196533203125, -0.0619049072265625, 0.125, 0.07958221435546875, -0.018245697021484375, -0.37758636474609375, -1.064208984375, 0.0501861572265625, -0.06108856201171875, 1.169647216796875, 0.31884765625, -0.03764533996582031, -0.30340576171875, 0.32808685302734375, 0.10391616821289062, -0.2685375213623047, -0.600830078125, -0.62860107421875, 0.055267333984375, 0.08850860595703125, -0.017560958862304688, 0.4547309875488281, 0.3503265380859375, -0.13621902465820312, -0.319915771484375, -0.029632568359375, 0.1598358154296875, -0.6849441528320312, -0.27793121337890625, 0.3574104309082031, -0.15280532836914062, 0.6283111572265625, -0.053279876708984375, 0.67071533203125, -0.18796539306640625, -0.20824432373046875, 0.07111358642578125, -0.08631134033203125, -0.9680633544921875, 0.08416748046875, 0.3536224365234375, -0.4248046875, 0.02069091796875, -0.09970474243164062, -0.0171966552734375, 0.08079147338867188, 0.0200042724609375, -0.26520538330078125, 0.2434844970703125, 0.2361297607421875, -0.4506683349609375, 0.21982574462890625, 0.18981170654296875, -0.085723876953125, 0.021396636962890625, -0.2599067687988281, 0.1833648681640625, -0.4572906494140625, -0.129669189453125, -0.25933074951171875, 1.1855316162109375, -0.1389617919921875, -0.2782745361328125, -0.1563091278076172, 0.2654380798339844, 0.15247726440429688, -0.021602630615234375, 0.2338409423828125, -0.53546142578125, 0.3074493408203125, -0.20336151123046875, 0.32056427001953125, 0.20119476318359375, -0.02307891845703125, -0.6876373291015625, -0.0281829833984375, 0.22972869873046875, 0.2485504150390625, -0.103271484375, 0.00690460205078125, 0.00630950927734375, -0.18467330932617188, -0.4179420471191406, 0.5755233764648438, 0.725921630859375, 0.14903640747070312, -0.06787872314453125, -0.09696006774902344, -0.4033966064453125, -0.46425628662109375, -0.8170547485351562, 0.4462738037109375, -0.28272247314453125, -0.0017528533935546875, 0.57232666015625, -0.2147064208984375, 0.11790847778320312, 0.11786460876464844, 0.0364227294921875, 0.19788360595703125, -0.25130462646484375, -0.3963737487792969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000002.npy"}
{"epoch": 0.006060606060606061, "step": 3, "batch_size": 128, "mean": 0.016473382711410522, "std": 0.35881996154785156, "min": -1.189422607421875, "p10": -0.3745079040527344, "median": 0.02011585235595703, "p90": 0.37170333862304683, "max": 1.57086181640625, "pos_frac": 0.5390625, "sample": [0.1953277587890625, -0.11093902587890625, 0.11671066284179688, 0.6243896484375, -0.268157958984375, -1.189422607421875, 0.0043621063232421875, 0.10066986083984375, -0.25548553466796875, 0.0801849365234375, -0.246673583984375, 0.008457183837890625, 0.017477035522460938, -0.06896591186523438, -0.1822967529296875, 0.2126312255859375, 0.12566375732421875, -0.0055294036865234375, 0.161376953125, -0.058643341064453125, 0.04323005676269531, -0.1472930908203125, 0.1427898406982422, 0.31569480895996094, -0.7194595336914062, -0.5847244262695312, 0.15411949157714844, 0.23480987548828125, 0.16907501220703125, -0.193206787109375, 1.57086181640625, 0.206695556640625, 0.07703399658203125, 0.26825714111328125, -0.15854644775390625, -0.0088348388671875, -0.09203338623046875, -0.04517173767089844, -0.24820327758789062, 0.12875747680664062, 0.2192535400390625, 0.8866729736328125, -0.0080718994140625, 0.3874969482421875, -0.875762939453125, -0.20846939086914062, -0.4817047119140625, -0.3498687744140625, 0.04079437255859375, -0.1524810791015625, 0.32175445556640625, -0.34964752197265625, -0.16538238525390625, -0.05237770080566406, 0.365081787109375, 0.010223388671875, -0.291778564453125, -0.07764434814453125, 0.20673751831054688, 0.022754669189453125, 0.196929931640625, -0.02715301513671875, 0.08840751647949219, 0.1339569091796875, 0.052791595458984375, 0.0662078857421875, -0.0145721435546875, 0.22220611572265625, 0.4357948303222656, -0.30269622802734375, -0.08761978149414062, 0.055957794189453125, 0.1968994140625, -0.0015716552734375, 0.4849395751953125, -0.05969047546386719, -0.4577789306640625, 0.576446533203125, -0.2562294006347656, 0.07265281677246094, -0.5467376708984375, 0.5080718994140625, 0.25646209716796875, -0.31299591064453125, -0.2668609619140625, -0.3156089782714844, 0.01271820068359375, 0.0305938720703125, -0.1791553497314453, 0.3213386535644531, -0.4282379150390625, -0.7055435180664062, -0.13919830322265625, 0.1646137237548828, -0.0220184326171875, -0.12863731384277344, -0.2929229736328125, -0.2624034881591797, -0.3826942443847656, 1.53289794921875, -0.4286842346191406, 0.5581817626953125, 0.14642333984375, -0.0147552490234375, 0.043392181396484375, -0.37397003173828125, 0.02336883544921875, 0.107147216796875, 0.1698322296142578, 0.43267822265625, -0.0542449951171875, 0.25552940368652344, 0.456268310546875, 0.1810283660888672, -0.163604736328125, 0.13339996337890625, 0.38715362548828125, -0.19912147521972656, -0.4121856689453125, 0.22916412353515625, 0.06548881530761719, 0.100921630859375, 0.358856201171875, -0.375762939453125, -0.0005035400390625, 0.06954574584960938, 0.09642219543457031, 0.2744941711425781], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000003.npy"}
{"epoch": 0.00909090909090909, "step": 4, "batch_size": 128, "mean": -0.02571679651737213, "std": 0.318131685256958, "min": -1.18109130859375, "p10": -0.3619098663330078, "median": -0.023317337036132812, "p90": 0.34924316406249994, "max": 0.835357666015625, "pos_frac": 0.4609375, "sample": [0.1100311279296875, -0.149261474609375, -0.7498703002929688, 0.1876983642578125, 0.0008029937744140625, 0.06043434143066406, 0.1810894012451172, 0.08904266357421875, -0.30820465087890625, 0.44039154052734375, -0.14705276489257812, 0.366119384765625, 0.12947845458984375, -0.05316162109375, -0.3562164306640625, 0.100982666015625, -0.2536659240722656, -0.3919792175292969, -0.0251312255859375, -0.22472381591796875, -0.1230010986328125, 0.2995872497558594, -0.11750411987304688, 0.13982200622558594, -0.33136749267578125, -0.3751678466796875, -0.005039215087890625, 0.045623779296875, -0.1705455780029297, 0.22224044799804688, 0.06887435913085938, 0.2006072998046875, 0.6383438110351562, -0.247467041015625, -0.021503448486328125, -0.2280406951904297, -0.42540740966796875, 0.56201171875, -0.2851219177246094, -0.08722305297851562, -0.1662445068359375, 0.054595947265625, 0.08603286743164062, -0.084686279296875, -0.64703369140625, 0.07387351989746094, -0.136199951171875, 0.27071380615234375, 0.1816864013671875, -0.15228652954101562, 0.28382110595703125, -0.109161376953125, -0.00543212890625, -0.089324951171875, 0.21703338623046875, -0.02880096435546875, -0.30458831787109375, -0.05594635009765625, -0.6533203125, 0.342010498046875, -0.24880218505859375, -0.2957916259765625, -0.04640960693359375, -0.10093116760253906, 0.07748031616210938, -0.26007080078125, 0.497344970703125, 0.5150604248046875, 0.39604949951171875, -0.139495849609375, 0.79437255859375, 0.30733489990234375, 0.2017822265625, -0.29230690002441406, -0.2721138000488281, -0.643585205078125, 0.08994483947753906, -0.24440765380859375, 0.0658111572265625, 0.2797088623046875, -0.24274635314941406, -0.3838081359863281, -0.64794921875, 0.2288970947265625, 0.044506072998046875, 0.15611839294433594, -0.25006103515625, -0.3292045593261719, 0.22818946838378906, 0.46126556396484375, -0.1951580047607422, 0.0150909423828125, 0.19655227661132812, 0.067535400390625, -1.18109130859375, 0.05767059326171875, -0.14032745361328125, 0.0897216796875, -0.20281982421875, -0.008758544921875, -0.03731346130371094, -0.13579940795898438, -0.1249237060546875, 0.1018218994140625, -0.22521209716796875, -0.18003273010253906, 0.25019073486328125, -0.06679534912109375, -0.3562278747558594, -0.11825180053710938, 0.06943130493164062, -0.3101348876953125, 0.138824462890625, 0.08867454528808594, 0.5341110229492188, -0.1385498046875, -0.270355224609375, 0.18764114379882812, -0.541168212890625, 0.6428756713867188, 0.019083023071289062, -0.0062255859375, 0.835357666015625, -0.5165481567382812, 0.1643810272216797, 0.1437530517578125, -0.5816116333007812, 0.6533889770507812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000004.npy"}
{"epoch": 0.012121212121212121, "step": 5, "batch_size": 128, "mean": 0.03400611877441406, "std": 0.32898062467575073, "min": -0.8916015625, "p10": -0.3519100189208984, "median": -0.0052490234375, "p90": 0.4689666748046874, "max": 1.06646728515625, "pos_frac": 0.4921875, "sample": [-0.472564697265625, 0.500579833984375, -0.3782157897949219, -0.022064208984375, -0.08737754821777344, -0.1262359619140625, -0.23931884765625, 0.1421966552734375, 0.4411773681640625, 0.029205322265625, -0.21353912353515625, -0.03769874572753906, -0.0049896240234375, -0.11219978332519531, 0.3468170166015625, 0.4158973693847656, 0.2810325622558594, 0.2821807861328125, -0.1152801513671875, -0.25893402099609375, 0.21762847900390625, -0.0436553955078125, -0.213592529296875, -0.46404266357421875, 0.05741310119628906, -0.3082427978515625, 0.10904693603515625, -0.1406402587890625, 0.05016517639160156, 0.14453125, -0.0063991546630859375, -0.2825164794921875, -0.8916015625, -0.19842529296875, -0.1244659423828125, -0.2044696807861328, 0.6355667114257812, 0.19117355346679688, -0.05828857421875, 0.6255264282226562, -0.0441741943359375, 0.0802001953125, -0.07746124267578125, -0.0396881103515625, -0.2379608154296875, 0.10730743408203125, -0.49311065673828125, 0.267791748046875, 0.21849822998046875, 0.05204963684082031, -0.20330810546875, -0.255645751953125, -0.031116485595703125, 0.03226470947265625, 0.0810546875, 0.2548694610595703, -0.1502666473388672, -0.0055084228515625, -0.08254623413085938, 0.25782012939453125, -0.572265625, 0.84124755859375, -0.03641510009765625, -0.104278564453125, 1.06646728515625, 0.044342041015625, 0.04643058776855469, -0.1444721221923828, 0.279327392578125, 0.9668426513671875, 0.2922859191894531, -0.2772674560546875, -0.08211708068847656, 0.0461273193359375, 0.16485595703125, -0.13889312744140625, -0.29476165771484375, 0.1451568603515625, 0.5284881591796875, 0.5790481567382812, 0.09008407592773438, 0.565673828125, 0.2196826934814453, 0.222198486328125, -0.16826629638671875, 0.2140636444091797, -0.3427467346191406, -0.06383895874023438, -0.6875762939453125, 0.340362548828125, 0.3858795166015625, -0.3274383544921875, -0.291412353515625, 0.1732635498046875, 0.14754104614257812, -0.04360198974609375, 0.6036224365234375, -0.01043701171875, 0.134613037109375, -0.17689132690429688, 0.30733489990234375, 0.4905853271484375, -0.1818695068359375, 0.2763042449951172, 0.7674789428710938, 0.4597015380859375, -0.022674560546875, -0.4864959716796875, 0.15582275390625, 0.120391845703125, -0.0336456298828125, -0.253204345703125, -0.3773231506347656, 0.2465991973876953, 0.014739990234375, 0.36330413818359375, 0.08609771728515625, 0.30229949951171875, -0.0376129150390625, -0.108917236328125, -0.516693115234375, -0.11572456359863281, 0.12114715576171875, -0.5917816162109375, 0.5012359619140625, 0.17479705810546875, -0.4671974182128906, -0.373291015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000005.npy"}
{"epoch": 0.015151515151515152, "step": 6, "batch_size": 128, "mean": -0.011708170175552368, "std": 0.3694382607936859, "min": -1.3245697021484375, "p10": -0.430582046508789, "median": 0.014140129089355469, "p90": 0.3820281982421875, "max": 0.9107093811035156, "pos_frac": 0.5234375, "sample": [-0.5032119750976562, -0.07610130310058594, 0.21316146850585938, -0.13498687744140625, -0.1297149658203125, -0.2512359619140625, 0.0046539306640625, -0.041797637939453125, -0.18292617797851562, -0.0968170166015625, 0.05950927734375, -0.2640533447265625, 0.3440971374511719, 0.1828155517578125, -0.8615951538085938, -0.20994186401367188, -0.13665771484375, -0.6168975830078125, 0.36803245544433594, 0.487335205078125, 0.08661651611328125, 0.4933319091796875, 0.00127410888671875, 0.9107093811035156, 0.7153167724609375, 0.103790283203125, 0.15887069702148438, 0.36041259765625, 0.043292999267578125, -0.4591827392578125, 0.5551261901855469, 0.064971923828125, 0.3184814453125, -1.3245697021484375, 0.2493438720703125, -0.046722412109375, -0.25006103515625, -0.25241851806640625, 0.28223228454589844, -0.01544189453125, 0.2152080535888672, -0.28751373291015625, -0.13881683349609375, 0.023309707641601562, 0.274688720703125, -0.13970947265625, 0.4587669372558594, 0.19461822509765625, -0.9422149658203125, -0.33563232421875, -0.3302497863769531, 0.38530731201171875, -0.17358779907226562, 0.03459930419921875, -0.26708984375, 0.14113807678222656, -0.13031387329101562, 0.09718132019042969, 0.13244247436523438, 0.4387664794921875, -0.0279541015625, 0.2732391357421875, 0.4253082275390625, 0.0758819580078125, 0.13071060180664062, 0.44317626953125, 0.05921745300292969, -0.16869735717773438, 0.05384063720703125, 0.3552703857421875, 0.09975624084472656, 0.1949310302734375, 0.8604354858398438, -0.45131683349609375, -0.69183349609375, -0.23285675048828125, 0.24646759033203125, -0.18353271484375, 0.0641326904296875, -0.46514892578125, -0.3828773498535156, -0.821929931640625, 0.07935523986816406, 0.013345718383789062, -0.2224273681640625, 0.060611724853515625, -0.11941146850585938, -0.13419342041015625, 0.38062286376953125, -0.6657791137695312, -0.326263427734375, -0.07549476623535156, 0.11554527282714844, -0.921478271484375, 0.824371337890625, 0.2263031005859375, -0.14209747314453125, 0.24740219116210938, -0.06634521484375, -1.1826553344726562, -0.040130615234375, 0.6510009765625, -0.18317794799804688, -0.406890869140625, 0.1032867431640625, -0.16378021240234375, -0.05753898620605469, 0.054096221923828125, -0.15742874145507812, 0.254150390625, 0.0781402587890625, -0.098358154296875, 0.26178741455078125, -0.00029754638671875, 0.333160400390625, 0.2207317352294922, -0.4216957092285156, 0.16886138916015625, -0.20232009887695312, 0.014934539794921875, -0.1868133544921875, -0.08404541015625, 0.29032325744628906, 0.1807403564453125, -0.08256912231445312, 0.2815704345703125, 0.16542434692382812, -0.21937942504882812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000006.npy"}
{"epoch": 0.01818181818181818, "step": 7, "batch_size": 128, "mean": 0.013714149594306946, "std": 0.3295344114303589, "min": -1.1622390747070312, "p10": -0.36507568359375, "median": 0.02463245391845703, "p90": 0.35767021179199215, "max": 0.9888916015625, "pos_frac": 0.5390625, "sample": [-0.577850341796875, -0.02059173583984375, -0.5868072509765625, -0.49655914306640625, 0.9888916015625, 0.08537673950195312, -0.40277862548828125, 0.054035186767578125, 0.1595458984375, 0.22603607177734375, -0.2086944580078125, -0.2903900146484375, -0.39475250244140625, 0.9160194396972656, 0.4582672119140625, -0.2390594482421875, 0.0701904296875, 0.2598991394042969, -0.074127197265625, 0.1738872528076172, 0.12005996704101562, 0.18137359619140625, 0.16429519653320312, 0.26688385009765625, 0.11682891845703125, 0.849884033203125, 0.4042205810546875, 0.02519989013671875, 0.024065017700195312, -0.05176544189453125, -0.09654998779296875, 0.33055877685546875, -0.12725830078125, 0.02846527099609375, 0.07471084594726562, 0.35083961486816406, -0.10206222534179688, 0.17502593994140625, -0.15496444702148438, -0.10483741760253906, 0.17599868774414062, 0.2806434631347656, 0.2272796630859375, 0.03928184509277344, 0.121002197265625, -0.48621368408203125, 0.17433929443359375, -0.08374786376953125, 0.12581253051757812, -0.2764129638671875, -1.1622390747070312, 0.03197669982910156, -0.24208831787109375, 0.20001983642578125, 0.00933074951171875, -0.06157684326171875, -1.0706634521484375, 0.02390289306640625, 0.2379913330078125, 0.3654594421386719, -0.3306732177734375, -0.105499267578125, -0.1316661834716797, -0.3853034973144531, -0.1990814208984375, 0.6842803955078125, 0.6046066284179688, 0.24753570556640625, -0.28438377380371094, 0.17551231384277344, 0.183319091796875, -0.3577880859375, -0.03731536865234375, -0.10605621337890625, 0.3836517333984375, 0.06727981567382812, -0.16741561889648438, 0.0255279541015625, -0.21610260009765625, 0.12972640991210938, 0.20895767211914062, -0.25439453125, -0.3075218200683594, -0.006103515625, -0.12235260009765625, 0.11255645751953125, 0.026586532592773438, 0.27152252197265625, 0.059986114501953125, -0.12558746337890625, 0.3018341064453125, 0.8043060302734375, -0.005626678466796875, 0.35433197021484375, 0.1819915771484375, 0.056610107421875, 0.20847702026367188, -0.07535362243652344, -0.04207611083984375, -0.0828857421875, -0.03600120544433594, 0.001834869384765625, 0.46435546875, 0.009096145629882812, 0.33946990966796875, -0.1731414794921875, -0.382080078125, 0.06842803955078125, -0.4335899353027344, 0.0341796875, 0.32683563232421875, -0.200347900390625, 0.13330078125, 0.15645790100097656, 0.36893463134765625, -0.088134765625, -0.30365753173828125, -0.15343093872070312, -0.07164764404296875, -0.9055938720703125, 0.4542694091796875, -0.006587982177734375, 0.14685821533203125, -0.14537620544433594, -0.07956314086914062, -0.08086395263671875, -0.12755584716796875, -0.5120582580566406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000007.npy"}
{"epoch": 0.021212121212121213, "step": 8, "batch_size": 128, "mean": 0.07173047959804535, "std": 0.32933151721954346, "min": -0.7587203979492188, "p10": -0.3383903503417969, "median": 0.07525157928466797, "p90": 0.3748088836669921, "max": 1.5772705078125, "pos_frac": 0.65625, "sample": [0.3376426696777344, -0.195892333984375, 0.0651092529296875, -0.24248695373535156, 0.1867828369140625, -0.6948013305664062, 0.7792510986328125, 0.33547210693359375, 0.2726593017578125, -0.3473358154296875, 0.2364482879638672, 0.07171630859375, 0.06912994384765625, 0.3643455505371094, -0.20970916748046875, 0.2152099609375, 0.0966339111328125, -0.0503692626953125, 0.0526885986328125, 0.12570571899414062, -0.4575347900390625, 0.6424102783203125, 0.16850852966308594, -0.3645477294921875, -0.25460052490234375, 0.2527923583984375, 0.172637939453125, -0.10943794250488281, 0.010807037353515625, 0.01222991943359375, 0.09541511535644531, -0.6625595092773438, 0.09613037109375, 0.253814697265625, 0.1808624267578125, 0.019136428833007812, -0.0366973876953125, -0.016698837280273438, 0.274932861328125, 0.1530628204345703, 0.07239913940429688, -0.16851806640625, -0.3714752197265625, 0.8678436279296875, -0.17572021484375, 0.2477874755859375, 0.23983383178710938, -0.003910064697265625, 0.30218505859375, 0.26406097412109375, -0.20874786376953125, 0.44823455810546875, 0.2441272735595703, 0.5046234130859375, 0.28618621826171875, -0.202911376953125, 0.623809814453125, -0.43445587158203125, -0.047580718994140625, 0.11916542053222656, 0.24570465087890625, -0.045654296875, -0.10328292846679688, 0.12999534606933594, 0.39922332763671875, 0.22686386108398438, 0.007442474365234375, -0.1585540771484375, 0.22946929931640625, -0.1257495880126953, 0.04480934143066406, 0.23248291015625, 0.4295921325683594, 0.08925056457519531, 0.059177398681640625, 1.5772705078125, 0.362548828125, -0.08036041259765625, -0.4967193603515625, 0.25290679931640625, -0.07038116455078125, 0.750701904296875, -0.0625, -0.31342315673828125, -0.3978118896484375, -0.2397308349609375, -0.18712997436523438, 0.19026565551757812, 0.01081085205078125, 0.0624847412109375, 0.07866859436035156, 0.06928443908691406, 0.10645294189453125, 0.7003097534179688, -0.7587203979492188, -0.12870025634765625, 0.4075775146484375, -0.04249000549316406, 0.093505859375, 0.20511627197265625, -0.33455657958984375, 0.093475341796875, 0.20199966430664062, 0.0128631591796875, 0.03391838073730469, 0.8631591796875, 0.20136260986328125, 0.3415069580078125, 0.11859130859375, 0.2729949951171875, 0.30672454833984375, -0.5532684326171875, -0.16993331909179688, -0.14354515075683594, 0.036235809326171875, -0.009918212890625, -0.536163330078125, 0.17308807373046875, 0.07683181762695312, 0.107574462890625, 0.19232940673828125, 0.3307342529296875, 0.080352783203125, 0.06913375854492188, 0.013387680053710938, 0.07367134094238281, -0.3031768798828125, -0.6243820190429688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000008.npy"}
{"epoch": 0.024242424242424242, "step": 9, "batch_size": 128, "mean": -0.04485127329826355, "std": 0.31825387477874756, "min": -1.0893173217773438, "p10": -0.4090057373046875, "median": -0.012447357177734375, "p90": 0.30916786193847656, "max": 0.802642822265625, "pos_frac": 0.484375, "sample": [-0.06012535095214844, -0.0601654052734375, -0.19072723388671875, 0.1687183380126953, 0.4800262451171875, 0.05078125, -0.16912841796875, 0.32370758056640625, 0.5394134521484375, 0.3728752136230469, 0.3130531311035156, 0.30750274658203125, 0.19654083251953125, -0.11300849914550781, -1.029144287109375, -0.14994049072265625, -0.7563018798828125, -0.21668243408203125, -0.32781982421875, -0.2750244140625, -0.3522453308105469, -0.6936531066894531, -0.3474578857421875, -0.42889404296875, 0.009595870971679688, 0.260223388671875, -0.43891143798828125, -0.046417236328125, -0.065399169921875, 0.09419631958007812, 0.20965576171875, 0.2324981689453125, -0.3279857635498047, -0.01074981689453125, 0.5290679931640625, 0.17743301391601562, -0.009004592895507812, 0.07089996337890625, -0.3316326141357422, 0.06679916381835938, 0.5101165771484375, 0.009695053100585938, -0.14266204833984375, -0.1501789093017578, 0.43797874450683594, -0.36663055419921875, 0.0047149658203125, 0.012420654296875, -0.708953857421875, -0.17781829833984375, 0.16768646240234375, -0.23976516723632812, -0.17566299438476562, -0.7542800903320312, -0.331817626953125, -0.0794219970703125, -0.0269927978515625, -0.20507431030273438, -0.20050048828125, -0.17165374755859375, 0.07261276245117188, -0.08978271484375, 0.0116424560546875, 0.12248992919921875, -0.06542205810546875, 0.3724250793457031, 0.09460639953613281, 0.03483009338378906, 0.5543994903564453, 0.1042327880859375, 0.06702232360839844, -0.0748748779296875, 0.17632675170898438, 0.0428009033203125, -0.0141448974609375, -0.5107879638671875, 0.2611885070800781, 0.28530120849609375, -0.34363555908203125, 0.3859214782714844, 0.05007171630859375, -0.400482177734375, -0.10072708129882812, 0.0630035400390625, 0.3037395477294922, -0.37775611877441406, 0.2233428955078125, -0.3963775634765625, 0.205108642578125, 0.6331100463867188, -0.062129974365234375, -0.16230392456054688, 0.2090778350830078, -0.23807716369628906, 0.2593231201171875, 0.02021026611328125, -0.602508544921875, -0.328216552734375, -0.1238861083984375, -0.2269287109375, 0.1004791259765625, 0.1938934326171875, 0.075958251953125, -0.4623565673828125, 0.2718181610107422, 0.12533187866210938, 0.802642822265625, 0.06578445434570312, -0.28670501708984375, -0.15021705627441406, 0.08237075805664062, -0.24615478515625, -0.23264312744140625, -1.0893173217773438, 0.20766448974609375, -0.523284912109375, -0.2288360595703125, -0.07494735717773438, 0.016756057739257812, -0.1945056915283203, -0.1815166473388672, 0.06696701049804688, 0.21007347106933594, 0.21276473999023438, 0.283782958984375, -0.1692047119140625, 0.023143768310546875, -0.48921966552734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000009.npy"}
{"epoch": 0.02727272727272727, "step": 10, "batch_size": 128, "mean": 0.00819852203130722, "std": 0.2890770733356476, "min": -1.155792236328125, "p10": -0.31790885925292967, "median": 0.02691173553466797, "p90": 0.32898559570312497, "max": 0.64300537109375, "pos_frac": 0.5625, "sample": [-0.057220458984375, -0.12248992919921875, 0.079620361328125, 0.296142578125, 0.21929931640625, -0.244232177734375, 0.20775604248046875, 0.205718994140625, -0.11515617370605469, 0.0233306884765625, 0.045963287353515625, 0.22117996215820312, 0.04212188720703125, -0.6590805053710938, 0.16474151611328125, 0.2089996337890625, 0.64300537109375, -0.04034423828125, -0.14068603515625, 0.037994384765625, -0.13799285888671875, -0.2165985107421875, 0.01869964599609375, -0.16228103637695312, 0.09720039367675781, -0.07477569580078125, -0.5732269287109375, 0.19204139709472656, 0.2075958251953125, 0.0019426345825195312, -0.1364593505859375, 0.017963409423828125, -1.155792236328125, -0.060077667236328125, 0.20703125, 0.13562774658203125, 0.100494384765625, -0.034221649169921875, 0.1133575439453125, -0.016023635864257812, -0.37115478515625, -0.0652313232421875, 0.0682373046875, -0.34515380859375, 0.1656494140625, -0.020404815673828125, 0.16595458984375, 0.5721282958984375, -0.16854286193847656, 0.05809593200683594, -0.1215057373046875, 0.5303421020507812, -0.29187774658203125, 0.07336997985839844, -0.13607406616210938, -0.980010986328125, -0.15709686279296875, 0.0882568359375, -0.07666206359863281, 0.5845870971679688, 0.3101367950439453, 0.33318328857421875, 0.5276947021484375, -0.4683990478515625, 0.19207000732421875, 0.0311431884765625, 0.0702056884765625, 0.14919281005859375, -0.028491973876953125, -0.2422332763671875, 0.2582855224609375, 0.0035400390625, 0.1340179443359375, 0.14569091796875, 0.32718658447265625, -0.35826873779296875, 0.06653785705566406, 0.003452301025390625, -0.1361865997314453, 0.2239990234375, 0.2039794921875, -0.6860332489013672, 0.5181655883789062, 0.46659088134765625, -0.3062324523925781, 0.483673095703125, -0.471099853515625, -0.06611251831054688, 0.04802703857421875, -0.05025482177734375, 0.088470458984375, 0.01641845703125, -0.2588005065917969, 0.0771331787109375, 0.2928047180175781, -0.019866943359375, 0.4515228271484375, 0.6241798400878906, 0.042469024658203125, 0.20577239990234375, 0.08154296875, -0.16513442993164062, -0.0492706298828125, 0.3966941833496094, -0.09898567199707031, 0.03208160400390625, -0.23763465881347656, 0.07200813293457031, -0.0507354736328125, -0.10479927062988281, -0.1788330078125, -0.23743438720703125, 0.14593505859375, -0.4059867858886719, 0.07956695556640625, 0.1361236572265625, -0.12584686279296875, 0.324737548828125, -0.1896209716796875, 0.28789520263671875, -0.3584785461425781, -0.19036865234375, -0.29531097412109375, 0.38562774658203125, 0.030303955078125, -0.211151123046875, -0.3646736145019531, 0.023519515991210938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000010.npy"}
{"epoch": 0.030303030303030304, "step": 11, "batch_size": 128, "mean": 0.012516975402832031, "std": 0.38249605894088745, "min": -1.620147705078125, "p10": -0.3738910675048828, "median": 0.006474494934082031, "p90": 0.4370220184326172, "max": 1.0574493408203125, "pos_frac": 0.5078125, "sample": [-0.1426544189453125, 0.0449371337890625, 0.05515861511230469, -0.1529998779296875, 0.17642974853515625, 0.0036773681640625, -0.11755561828613281, 0.08891105651855469, -0.257110595703125, -1.2669601440429688, -0.07387542724609375, -0.05023956298828125, 0.1185760498046875, 0.03965187072753906, -0.009218215942382812, 0.325347900390625, 0.724853515625, 0.40875244140625, -0.611175537109375, -0.03603363037109375, -1.620147705078125, 0.13800811767578125, 0.2627105712890625, -0.001796722412109375, 0.3410148620605469, -0.05635833740234375, -0.15631866455078125, -0.194366455078125, -0.052387237548828125, -0.5780143737792969, -0.130401611328125, 0.024196624755859375, 0.024187088012695312, 0.2313098907470703, -0.06960296630859375, -0.3494873046875, 0.5122528076171875, 0.21321868896484375, 0.2885246276855469, 0.22280502319335938, 0.0811309814453125, -0.0012664794921875, -0.09401321411132812, -0.20589447021484375, -0.888946533203125, 0.039791107177734375, -0.11651611328125, 0.2062206268310547, -0.23928070068359375, 0.13165283203125, 0.825286865234375, -0.086395263671875, 0.02989959716796875, 0.12791061401367188, -0.25473785400390625, -0.18870162963867188, -0.4300365447998047, -0.01299285888671875, -0.5716838836669922, 0.0672454833984375, 0.31081390380859375, 0.3665008544921875, -0.3732490539550781, -0.11114501953125, 0.1530914306640625, 0.4356498718261719, -0.44910430908203125, -0.17884063720703125, -0.028406143188476562, 0.6825790405273438, 0.023296356201171875, -0.8148651123046875, -0.35791778564453125, -0.3485260009765625, 0.73614501953125, 0.156829833984375, -0.12139129638671875, 0.7485504150390625, 0.197357177734375, 0.06392097473144531, -0.14829254150390625, 0.41640472412109375, -0.11268424987792969, 0.25353240966796875, -0.3178253173828125, 1.0574493408203125, -0.2982330322265625, -0.14229583740234375, -0.629669189453125, -0.127716064453125, -0.2935523986816406, 0.6913604736328125, 0.3771400451660156, 0.259674072265625, 0.42282867431640625, -0.11069869995117188, -0.045330047607421875, -0.05764007568359375, -0.110687255859375, 0.387451171875, -0.15927886962890625, 0.16439247131347656, 0.24137496948242188, 0.24506378173828125, -0.41022682189941406, 0.2422943115234375, -0.36673927307128906, 0.4342060089111328, 0.009271621704101562, -0.12090301513671875, 0.49603271484375, 0.126068115234375, -0.6641941070556641, 0.5826263427734375, 0.44022369384765625, 0.40296173095703125, -0.37538909912109375, 0.45076751708984375, 0.3756294250488281, 0.170501708984375, -0.1934051513671875, 0.12628936767578125, 0.46303558349609375, -0.3160552978515625, 0.06887054443359375, -0.11834716796875, -0.12218666076660156, 0.11029052734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000011.npy"}
{"epoch": 0.03333333333333333, "step": 12, "batch_size": 128, "mean": -0.0038421154022216797, "std": 0.31990373134613037, "min": -0.987945556640625, "p10": -0.40161247253417964, "median": 0.05275726318359375, "p90": 0.32744140624999984, "max": 0.941802978515625, "pos_frac": 0.5546875, "sample": [0.0578765869140625, 0.383148193359375, 0.03690338134765625, 0.173248291015625, 0.023538589477539062, 0.09020423889160156, -0.33184814453125, -0.4308013916015625, 0.09380340576171875, 0.0728607177734375, -0.16022872924804688, -0.6092071533203125, -0.14559173583984375, 0.13445472717285156, 0.09313583374023438, 0.08453750610351562, -0.395843505859375, -0.23453712463378906, -0.26345062255859375, -0.34246063232421875, 0.430572509765625, -0.842010498046875, 0.12633895874023438, -0.09392356872558594, -0.014129638671875, 0.064483642578125, -0.53564453125, 0.11260795593261719, 0.2109508514404297, -0.14620399475097656, 0.08222198486328125, 0.14913177490234375, 0.359100341796875, -0.0335845947265625, 0.5882720947265625, -0.4150733947753906, -0.2623481750488281, 0.1816864013671875, 0.1521282196044922, -0.987945556640625, -0.07376480102539062, -0.1305084228515625, -0.0228271484375, 0.2498493194580078, 0.115386962890625, -0.389923095703125, -0.0143280029296875, -0.108062744140625, -0.696075439453125, -0.5620193481445312, 0.13233184814453125, -0.5018997192382812, -0.09539794921875, 0.1079254150390625, 0.22446441650390625, 0.45849609375, -0.349334716796875, 0.48288726806640625, 0.5151138305664062, -0.19683074951171875, -0.709014892578125, 0.0305023193359375, 0.1956787109375, 0.6703948974609375, 0.2651863098144531, -0.0095367431640625, 0.26322174072265625, 0.08718299865722656, 0.05049896240234375, -0.793853759765625, -0.19933700561523438, -0.345428466796875, 0.18771743774414062, 0.07570075988769531, -0.23185348510742188, 0.14908981323242188, 0.14495849609375, 0.6882667541503906, -0.12969207763671875, -0.33646392822265625, 0.063720703125, -0.021251678466796875, -0.125823974609375, 0.6664581298828125, -0.18288803100585938, 0.14430618286132812, 0.13129806518554688, 0.09486770629882812, 0.20248794555664062, 0.313873291015625, 0.20902442932128906, 0.05501556396484375, -0.03937530517578125, 0.1298370361328125, -0.440704345703125, 0.13648223876953125, 0.1833038330078125, -0.1048126220703125, 0.2568359375, 0.10589408874511719, 0.03839874267578125, 0.941802978515625, -0.1847686767578125, 0.0497894287109375, -0.11071014404296875, -0.228790283203125, -0.0525665283203125, -0.159393310546875, 0.17254638671875, -0.8293609619140625, -0.13819313049316406, -0.0071887969970703125, 0.48975372314453125, 0.062198638916015625, 0.29090309143066406, 0.04802703857421875, 0.1214447021484375, -0.269989013671875, 0.12705230712890625, 0.18219757080078125, -0.06284713745117188, 0.5010986328125, 0.12691497802734375, -0.08517074584960938, -0.2894744873046875, 0.1694793701171875, -0.02831268310546875, 0.19974327087402344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000012.npy"}
{"epoch": 0.03636363636363636, "step": 13, "batch_size": 128, "mean": 0.040446534752845764, "std": 0.34125030040740967, "min": -1.3609161376953125, "p10": -0.3290069580078125, "median": 0.016824722290039062, "p90": 0.4441864013671875, "max": 1.2240066528320312, "pos_frac": 0.53125, "sample": [0.16319656372070312, 0.4430694580078125, -0.049304962158203125, 0.0994720458984375, -0.1455841064453125, 0.195526123046875, 0.21753692626953125, 0.19212913513183594, 0.43177032470703125, -0.0803375244140625, 0.274993896484375, -1.3609161376953125, 0.3199310302734375, -0.0073261260986328125, -0.16970062255859375, -0.8016357421875, -0.38997650146484375, -0.16952896118164062, -0.10639190673828125, -0.12883949279785156, 0.7529144287109375, 0.2789478302001953, 0.3375110626220703, -0.2169647216796875, 0.12601470947265625, -0.45810699462890625, -0.2301788330078125, 0.09732437133789062, 1.0439834594726562, 0.025726318359375, 1.2240066528320312, -0.0731658935546875, -0.1912994384765625, -0.006862640380859375, 0.07231903076171875, -0.2511005401611328, 0.404296875, 0.3569793701171875, -0.48199462890625, -0.09412384033203125, 0.01696014404296875, 0.09996223449707031, 0.4467926025390625, -0.378662109375, 0.0820770263671875, -0.16003036499023438, -0.11485481262207031, 0.29430389404296875, 0.0530853271484375, 0.2396087646484375, -0.13617706298828125, 0.041454315185546875, 0.1447906494140625, -0.13231658935546875, -0.08972930908203125, -0.008113861083984375, 0.43666839599609375, 0.5390472412109375, -0.3469696044921875, 0.1837158203125, -0.15130233764648438, 0.5064239501953125, 0.341644287109375, 0.6979522705078125, -0.2288360595703125, 0.284332275390625, -0.7510223388671875, -0.14232635498046875, 0.5075950622558594, 0.26775169372558594, 0.43536376953125, 0.04338836669921875, 0.086334228515625, -0.2165069580078125, 0.016689300537109375, -0.3300743103027344, 0.111907958984375, 0.033267974853515625, -0.16642379760742188, -0.2208118438720703, 0.01666259765625, -0.05478668212890625, 0.065185546875, 0.5102386474609375, 0.28678131103515625, 0.03691673278808594, -0.02863311767578125, -0.342620849609375, 0.6441802978515625, 0.1582164764404297, 0.0061492919921875, 0.76971435546875, 0.041919708251953125, -0.45836639404296875, -0.11810684204101562, 0.14677047729492188, 0.16220474243164062, 0.6250267028808594, -0.22467041015625, 0.29144287109375, -0.3289794921875, -0.24301910400390625, -0.279052734375, -0.329071044921875, -0.1298675537109375, -0.15792083740234375, -0.1824951171875, -0.0025310516357421875, -0.2625579833984375, -0.488983154296875, -0.1830425262451172, 0.1804351806640625, 0.1575145721435547, 0.15276336669921875, -0.10580825805664062, -0.16160202026367188, -0.03499603271484375, 0.1642913818359375, 0.1288909912109375, 0.09249687194824219, 0.364837646484375, 0.11000251770019531, -0.0193939208984375, 0.0018520355224609375, 0.4872856140136719, -0.24118995666503906, -0.020664215087890625, -0.007534027099609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000013.npy"}
{"epoch": 0.03939393939393939, "step": 14, "batch_size": 128, "mean": -0.021586090326309204, "std": 0.3099919557571411, "min": -0.947998046875, "p10": -0.35278854370117185, "median": -0.027248382568359375, "p90": 0.3335235595703125, "max": 0.86041259765625, "pos_frac": 0.46875, "sample": [0.10248565673828125, 0.029314041137695312, -0.15576171875, -0.34595489501953125, 0.014163970947265625, 0.25989532470703125, -0.21907806396484375, -0.05706024169921875, 0.2184906005859375, 0.34239959716796875, 0.012073516845703125, 0.6014556884765625, -0.32138824462890625, 0.639739990234375, -0.947998046875, 0.30481719970703125, -0.06894874572753906, 0.2723541259765625, -0.06390380859375, -0.5578765869140625, 0.26880645751953125, 0.2549896240234375, 0.3492889404296875, -0.32248687744140625, -0.17581939697265625, 0.001110076904296875, -0.6221923828125, -0.2899589538574219, 0.10727500915527344, -0.033050537109375, -0.1334095001220703, 0.07515525817871094, -0.02935791015625, -0.0088043212890625, -0.7885360717773438, 0.86041259765625, -0.011142730712890625, -0.051998138427734375, 0.05467987060546875, 0.19728851318359375, 0.24737548828125, 0.04449462890625, -0.1006317138671875, -0.2531280517578125, -0.22095489501953125, -0.07140731811523438, -0.5719146728515625, 0.08783340454101562, -0.3761749267578125, 0.05059051513671875, 0.5705604553222656, -0.4268455505371094, 0.159820556640625, 0.12483978271484375, -0.01739501953125, -0.17316436767578125, 0.2873382568359375, 0.076995849609375, -0.2649993896484375, 0.2147960662841797, -0.2471466064453125, -0.32930755615234375, 0.0332794189453125, -0.15593719482421875, 0.1407623291015625, -0.247283935546875, -0.247528076171875, -0.16798782348632812, 0.30596923828125, -0.07049179077148438, 0.5048980712890625, 0.57049560546875, -0.25049781799316406, -0.22585487365722656, -0.35294342041015625, -0.35272216796875, 0.32971954345703125, 0.07293701171875, -0.1288318634033203, 0.19219207763671875, -0.2982826232910156, -0.438079833984375, -0.2267608642578125, -0.07932281494140625, -0.2570648193359375, -0.16720962524414062, -0.02616119384765625, -0.31512451171875, -0.3470935821533203, 0.17969894409179688, -0.5235824584960938, 0.222686767578125, -0.2740631103515625, 0.605926513671875, 0.176666259765625, 0.0330352783203125, 0.16201400756835938, 0.015960693359375, 0.4380645751953125, 0.2824249267578125, -0.58319091796875, 0.19107818603515625, -0.0283355712890625, -0.2017974853515625, 0.4549293518066406, -0.06950759887695312, -0.03532218933105469, -0.2063446044921875, 0.15526580810546875, -0.2999420166015625, -0.5598678588867188, 0.10763168334960938, 0.11106109619140625, -0.12693023681640625, 0.29473876953125, -0.24868202209472656, -0.28522491455078125, 0.21404075622558594, -0.2794017791748047, 0.04041290283203125, -0.3177204132080078, 0.211944580078125, 0.6642837524414062, 0.2698097229003906, 0.40628814697265625, -0.031986236572265625, 0.1456775665283203, -0.44287872314453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000014.npy"}
{"epoch": 0.04242424242424243, "step": 15, "batch_size": 128, "mean": 0.03531627357006073, "std": 0.35889536142349243, "min": -1.3189849853515625, "p10": -0.34925365447998047, "median": 0.049373626708984375, "p90": 0.4953926086425781, "max": 1.2348175048828125, "pos_frac": 0.546875, "sample": [-0.035247802734375, 0.5102386474609375, -0.10052108764648438, 0.166839599609375, -0.354766845703125, 0.17411231994628906, -0.323272705078125, 0.21894264221191406, -0.0137939453125, -0.020969390869140625, 0.091796875, 0.22332000732421875, 0.12860107421875, -0.524688720703125, 0.092010498046875, -0.07559776306152344, 0.6133270263671875, 0.12442970275878906, -0.19525146484375, 0.07105255126953125, -0.015026092529296875, 0.49466705322265625, -0.01706695556640625, -0.7119140625, -0.2920074462890625, 0.21207618713378906, 1.2348175048828125, 0.479278564453125, -0.034271240234375, 0.108612060546875, 0.38426971435546875, 0.093841552734375, -0.4780616760253906, -1.3189849853515625, -0.2985401153564453, 0.09178924560546875, -0.0537872314453125, 0.057239532470703125, 0.14595794677734375, 0.42426300048828125, 0.224456787109375, 0.0953826904296875, -0.268585205078125, -0.42006683349609375, -0.28485107421875, 0.08495330810546875, -0.046356201171875, -0.14569091796875, -0.2758197784423828, 0.05965423583984375, 0.1441822052001953, -0.8821678161621094, 0.22393798828125, 0.22734832763671875, 0.110107421875, -1.1028900146484375, -0.1176910400390625, -0.24192047119140625, -0.22042083740234375, -0.017181396484375, -0.144989013671875, 0.056400299072265625, -0.3448143005371094, 0.21253204345703125, 0.1645050048828125, -0.08832740783691406, 0.190582275390625, 0.21196746826171875, 0.20746994018554688, 0.7020034790039062, -0.01302337646484375, 0.477508544921875, 0.27474403381347656, 0.6275787353515625, 0.311920166015625, 0.5838623046875, 0.0050811767578125, 0.011806488037109375, -0.05601692199707031, 0.1096954345703125, 0.17182159423828125, -0.39725494384765625, 0.976959228515625, -0.352691650390625, 0.6156234741210938, -0.024145126342773438, 0.0497894287109375, -0.1895751953125, 0.04895782470703125, -0.495697021484375, -0.00159454345703125, 0.00904083251953125, 0.1660308837890625, -0.09453582763671875, -0.23107147216796875, 0.060085296630859375, -0.59564208984375, -0.1396484375, 0.13610076904296875, -0.16741943359375, -0.2132110595703125, 0.24170684814453125, 0.09383392333984375, 0.5701446533203125, -0.24185943603515625, 0.131103515625, -0.0277862548828125, -0.14934539794921875, 0.6370315551757812, 0.148162841796875, 0.31976318359375, -0.083404541015625, -0.3477802276611328, 0.00283050537109375, 0.760894775390625, 0.4970855712890625, -0.10076522827148438, 0.4019012451171875, 0.5000534057617188, 0.2901496887207031, -0.2957916259765625, 0.0443878173828125, -0.14544677734375, -0.37589454650878906, 0.387664794921875, -0.08266448974609375, 0.17059326171875, -0.08259010314941406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000015.npy"}
{"epoch": 0.045454545454545456, "step": 16, "batch_size": 128, "mean": -0.0015023797750473022, "std": 0.3271251618862152, "min": -0.9761962890625, "p10": -0.4225959777832031, "median": -0.013384819030761719, "p90": 0.42778396606445296, "max": 0.843658447265625, "pos_frac": 0.46875, "sample": [-0.3378143310546875, 0.36673736572265625, -0.4261474609375, -0.047760009765625, 0.34993934631347656, 0.843658447265625, 0.20598602294921875, -0.18255233764648438, 0.30458641052246094, -0.258636474609375, 0.14125823974609375, -0.345489501953125, -0.2313995361328125, -0.006809234619140625, 0.11550712585449219, 0.2346668243408203, 0.24997711181640625, -0.24048995971679688, 0.12259101867675781, 0.057403564453125, 0.03619384765625, -0.1575164794921875, -0.08033561706542969, -0.06336593627929688, 0.15932464599609375, -0.5482864379882812, 0.1982421875, 0.25286102294921875, -0.2180938720703125, -0.03331756591796875, -0.4581298828125, 0.079925537109375, -0.42198944091796875, 0.5648880004882812, -0.013416290283203125, 0.043182373046875, 0.33983612060546875, -0.5876922607421875, 0.2274322509765625, -0.057830810546875, 0.25775146484375, -0.1883697509765625, -0.35486602783203125, -0.19378662109375, -0.4743804931640625, -0.05364990234375, 0.5157623291015625, 0.277618408203125, 0.08483505249023438, -0.0345458984375, -0.3834114074707031, -0.550872802734375, 0.69622802734375, 0.36077880859375, 0.1617412567138672, 0.2032623291015625, -0.15716934204101562, 0.2888946533203125, -0.12745094299316406, 0.0187835693359375, -0.5772857666015625, 0.26499366760253906, -0.11323165893554688, -0.23839569091796875, 0.08521270751953125, -0.1594085693359375, -0.0245208740234375, 0.0095062255859375, 0.4965667724609375, 0.522857666015625, -0.09075927734375, 0.12688446044921875, 0.090576171875, -0.2779693603515625, -0.1275787353515625, 0.6428985595703125, -0.38570594787597656, -0.26923370361328125, 0.210845947265625, -0.2930755615234375, -0.2946052551269531, 0.26016998291015625, 0.0299224853515625, 0.29885101318359375, 0.6724624633789062, 0.16411209106445312, 0.66046142578125, 0.48946380615234375, -0.2523193359375, -0.0505828857421875, -0.1302337646484375, -0.041667938232421875, -0.03353118896484375, -0.009378433227539062, 0.41065216064453125, -0.26029205322265625, -0.1520843505859375, -0.1396331787109375, 0.4677581787109375, 0.40073394775390625, 0.5812034606933594, -0.4857635498046875, 0.49139404296875, -0.9761962890625, -0.036907196044921875, -0.010242462158203125, -0.4666328430175781, 0.11425018310546875, 0.0294342041015625, 0.38819122314453125, -0.11933135986328125, 0.3977622985839844, -0.2627735137939453, -0.406707763671875, 0.006992340087890625, 0.07597732543945312, 0.004016876220703125, -0.5393257141113281, -0.013353347778320312, 0.0606842041015625, -0.42401123046875, -0.1632862091064453, -0.04035186767578125, -0.026304244995117188, -0.4158821105957031, -0.3939208984375, 0.201019287109375, -0.669952392578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000016.npy"}
{"epoch": 0.048484848484848485, "step": 17, "batch_size": 128, "mean": 0.010346956551074982, "std": 0.2969686985015869, "min": -0.949798583984375, "p10": -0.34404640197753905, "median": 0.011532783508300781, "p90": 0.3359962463378906, "max": 0.7650070190429688, "pos_frac": 0.515625, "sample": [-0.2078704833984375, 0.5915069580078125, 0.41291046142578125, 0.02056884765625, 0.08677482604980469, -0.389862060546875, 0.33386993408203125, 0.04055023193359375, -0.474609375, 0.3079872131347656, -0.11671829223632812, 0.143218994140625, -0.0711517333984375, 0.1644439697265625, -0.33306121826171875, -0.2332763671875, -0.22989654541015625, 0.4239330291748047, 0.25913238525390625, -0.029977798461914062, 0.2505149841308594, -0.45867156982421875, -0.033657073974609375, -0.06283378601074219, -0.04192543029785156, 0.19379425048828125, -0.06759071350097656, 0.176605224609375, -0.1599407196044922, 0.2945556640625, 0.20355987548828125, -0.3519477844238281, -0.133758544921875, 0.619873046875, -0.8497314453125, -0.34066009521484375, -0.08228206634521484, -0.1615581512451172, 0.19091033935546875, 0.3604164123535156, -0.1073455810546875, 0.1845245361328125, -0.211273193359375, -0.01674652099609375, -0.1700286865234375, 0.0515594482421875, -0.05707550048828125, 0.12355232238769531, 0.1241607666015625, -0.15656280517578125, 0.2458171844482422, -0.949798583984375, -0.3789520263671875, 0.043914794921875, -0.22258377075195312, 0.1659564971923828, 0.12790679931640625, -0.09281158447265625, -0.184112548828125, -0.048065185546875, 0.2733497619628906, 0.7650070190429688, 0.14052200317382812, 0.041107177734375, 0.1609954833984375, 0.1744232177734375, 0.23378753662109375, 0.08945465087890625, -0.3350334167480469, -0.5634994506835938, 0.009943008422851562, 0.7155075073242188, 0.1663360595703125, -0.38538360595703125, -0.19767379760742188, -0.20837783813476562, 0.20115280151367188, -0.20008468627929688, 0.002254486083984375, -0.32718658447265625, -0.13856124877929688, 0.19107437133789062, 0.29538726806640625, 0.01312255859375, 0.2833671569824219, 0.27301597595214844, 0.06104278564453125, -0.0452423095703125, 0.2961387634277344, -0.0758819580078125, -0.015300750732421875, -0.064483642578125, -0.23705291748046875, 0.227294921875, -0.12495803833007812, 0.45068359375, 0.041294097900390625, 0.0531158447265625, 0.39922332763671875, -0.224822998046875, 0.26818084716796875, 0.018218994140625, -0.117584228515625, 0.1024169921875, 0.30609893798828125, -0.21221160888671875, 0.09407806396484375, -0.13214874267578125, -0.6531753540039062, 0.1908588409423828, -0.1982421875, -0.17170333862304688, -0.0355987548828125, -0.4657249450683594, -0.18346595764160156, -0.3665008544921875, 0.0771484375, 0.5792236328125, 0.5271873474121094, 0.6075515747070312, -0.0847625732421875, -0.15275001525878906, -0.10291099548339844, 0.05917930603027344, -0.6931610107421875, 0.288543701171875, 0.303466796875, 0.3409576416015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000017.npy"}
{"epoch": 0.051515151515151514, "step": 18, "batch_size": 128, "mean": 0.02474963665008545, "std": 0.37329238653182983, "min": -0.967193603515625, "p10": -0.41748771667480467, "median": 0.00304412841796875, "p90": 0.500571060180664, "max": 1.378814697265625, "pos_frac": 0.5078125, "sample": [0.5066299438476562, -0.44866180419921875, 0.1271495819091797, -0.00308990478515625, 0.14669418334960938, -0.4891357421875, 0.9764404296875, 0.3820838928222656, 1.378814697265625, -0.9595718383789062, -0.1957244873046875, 0.22345352172851562, -0.11155509948730469, 0.25150299072265625, 0.2027721405029297, -0.2819061279296875, 0.36614990234375, -0.01811981201171875, -0.03662109375, 0.692352294921875, -0.08728218078613281, 1.2410888671875, -0.052486419677734375, 0.09204864501953125, 0.23957443237304688, 0.07240486145019531, -0.13609695434570312, 0.09952926635742188, 0.2131195068359375, 0.44626617431640625, 0.0035552978515625, 0.007404327392578125, -0.3456878662109375, -0.16175460815429688, -0.5867156982421875, 0.26444244384765625, -0.20847511291503906, -0.20420265197753906, -0.13213348388671875, 0.028627395629882812, 0.07579803466796875, 0.5248069763183594, 0.1775360107421875, -0.39931488037109375, 0.6729354858398438, -0.4275360107421875, -0.0587310791015625, -0.262451171875, -0.175628662109375, -0.13165855407714844, -0.09099578857421875, 0.065643310546875, -0.10633087158203125, 0.002532958984375, -0.0444793701171875, -0.25460052490234375, -0.967193603515625, 0.43761444091796875, -0.21369552612304688, -0.0528564453125, -0.13518142700195312, 0.5491104125976562, 0.08472061157226562, -0.14335250854492188, -0.285400390625, 0.15142822265625, 0.04929351806640625, 0.33579254150390625, 0.10297584533691406, 0.4310455322265625, -0.5500106811523438, -0.3011627197265625, 0.1446990966796875, -0.4131813049316406, 0.08958053588867188, 0.2796592712402344, -0.21497344970703125, 0.284393310546875, -0.8773078918457031, 0.07568740844726562, 0.02826690673828125, 0.060638427734375, -0.14807891845703125, 0.131500244140625, 0.4877166748046875, 0.277587890625, 0.3609352111816406, -0.1129150390625, 0.1154632568359375, -0.0626678466796875, 0.550811767578125, -0.12319183349609375, 0.30910682678222656, -0.0437164306640625, -0.5277175903320312, 0.5670318603515625, 0.10605239868164062, 0.73138427734375, -0.33026885986328125, -0.20083999633789062, -0.5144805908203125, 0.572052001953125, -0.009263992309570312, -0.1236572265625, 0.4979743957519531, -0.30664825439453125, 0.05941009521484375, -0.159698486328125, -0.05069732666015625, -0.562347412109375, 0.039073944091796875, -0.149078369140625, -0.12122344970703125, 0.1811065673828125, 0.110870361328125, 0.3899955749511719, 0.036159515380859375, 0.2160472869873047, -0.24108505249023438, -0.47144317626953125, 0.6211395263671875, -0.3198699951171875, 0.190185546875, 0.1504058837890625, -0.43990325927734375, -0.16009521484375, -0.3310050964355469, -0.04516410827636719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000018.npy"}
{"epoch": 0.05454545454545454, "step": 19, "batch_size": 128, "mean": 0.013132825493812561, "std": 0.36088183522224426, "min": -1.07452392578125, "p10": -0.40382614135742184, "median": 0.07112407684326172, "p90": 0.41594467163085935, "max": 0.895660400390625, "pos_frac": 0.53125, "sample": [-0.6456985473632812, 0.722076416015625, 0.12788009643554688, -0.2550048828125, -0.2605133056640625, -0.20370101928710938, 0.07269668579101562, -0.022737503051757812, -0.2600421905517578, 0.14411354064941406, -0.21643447875976562, 0.724090576171875, 0.3611106872558594, -0.017061233520507812, -0.956146240234375, -0.19942474365234375, 0.691680908203125, 0.42272186279296875, 0.4249267578125, 0.892059326171875, -0.25791168212890625, 0.3129386901855469, 0.0333709716796875, -0.148223876953125, 0.08101654052734375, -0.3735198974609375, -0.24991989135742188, -0.01485443115234375, -0.20708274841308594, 0.24269485473632812, 0.51776123046875, 0.1121673583984375, -0.211761474609375, 0.39311981201171875, -0.310302734375, -0.2968711853027344, 0.37834930419921875, 0.3946990966796875, -0.13247299194335938, 0.11566925048828125, 0.06955146789550781, 0.15230178833007812, -0.00547027587890625, -0.2757854461669922, -0.40082550048828125, -0.44738006591796875, 0.365478515625, -0.17429351806640625, -0.933013916015625, -0.12005615234375, 0.13889694213867188, 0.290802001953125, -1.07452392578125, 0.09833717346191406, -0.1392974853515625, 0.7303466796875, -0.24025726318359375, 0.07730674743652344, 0.107879638671875, 0.4693756103515625, 0.12799072265625, 0.8677978515625, -0.0800323486328125, -0.56597900390625, 0.12218856811523438, 0.3146076202392578, 0.28786468505859375, 0.10015869140625, -0.41082763671875, 0.06941986083984375, 0.08019447326660156, 0.08428192138671875, -0.3267669677734375, -0.0103302001953125, -0.546966552734375, 0.31372833251953125, -0.6392822265625, 0.0822601318359375, -0.0782928466796875, -0.00732421875, -0.3333568572998047, -0.021701812744140625, -0.07970428466796875, 0.3174095153808594, 0.43924713134765625, -0.07603263854980469, 0.3177070617675781, 0.1775054931640625, -0.15412139892578125, 0.1014404296875, 0.4130401611328125, 0.39711761474609375, 0.5009384155273438, 0.10024261474609375, -0.03771781921386719, 0.09137535095214844, -0.38043975830078125, -0.41693115234375, 0.3355579376220703, 0.10880279541015625, -0.06342697143554688, -0.2380390167236328, -0.2932281494140625, 0.1667327880859375, -0.21056365966796875, 0.15142822265625, 0.1258087158203125, 0.2876701354980469, -0.2779083251953125, -0.06270599365234375, 0.07841873168945312, 0.33174896240234375, -0.23157882690429688, -0.840301513671875, 0.3601512908935547, -0.23012161254882812, -0.24750518798828125, 0.20904541015625, -0.4932708740234375, 0.1025848388671875, 0.10010528564453125, 0.10852813720703125, 0.15830230712890625, 0.895660400390625, -0.08863067626953125, -0.526763916015625, 0.0055866241455078125, 0.20337295532226562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000019.npy"}
{"epoch": 0.05757575757575758, "step": 20, "batch_size": 128, "mean": 0.01438722014427185, "std": 0.3383356034755707, "min": -0.994384765625, "p10": -0.35055999755859374, "median": -0.032607078552246094, "p90": 0.4657737731933594, "max": 1.30511474609375, "pos_frac": 0.4609375, "sample": [0.5021934509277344, -0.19867706298828125, 0.0724334716796875, 0.38182830810546875, 0.5589599609375, 0.2604217529296875, 0.4947509765625, 0.2984733581542969, -0.1916522979736328, -0.16829681396484375, 0.05118751525878906, 0.1244964599609375, -0.040821075439453125, -0.0711212158203125, -0.0015201568603515625, -0.293792724609375, -0.7380828857421875, 0.001667022705078125, -0.08193588256835938, -0.052234649658203125, -0.19998931884765625, 0.46512603759765625, -0.20190811157226562, 0.14465713500976562, 0.3079032897949219, -0.2665424346923828, -0.6374053955078125, 0.830963134765625, 0.4058799743652344, -0.03275489807128906, -0.05680274963378906, 0.53509521484375, -0.29286956787109375, 0.3349170684814453, -0.4769287109375, -0.4094963073730469, 0.008752822875976562, -0.027797698974609375, 0.46897125244140625, -0.19397354125976562, -0.741729736328125, -0.1735076904296875, -0.1583709716796875, 0.12451553344726562, -0.11175537109375, 0.08151626586914062, -0.07695960998535156, 0.3736724853515625, -0.47386932373046875, 0.23532867431640625, -0.009950637817382812, -0.04869842529296875, -0.26702880859375, -0.5276756286621094, 0.1161956787109375, -0.355804443359375, -0.06020164489746094, -0.296295166015625, 0.5356674194335938, -0.07318496704101562, 0.016010284423828125, -0.17575645446777344, -0.032459259033203125, 0.24153900146484375, -0.7802581787109375, 0.1629180908203125, 0.46728515625, 0.30925750732421875, 0.018930435180664062, -0.17642593383789062, -0.04137420654296875, -0.10921478271484375, -0.052764892578125, -0.27276611328125, -0.15576171875, 0.18359375, -0.2708854675292969, 0.017059326171875, -0.40468597412109375, 0.1269683837890625, -0.0621337890625, 0.6946487426757812, 0.21395301818847656, 0.027360916137695312, -0.16961669921875, 0.3025970458984375, -0.32552528381347656, 0.723541259765625, -0.0553741455078125, -0.3782958984375, -0.14480209350585938, 0.3244476318359375, 0.4907684326171875, 0.16156005859375, -0.5228424072265625, -0.0433349609375, 0.46832275390625, 0.08753585815429688, -0.2962837219238281, -0.18384552001953125, -0.3483123779296875, 0.17966461181640625, -0.2505149841308594, 0.24884796142578125, 0.13050460815429688, -0.0364990234375, 0.2639427185058594, -0.13539505004882812, 0.3708648681640625, -0.2638816833496094, -0.04438591003417969, 0.2461700439453125, -0.010564804077148438, 1.30511474609375, 0.27251243591308594, 0.3287506103515625, -0.23589324951171875, 0.10474777221679688, -0.15953826904296875, 0.397186279296875, 0.353607177734375, 0.23467254638671875, -0.2477569580078125, -0.059783935546875, 0.08968925476074219, 0.07993698120117188, -0.994384765625, -0.06756591796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000020.npy"}
{"epoch": 0.06060606060606061, "step": 21, "batch_size": 128, "mean": -0.029511526226997375, "std": 0.3695879578590393, "min": -0.8862762451171875, "p10": -0.49925460815429684, "median": -0.014485359191894531, "p90": 0.39052562713623046, "max": 1.33612060546875, "pos_frac": 0.46875, "sample": [0.30722808837890625, 0.19922256469726562, -0.3719139099121094, -0.012447357177734375, -0.3773956298828125, -0.63128662109375, -0.24564170837402344, 0.21239089965820312, -0.19775390625, 0.99566650390625, 0.43605804443359375, 0.06606483459472656, -0.565826416015625, -0.7298583984375, -0.20165252685546875, 0.6269073486328125, 0.4413909912109375, -0.4212188720703125, 0.2884407043457031, -0.35108184814453125, -0.1573333740234375, -0.34110260009765625, 0.4065704345703125, -0.23533058166503906, 0.0460052490234375, -0.17211151123046875, -0.057220458984375, -0.092376708984375, -0.08251571655273438, -0.06914520263671875, -0.20700836181640625, -0.733367919921875, 0.004039764404296875, 0.27019500732421875, -0.3626708984375, -0.1750965118408203, -0.02557373046875, 0.32096099853515625, -0.49620819091796875, -0.1602630615234375, -0.2175273895263672, 0.5385284423828125, -0.0403289794921875, -0.09296035766601562, 0.10151290893554688, -0.4047393798828125, -0.06818008422851562, -0.74920654296875, -0.3197593688964844, 0.1549835205078125, -0.26720428466796875, -0.053546905517578125, -0.0103759765625, -0.09821891784667969, 0.20354843139648438, 0.16129302978515625, -0.09708786010742188, 0.5575332641601562, -0.013498306274414062, -0.749420166015625, 0.8187942504882812, 0.05397796630859375, 0.21660804748535156, 0.2043609619140625, -0.1358795166015625, 1.33612060546875, -0.4349517822265625, -0.10680770874023438, 0.20434188842773438, -0.8862762451171875, 0.29633140563964844, 0.04765796661376953, 0.3377532958984375, -0.780914306640625, 0.07741928100585938, 0.2576446533203125, 0.3304595947265625, 0.5863418579101562, -0.4329833984375, -0.6861763000488281, 0.1308460235595703, 0.0910491943359375, 0.02985382080078125, -0.26401519775390625, 0.050567626953125, 0.0168304443359375, -0.1939239501953125, -0.161285400390625, 0.078857421875, -0.598602294921875, 0.18234825134277344, -0.5992202758789062, -0.41263580322265625, -0.028926849365234375, 0.023481369018554688, -0.0045013427734375, 0.23685455322265625, 0.1650543212890625, -0.06616973876953125, 0.28874969482421875, 0.3895282745361328, 0.059787750244140625, -0.6717453002929688, 0.37622833251953125, 0.4143943786621094, 0.05938720703125, 0.23271942138671875, 0.392852783203125, 0.082489013671875, 0.010402679443359375, -0.2425079345703125, 0.13082313537597656, 0.08502960205078125, -0.4169464111328125, -0.5063629150390625, 0.08953094482421875, -0.3626556396484375, 0.7012786865234375, -0.47830963134765625, -0.015472412109375, -0.07607269287109375, -0.0637216567993164, -0.0697479248046875, -0.018798828125, 0.37485313415527344, 0.177490234375, 0.03600311279296875, -0.42005157470703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000021.npy"}
{"epoch": 0.06363636363636363, "step": 22, "batch_size": 128, "mean": 0.017291978001594543, "std": 0.41309213638305664, "min": -1.56378173828125, "p10": -0.4217245101928711, "median": 0.00266265869140625, "p90": 0.47991790771484377, "max": 1.3443756103515625, "pos_frac": 0.5, "sample": [-0.023681640625, 0.8299903869628906, 0.183258056640625, 0.2612457275390625, 0.5975570678710938, 0.2933635711669922, -0.20287704467773438, -0.16566848754882812, -0.15173721313476562, 0.214630126953125, 0.013448715209960938, 0.188751220703125, 0.28566932678222656, -0.232940673828125, -0.4953460693359375, -0.0225677490234375, 0.08086395263671875, -0.0011444091796875, 0.03231048583984375, -1.275299072265625, -0.1114349365234375, -0.0919647216796875, 0.250274658203125, 0.37298583984375, -0.2723884582519531, -0.4281177520751953, -0.0434722900390625, 0.7820358276367188, -0.21303558349609375, -0.021331787109375, 0.3779449462890625, 0.3301544189453125, 0.652008056640625, -0.06565093994140625, -0.056339263916015625, -0.25484466552734375, -0.1274261474609375, -0.0195159912109375, -0.35089111328125, 0.11236572265625, -0.19118499755859375, 0.4115276336669922, -0.06473350524902344, 0.177215576171875, 0.16495513916015625, -0.0029449462890625, 0.0064697265625, 0.7501220703125, -0.6613540649414062, -0.1232452392578125, -0.4040985107421875, 0.08367156982421875, 0.2824287414550781, -0.03509521484375, -0.0718536376953125, -0.0134429931640625, 0.03057098388671875, 0.1881561279296875, 0.58160400390625, -0.280853271484375, -0.2757110595703125, 0.31170654296875, 0.2597465515136719, -0.02219390869140625, -0.10957717895507812, -0.0748748779296875, 0.02805328369140625, -0.0634307861328125, 0.14344024658203125, 1.0194091796875, 0.21578216552734375, 0.3233795166015625, -0.4201526641845703, 0.0868682861328125, 0.6220512390136719, 0.12066268920898438, 0.11285018920898438, 0.6293983459472656, 0.39225006103515625, -0.2376708984375, 0.1431121826171875, -0.8543777465820312, 0.21836471557617188, -0.4125194549560547, 0.4530487060546875, 0.11548995971679688, 1.0338821411132812, 0.10208892822265625, -0.00322723388671875, 0.027587890625, 0.2911415100097656, -0.385345458984375, -0.1309356689453125, -0.40081787109375, 0.057460784912109375, 0.1000823974609375, -0.5422439575195312, -0.3238029479980469, 0.16835403442382812, 1.3443756103515625, -0.3173065185546875, -0.019683837890625, -0.30387115478515625, 0.08270263671875, -0.05455970764160156, -0.7456512451171875, 0.089019775390625, -0.261810302734375, 0.16510009765625, -0.42539215087890625, -1.56378173828125, -0.20749664306640625, 0.3885078430175781, -0.4486083984375, -0.741973876953125, 0.4799041748046875, -0.7289962768554688, -0.07196807861328125, -0.12291717529296875, 0.0830841064453125, -0.47483062744140625, 0.8858184814453125, 0.027492523193359375, -0.22796630859375, -0.32955169677734375, 0.479949951171875, 0.4364471435546875, 0.018909454345703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000022.npy"}
{"epoch": 0.06666666666666667, "step": 23, "batch_size": 128, "mean": 0.02365492284297943, "std": 0.38563254475593567, "min": -1.0140571594238281, "p10": -0.450701904296875, "median": 0.004942893981933594, "p90": 0.4994163513183594, "max": 1.0163497924804688, "pos_frac": 0.5078125, "sample": [-0.057292938232421875, -0.13745880126953125, -0.05338287353515625, 0.2536468505859375, 0.2571430206298828, -0.3604736328125, -0.7732887268066406, 0.8228607177734375, 0.06755447387695312, -0.102203369140625, 0.0199127197265625, 0.20360946655273438, 0.270477294921875, 0.37070465087890625, -0.24680328369140625, 0.19815444946289062, -0.2320709228515625, 0.4974822998046875, -0.001861572265625, 0.908355712890625, 0.37235260009765625, -0.01141357421875, -0.25182342529296875, -0.11159133911132812, -0.86822509765625, -0.223907470703125, 0.006500244140625, -0.6836814880371094, 0.2397003173828125, -0.400726318359375, 0.2622032165527344, -0.25878143310546875, -0.6644287109375, 0.2169342041015625, 0.19804763793945312, 0.7039642333984375, 0.03589630126953125, 0.08153533935546875, -0.22568130493164062, -0.4692230224609375, 0.27289581298828125, -0.121734619140625, 0.7227783203125, 0.04873847961425781, 1.0163497924804688, 0.2975502014160156, 0.0383758544921875, -0.56121826171875, -0.026035308837890625, -0.740692138671875, -0.19528579711914062, 0.24305343627929688, 0.478729248046875, 0.0033855438232421875, 0.834747314453125, -0.1895885467529297, -0.2328643798828125, -0.28057098388671875, -0.18891525268554688, -0.12433815002441406, -0.12871551513671875, 0.130462646484375, 0.2798614501953125, -0.22351455688476562, -0.19475555419921875, -0.7104606628417969, 0.3367156982421875, 0.17559814453125, 0.5431976318359375, -0.157440185546875, -0.10872840881347656, 0.21646881103515625, 0.1024627685546875, 0.031780242919921875, -0.4579010009765625, -0.0173187255859375, 0.09845542907714844, -0.180877685546875, 0.0073299407958984375, -0.4866180419921875, -0.1628284454345703, -0.34813690185546875, 0.03372955322265625, -0.1980133056640625, -0.23933792114257812, 0.294036865234375, -0.24374008178710938, -1.0140571594238281, 0.21187973022460938, -0.003509521484375, 0.0066585540771484375, -0.1340179443359375, 0.09372138977050781, -0.2344970703125, 0.0838165283203125, 0.07144927978515625, -0.10372161865234375, -0.26873779296875, 0.3657798767089844, -0.02101898193359375, -0.04193115234375, -0.8034286499023438, -0.0619049072265625, 0.6046104431152344, 0.51422119140625, 0.45587158203125, -0.7731475830078125, 0.4983978271484375, 0.7431411743164062, 0.49166107177734375, 0.425048828125, -0.191375732421875, 0.5017929077148438, -0.4476165771484375, -0.004364013671875, -0.1486663818359375, 0.343109130859375, -0.27976226806640625, 0.03199005126953125, 0.2486572265625, 0.37906646728515625, 0.6368637084960938, 0.058429718017578125, 0.45465087890625, 0.43309783935546875, -0.13230514526367188, -0.19469070434570312, 0.6928787231445312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000023.npy"}
{"epoch": 0.0696969696969697, "step": 24, "batch_size": 128, "mean": 0.010594427585601807, "std": 0.2943873703479767, "min": -0.8493537902832031, "p10": -0.31383438110351564, "median": 0.005176544189453125, "p90": 0.3572364807128906, "max": 1.1098175048828125, "pos_frac": 0.5078125, "sample": [0.0755157470703125, 0.38436126708984375, -0.0219573974609375, -0.29868316650390625, 0.1056365966796875, -0.15178680419921875, -0.015588760375976562, -0.127899169921875, 0.2628154754638672, 0.45749664306640625, 1.1098175048828125, -0.17763519287109375, -0.0597076416015625, 0.34560394287109375, 0.08524322509765625, 0.6164321899414062, -0.1620025634765625, 0.0037994384765625, -0.03864288330078125, -0.0087738037109375, 0.28916168212890625, 0.11004638671875, 0.013368606567382812, 0.18732452392578125, 0.15278053283691406, -0.02181243896484375, -0.06528472900390625, 0.3213348388671875, -0.1833953857421875, -0.55718994140625, -0.27706336975097656, 0.19597625732421875, -0.023046493530273438, -0.05841064453125, 0.355499267578125, 0.12550926208496094, -0.1006927490234375, 0.0436553955078125, 0.26946258544921875, 0.030216217041015625, -0.1796398162841797, 0.0415802001953125, -0.09455299377441406, -0.0717620849609375, -0.1516571044921875, -0.16729736328125, -0.08559799194335938, 0.12522506713867188, -0.236907958984375, -0.22186279296875, 0.0209197998046875, -0.18054962158203125, -0.0845794677734375, 0.14534378051757812, -0.4422149658203125, -0.17864990234375, 0.20981216430664062, 0.3938026428222656, -0.35649871826171875, 0.25176239013671875, -0.31000518798828125, 0.03289794921875, 0.319549560546875, -0.212005615234375, 0.2202606201171875, -0.777008056640625, 0.025438308715820312, 0.4451904296875, -0.004192352294921875, -0.4345855712890625, -0.3227691650390625, 0.077880859375, -0.30670928955078125, -0.025747299194335938, -0.36167144775390625, 0.2480010986328125, 0.00959014892578125, 0.6010704040527344, 0.24111175537109375, 0.028308868408203125, 0.1403484344482422, 0.14288330078125, -0.5262031555175781, -0.26351165771484375, 0.4467887878417969, -0.5156898498535156, 0.36128997802734375, -0.14151954650878906, 0.2490386962890625, -0.17108917236328125, 0.19964599609375, 0.135711669921875, 0.05499839782714844, 0.00655364990234375, 0.08219337463378906, 0.03925895690917969, 0.4010295867919922, 0.21044921875, -0.03179168701171875, -0.8493537902832031, -0.1812896728515625, -0.15381622314453125, -0.6720962524414062, 0.2719879150390625, -0.13251495361328125, 0.3803558349609375, 0.3375701904296875, 0.13408279418945312, 0.039005279541015625, 0.8199615478515625, 0.03235626220703125, -0.1255950927734375, -0.0061092376708984375, -0.5196685791015625, 0.26287078857421875, 0.06582069396972656, -0.0406646728515625, -0.22564125061035156, -0.3404388427734375, 0.08896636962890625, -0.0720367431640625, -0.00038909912109375, 0.5566558837890625, -0.2985267639160156, 0.2905769348144531, -0.15853500366210938, -0.1461334228515625, -0.24446487426757812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000024.npy"}
{"epoch": 0.07272727272727272, "step": 25, "batch_size": 128, "mean": 0.020165130496025085, "std": 0.3373103141784668, "min": -1.1229324340820312, "p10": -0.33698158264160155, "median": 0.04592132568359375, "p90": 0.42595291137695307, "max": 1.2801666259765625, "pos_frac": 0.5703125, "sample": [0.440399169921875, 0.5355796813964844, 0.270599365234375, 0.32558441162109375, 0.49742889404296875, 0.00086212158203125, -0.3439960479736328, 0.18490219116210938, -0.19970321655273438, 0.1249237060546875, -0.2610282897949219, -0.336090087890625, -0.18503952026367188, -0.25789451599121094, 0.1367969512939453, -0.281463623046875, -0.2886772155761719, -0.06073760986328125, 0.08683013916015625, 0.255218505859375, 0.06276702880859375, -0.26403045654296875, 1.2801666259765625, 0.17548370361328125, 0.08074951171875, -0.0672454833984375, 0.04831695556640625, 0.13958740234375, -0.398956298828125, -0.26984405517578125, -0.003997802734375, -0.19269943237304688, 0.16575241088867188, 0.5074958801269531, 0.08098983764648438, -0.3003082275390625, 0.01297760009765625, 0.617950439453125, -0.13340187072753906, 0.10171699523925781, 0.012165069580078125, -0.0042266845703125, 0.1830291748046875, -0.1480884552001953, -0.15975189208984375, -0.1183624267578125, -0.021799087524414062, 0.012912750244140625, 0.272064208984375, 0.0677032470703125, 0.41976165771484375, 0.04541015625, -0.1135406494140625, 0.36151885986328125, -0.04010772705078125, 0.013227462768554688, -1.1229324340820312, 0.09954833984375, -0.21216583251953125, 0.06717109680175781, -0.20555877685546875, 0.19305419921875, 0.16822052001953125, 0.0464324951171875, 0.25206756591796875, 0.14762115478515625, -0.5400390625, 0.10975265502929688, -0.119659423828125, -0.18451690673828125, -0.6162986755371094, -0.01210784912109375, 0.5448989868164062, -0.39423370361328125, 0.07414054870605469, 0.44499969482421875, 0.36949920654296875, 0.06624221801757812, 0.16998672485351562, -0.081573486328125, 0.04479789733886719, 0.3656005859375, -0.654083251953125, -0.3634033203125, -0.2737579345703125, -0.217041015625, 0.32807159423828125, 0.1855621337890625, -0.8087692260742188, 0.4127349853515625, 0.15241622924804688, 0.04044342041015625, 0.20656394958496094, 0.057460784912109375, 0.12508392333984375, -0.2626991271972656, 0.1266632080078125, -0.16771316528320312, 0.108551025390625, 0.7765884399414062, -0.5260086059570312, 0.08163833618164062, -0.14182281494140625, -0.09775924682617188, 0.4146461486816406, 0.79205322265625, 0.712432861328125, 0.09092903137207031, 0.105743408203125, -0.014156341552734375, 0.057056427001953125, -0.3390617370605469, -0.19249725341796875, -0.330902099609375, 0.31081390380859375, -0.17299652099609375, 0.0416107177734375, -0.2833080291748047, 0.07552337646484375, -0.3052825927734375, 0.1610565185546875, -0.7528762817382812, -0.1906585693359375, -0.324371337890625, 0.6380290985107422, -0.49022674560546875, 0.17639541625976562, 0.5676345825195312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000025.npy"}
{"epoch": 0.07575757575757576, "step": 26, "batch_size": 128, "mean": 0.023159563541412354, "std": 0.4251406192779541, "min": -1.110595703125, "p10": -0.47366943359375, "median": -0.0055065155029296875, "p90": 0.47875976562499994, "max": 1.591156005859375, "pos_frac": 0.4921875, "sample": [0.487091064453125, -0.03410530090332031, -0.27260398864746094, -0.3844146728515625, 0.08010101318359375, 0.36373138427734375, 0.19843482971191406, -0.06822013854980469, -0.0045166015625, 0.475189208984375, 1.2459869384765625, 1.591156005859375, -0.472076416015625, 0.18413162231445312, 0.0555419921875, -0.1561737060546875, 0.20804786682128906, -0.477386474609375, -0.16715240478515625, 0.019824981689453125, 0.17167282104492188, -0.13165664672851562, -0.13401031494140625, 0.19445419311523438, 0.23858642578125, 0.23940277099609375, 0.11082077026367188, 0.8024215698242188, 0.6049919128417969, 0.32445526123046875, 0.23685455322265625, -0.24812889099121094, 0.2108020782470703, -0.26214599609375, -0.06739234924316406, 0.19252777099609375, -0.1564483642578125, -0.054229736328125, -0.5108566284179688, -0.370849609375, -0.27215576171875, -0.0767364501953125, 0.14044189453125, -0.21273040771484375, -0.10850143432617188, -0.3479881286621094, -0.365997314453125, -0.61065673828125, -0.3716888427734375, 0.30171966552734375, -0.09647750854492188, 1.1016082763671875, 0.36460113525390625, 0.28148651123046875, -0.089385986328125, -0.8124847412109375, 0.4492835998535156, 0.48943328857421875, 0.2037200927734375, -0.1312103271484375, -0.2731819152832031, 0.612152099609375, -0.13978958129882812, 0.23886489868164062, 0.33255767822265625, -0.8295440673828125, 0.525909423828125, -0.9517059326171875, 0.41812896728515625, -0.07802581787109375, -0.323272705078125, -0.30641937255859375, -0.3907318115234375, -0.17603302001953125, -0.006496429443359375, 0.2765655517578125, 0.27231597900390625, -0.1228790283203125, -0.195556640625, -0.6911430358886719, 0.036376953125, 0.156158447265625, -0.009738922119140625, -0.2947845458984375, 0.01287078857421875, 0.3446540832519531, 0.724822998046875, -0.156951904296875, -1.110595703125, -0.9065093994140625, 0.1980438232421875, -0.37677001953125, -0.06632614135742188, 0.2622489929199219, -0.01351165771484375, -0.17559051513671875, -0.5834617614746094, -0.10610198974609375, 1.0446929931640625, 0.26678466796875, -0.37357330322265625, 0.652923583984375, 0.4500732421875, -0.4980316162109375, -0.23565673828125, 0.8820343017578125, 0.2889251708984375, 0.0789642333984375, -0.2293701171875, 0.2562599182128906, 0.018522262573242188, -0.8570404052734375, 0.199951171875, -0.4932975769042969, 0.23809242248535156, -0.01665496826171875, 0.1457347869873047, 0.24445343017578125, 0.025781631469726562, 0.01885986328125, 0.37650299072265625, 0.3178596496582031, -0.248199462890625, -0.0312957763671875, -0.0605926513671875, 0.09553718566894531, -0.09675788879394531, 0.27625274658203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000026.npy"}
{"epoch": 0.07878787878787878, "step": 27, "batch_size": 128, "mean": 0.06259748339653015, "std": 0.3224225342273712, "min": -0.6753768920898438, "p10": -0.35996208190917967, "median": 0.07408618927001953, "p90": 0.4926414489746093, "max": 0.902496337890625, "pos_frac": 0.5703125, "sample": [-0.01468658447265625, 0.3122100830078125, 0.6022911071777344, 0.48917388916015625, -1.52587890625e-05, -0.18732833862304688, -0.3429679870605469, 0.22165679931640625, 0.027736663818359375, 0.5974273681640625, -0.06264495849609375, -0.1570892333984375, 0.057861328125, -0.21292877197265625, 0.23487091064453125, 0.35309600830078125, 0.12187957763671875, 0.745819091796875, 0.4345893859863281, -0.16485595703125, 0.2556285858154297, 0.5976524353027344, 0.2233104705810547, 0.5613536834716797, 0.6842422485351562, 0.167327880859375, 0.5350723266601562, -0.06597518920898438, 0.26735496520996094, -0.10004043579101562, 0.5120620727539062, 0.0925750732421875, -0.58544921875, -0.4562263488769531, -0.22016143798828125, 0.4792327880859375, 0.076446533203125, 0.31258392333984375, -0.07631683349609375, 0.3399200439453125, 0.03955841064453125, -0.45068359375, 0.038486480712890625, -0.347564697265625, -0.15217018127441406, 0.12953948974609375, 0.47765159606933594, -0.4606170654296875, -0.2911052703857422, -0.4702606201171875, 0.26688575744628906, -0.1392974853515625, 0.28284454345703125, 0.500732421875, 0.16382598876953125, 0.42841339111328125, 0.60589599609375, -0.45410919189453125, 0.902496337890625, 0.07128143310546875, -0.0745086669921875, 0.6605072021484375, -0.1174163818359375, -0.17122268676757812, -0.3057365417480469, -0.01403045654296875, -0.30403900146484375, 0.12163925170898438, -0.4632110595703125, -0.22766876220703125, 0.22728347778320312, -0.33928680419921875, 0.1850128173828125, 0.12705039978027344, -0.30889892578125, 0.3341064453125, 0.09186553955078125, -0.0771942138671875, 0.1404895782470703, -0.02532958984375, -0.49021148681640625, 0.03352546691894531, 0.11883163452148438, 0.44017791748046875, -0.5653839111328125, -0.2759857177734375, -0.2397613525390625, 0.23935317993164062, -0.23860740661621094, -0.1241912841796875, 0.34438323974609375, -0.11031723022460938, 0.23217391967773438, 0.030185699462890625, 0.11971855163574219, 0.07735443115234375, -0.02861785888671875, -0.02941131591796875, -0.2843208312988281, -0.1531505584716797, -0.6753768920898438, 0.12358856201171875, -0.01819610595703125, -0.06992530822753906, 0.3626556396484375, 0.01554107666015625, -0.11155509948730469, 0.10215377807617188, 0.2536773681640625, 0.29763031005859375, 0.16338348388671875, -0.3888893127441406, 0.23331069946289062, -0.29796600341796875, 0.46816253662109375, 0.2594757080078125, -0.5170822143554688, 0.16991424560546875, 0.4691276550292969, 0.2629680633544922, -0.03884315490722656, 0.07172584533691406, 0.12775039672851562, -0.401031494140625, 0.6331558227539062, -0.1334228515625, 0.20694732666015625, 0.08994865417480469], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000027.npy"}
{"epoch": 0.08181818181818182, "step": 28, "batch_size": 128, "mean": 0.07496914267539978, "std": 0.40043699741363525, "min": -0.9407806396484375, "p10": -0.46212158203125, "median": 0.06155109405517578, "p90": 0.5538673400878906, "max": 1.26611328125, "pos_frac": 0.5859375, "sample": [0.049072265625, 0.106597900390625, 0.33819580078125, -0.00536346435546875, -0.32517051696777344, -0.51068115234375, -0.20269775390625, -0.18939208984375, 0.036716461181640625, -0.819610595703125, 0.553375244140625, 0.9818840026855469, 0.22589874267578125, -0.274383544921875, -0.042186737060546875, 0.034343719482421875, 0.42192840576171875, 0.40407752990722656, -0.4126434326171875, -0.22601318359375, -0.13552474975585938, -0.2235565185546875, 0.3280220031738281, 0.5038070678710938, 1.0432281494140625, -0.5634078979492188, 0.07355117797851562, 0.18406105041503906, -0.23738861083984375, 0.3075752258300781, -0.7829818725585938, 0.000507354736328125, -0.1935749053955078, 0.4337005615234375, -0.04447174072265625, 0.49298095703125, 0.08892822265625, 0.2849578857421875, -0.44217491149902344, 0.0018463134765625, 1.0194091796875, 0.26953125, -0.2580299377441406, 0.449859619140625, -0.17254257202148438, 0.6500244140625, -0.218017578125, -0.7492523193359375, 0.3789520263671875, 0.28058624267578125, 0.43475341796875, 0.723541259765625, -0.12932586669921875, 0.2011432647705078, 0.5550155639648438, -0.5253753662109375, 0.24913787841796875, 0.14972686767578125, 0.24377822875976562, 0.246002197265625, 0.0558319091796875, 0.8129043579101562, -0.01656341552734375, -0.16487884521484375, -0.49542999267578125, 0.0260467529296875, 0.024982452392578125, -0.11545181274414062, 0.5368423461914062, -0.25847625732421875, 0.2668609619140625, -0.9407806396484375, 0.7005615234375, 0.39048004150390625, 0.2206268310546875, 0.15665054321289062, -0.2052021026611328, 1.26611328125, -0.00617218017578125, 0.17146682739257812, 0.803924560546875, 0.31619834899902344, 0.12195587158203125, 0.26165771484375, -0.4837188720703125, 0.33792877197265625, -0.21556663513183594, -0.6051254272460938, -0.15134620666503906, 0.19573593139648438, -0.138397216796875, -0.1398773193359375, 0.3467216491699219, 0.2556953430175781, 0.184906005859375, -0.5494709014892578, -0.045841217041015625, 0.1914043426513672, 0.1160430908203125, 0.6735382080078125, -0.13714599609375, 0.0796966552734375, 0.11940765380859375, 0.02846527099609375, -0.15529823303222656, 0.2545661926269531, 0.19950485229492188, -0.02675628662109375, -0.15139007568359375, 0.06727027893066406, -0.5523033142089844, -0.29656982421875, 0.5426464080810547, 0.048858642578125, -0.0854034423828125, -0.4528656005859375, 0.033977508544921875, -0.17993545532226562, 0.09154510498046875, 0.956512451171875, -0.4984283447265625, 0.220306396484375, 0.09629058837890625, -0.119049072265625, -0.33946990966796875, 0.6955490112304688, -0.07079887390136719, 0.261138916015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000028.npy"}
{"epoch": 0.08484848484848485, "step": 29, "batch_size": 128, "mean": 0.06998194754123688, "std": 0.46646609902381897, "min": -1.3257293701171875, "p10": -0.49623947143554686, "median": 0.05862617492675781, "p90": 0.6727142333984375, "max": 1.82086181640625, "pos_frac": 0.578125, "sample": [-0.38250732421875, -0.027313232421875, -0.08446693420410156, -0.6396026611328125, 0.18062973022460938, 0.641754150390625, 0.0226593017578125, 0.03025054931640625, 0.18677520751953125, 0.08175277709960938, 0.803619384765625, -0.07821846008300781, -0.39101409912109375, 0.1657867431640625, 0.03479957580566406, 0.587615966796875, 0.940185546875, 1.046875, -0.3598747253417969, -0.5239028930664062, -0.547210693359375, -0.03408050537109375, -0.665435791015625, 0.796905517578125, -0.01117706298828125, -0.14752197265625, -0.43328094482421875, 0.636322021484375, -0.7557296752929688, 1.370147705078125, 0.13675689697265625, 0.32080078125, 0.06707763671875, 0.29505157470703125, 0.236480712890625, 0.2641487121582031, 0.08372116088867188, -0.01277923583984375, 0.6370010375976562, -0.07071685791015625, 0.043537139892578125, -0.2142353057861328, 0.3453826904296875, -0.5489959716796875, -0.03363800048828125, -1.3257293701171875, 0.6837158203125, -0.43259429931640625, -0.15087509155273438, -0.1876068115234375, -0.9615554809570312, 0.2012195587158203, 0.5296401977539062, 0.8291015625, -0.49530792236328125, 0.4039497375488281, 0.727386474609375, 0.022869110107421875, 0.1383686065673828, 0.1051177978515625, -0.18495559692382812, 0.24964332580566406, 0.6964111328125, -0.33515167236328125, -0.4496307373046875, 0.2798423767089844, 0.050174713134765625, 0.2525291442871094, 0.00679779052734375, 0.306396484375, 0.27101898193359375, 0.35169219970703125, 0.4411582946777344, -0.4527740478515625, 0.0977020263671875, -0.3076171875, 0.09662628173828125, 0.12896347045898438, 0.005321502685546875, 0.0442352294921875, 0.180267333984375, -0.2410736083984375, 1.82086181640625, -0.3545494079589844, -0.240234375, 0.667999267578125, 0.4570465087890625, -0.0172119140625, -0.28002166748046875, 0.15201187133789062, -0.34694671630859375, -0.059185028076171875, 0.22695159912109375, -0.19388198852539062, -0.3510589599609375, 0.5392303466796875, 0.45245361328125, 0.5753555297851562, 0.20290374755859375, 0.07004928588867188, -0.4984130859375, -0.5205917358398438, 0.022588729858398438, 0.2324676513671875, -0.5121040344238281, -0.4650001525878906, 0.4234123229980469, -0.8253936767578125, -0.012477874755859375, -0.272613525390625, -0.2614784240722656, -0.07767868041992188, 1.0481109619140625, -0.21068572998046875, 0.09619903564453125, 0.233428955078125, 0.20594024658203125, 0.7325286865234375, 0.31601715087890625, -0.38794708251953125, 0.2612628936767578, -0.5191555023193359, -0.0611724853515625, 0.7241973876953125, 0.23870086669921875, -0.44481658935546875, 0.23859596252441406, 0.3623809814453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000029.npy"}
{"epoch": 0.08787878787878788, "step": 30, "batch_size": 128, "mean": 0.04887509346008301, "std": 0.4386157691478729, "min": -1.5555877685546875, "p10": -0.3929893493652344, "median": 0.07504940032958984, "p90": 0.4986814498901365, "max": 1.2475433349609375, "pos_frac": 0.5859375, "sample": [-0.06264114379882812, -0.2910919189453125, 0.13404083251953125, -0.6143798828125, 0.257476806640625, -0.024026870727539062, -0.10645294189453125, -0.017353057861328125, -0.2622833251953125, -0.3843269348144531, 0.1638031005859375, -0.2730560302734375, 0.10358810424804688, -0.00321197509765625, 0.020244598388671875, -0.38867950439453125, 0.10951614379882812, -0.2315826416015625, -0.072296142578125, 0.20486831665039062, 0.0333709716796875, 0.4799365997314453, 0.4763336181640625, -0.6823348999023438, 0.08088874816894531, -0.00439453125, -0.02426910400390625, -0.0177154541015625, -1.3499679565429688, -0.158203125, 0.0032958984375, -0.0182037353515625, 0.247161865234375, 0.36932373046875, 0.54241943359375, 0.1077728271484375, 0.010101318359375, 0.2906341552734375, 0.7700004577636719, -0.2430572509765625, -0.6148529052734375, 0.11985015869140625, 0.038967132568359375, 0.8499069213867188, 0.823974609375, 0.2257080078125, 0.16332626342773438, 0.17293930053710938, 1.2475433349609375, -0.8836212158203125, -0.6657028198242188, -0.26000213623046875, 0.00315093994140625, 0.03779411315917969, 0.11717987060546875, 0.2706928253173828, -1.3355026245117188, 0.23919677734375, 0.05130767822265625, -0.38950347900390625, 0.47362518310546875, 0.16058349609375, 0.830841064453125, -0.401123046875, -0.06629180908203125, 0.1808624267578125, 0.3112525939941406, -0.281280517578125, -0.040069580078125, -0.28936004638671875, 0.1716156005859375, 0.17908859252929688, 0.21419715881347656, 0.2803516387939453, 0.0381622314453125, -0.22280120849609375, 0.6989288330078125, -0.07915306091308594, 0.04547882080078125, 0.46596527099609375, 0.31380462646484375, -0.053768157958984375, -0.0763702392578125, -0.49993896484375, -0.1030426025390625, 0.6270751953125, -0.49956512451171875, -0.07627105712890625, 0.37722015380859375, 0.31292152404785156, 0.37360382080078125, 0.2957611083984375, 0.08652114868164062, 0.29688262939453125, 0.43255615234375, -0.06694793701171875, 0.25217437744140625, -1.187255859375, 0.2822914123535156, 0.34073638916015625, -0.2459239959716797, -0.17003631591796875, 0.2014007568359375, -0.285186767578125, 0.6073341369628906, 0.282989501953125, 0.16196823120117188, 0.11419105529785156, 0.8789596557617188, 0.4439277648925781, 0.08492279052734375, 0.4601631164550781, -0.7552108764648438, -0.09114265441894531, 0.1763763427734375, -0.10865020751953125, -0.2392406463623047, 0.9412689208984375, 0.5971946716308594, 0.7184906005859375, -0.06818008422851562, -0.2316741943359375, 0.12798309326171875, 0.47966766357421875, -1.5555877685546875, -0.08794975280761719, 0.2618827819824219, 0.06921005249023438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000030.npy"}
{"epoch": 0.09090909090909091, "step": 31, "batch_size": 128, "mean": 0.09291373193264008, "std": 0.48592203855514526, "min": -1.2648239135742188, "p10": -0.5058197021484375, "median": 0.048534393310546875, "p90": 0.6954364776611328, "max": 1.458221435546875, "pos_frac": 0.5703125, "sample": [0.4162139892578125, 0.9875335693359375, 0.49074554443359375, 1.3168182373046875, 0.9044227600097656, -0.3107490539550781, 0.5633468627929688, 0.46337127685546875, 0.35880279541015625, 0.2127094268798828, -0.001163482666015625, -0.003864288330078125, 0.12871360778808594, 0.36639404296875, 0.04645538330078125, -0.18342208862304688, -0.358978271484375, -0.632904052734375, -0.8912200927734375, -0.3063011169433594, -0.06970596313476562, 0.24460220336914062, 0.3361663818359375, 0.153167724609375, -0.3508758544921875, 0.030275344848632812, 0.43970489501953125, -1.2648239135742188, -0.20021820068359375, -0.30365753173828125, 0.290679931640625, -0.5816497802734375, 0.14040374755859375, 0.11790847778320312, 0.14885711669921875, -0.2340221405029297, 0.2996387481689453, 0.227020263671875, -0.026103973388671875, -0.103546142578125, 0.2294139862060547, 0.3113861083984375, -0.2804718017578125, -0.036163330078125, 0.33374786376953125, 0.04399871826171875, 0.17933082580566406, -0.205078125, 1.45782470703125, -0.5284194946289062, -0.172271728515625, -0.50555419921875, -0.4003410339355469, 0.7203292846679688, 0.13497352600097656, 0.24538803100585938, 0.0552978515625, -0.1833343505859375, -0.189208984375, -0.45986175537109375, 0.0506134033203125, -0.22894287109375, -0.7524032592773438, -0.8198394775390625, -0.318939208984375, -0.4549121856689453, -0.02622222900390625, 0.09080314636230469, -0.1091766357421875, -0.0022430419921875, 0.7548141479492188, 0.33165550231933594, 0.45458984375, 0.5442657470703125, 0.2686023712158203, 0.4005393981933594, -0.5324325561523438, -0.40375518798828125, 0.037506103515625, -0.03680610656738281, 0.3018207550048828, -0.2212371826171875, 0.5535202026367188, -1.0229034423828125, 0.9764938354492188, 1.1201057434082031, -0.2421741485595703, 0.1583099365234375, 0.20464134216308594, -0.29665374755859375, 0.07033538818359375, 0.7054634094238281, 0.4951801300048828, 0.10011100769042969, 0.41570281982421875, 0.37821197509765625, -0.24500656127929688, 0.3487091064453125, 1.2729072570800781, -0.034397125244140625, 0.04518890380859375, 0.2872467041015625, 0.8751983642578125, 0.0041351318359375, -0.011957168579101562, 1.458221435546875, 0.207183837890625, 0.36267852783203125, -0.02208709716796875, 0.6911392211914062, -0.5228195190429688, -0.3228321075439453, 0.1661834716796875, 0.0347442626953125, -0.20211029052734375, 0.6562423706054688, 0.26398658752441406, 0.582550048828125, -0.5216217041015625, 0.01505279541015625, 0.609527587890625, 1.0828857421875, -0.08619499206542969, -0.295501708984375, -0.7126922607421875, -0.15590286254882812, -0.506439208984375, 0.012361526489257812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000031.npy"}
{"epoch": 0.09393939393939393, "step": 32, "batch_size": 128, "mean": 0.07559210807085037, "std": 0.46791157126426697, "min": -1.56982421875, "p10": -0.4394527435302734, "median": 0.07349109649658203, "p90": 0.6755987167358398, "max": 1.46661376953125, "pos_frac": 0.578125, "sample": [0.00543975830078125, -0.17755126953125, 0.45700836181640625, -0.1328125, 0.927490234375, -0.10747528076171875, -0.25380897521972656, -0.3809814453125, 1.0858612060546875, -0.5721282958984375, 0.0027408599853515625, 0.29043006896972656, -0.0535888671875, 0.3376750946044922, 0.36109161376953125, 0.7337493896484375, 0.3825645446777344, -0.303802490234375, 0.3036956787109375, 0.1091461181640625, 0.368255615234375, -0.4031410217285156, 0.32387542724609375, 0.1504802703857422, -0.2559967041015625, -0.39597320556640625, -0.3019428253173828, 0.38716697692871094, 0.2596435546875, -0.0550689697265625, -0.16245269775390625, -0.2025909423828125, 0.5615386962890625, 0.39688873291015625, -0.885162353515625, -0.22301101684570312, -0.09614944458007812, -0.7240524291992188, 0.3117179870605469, 0.8683319091796875, 0.7083663940429688, 0.6882839202880859, 0.930450439453125, -0.0592193603515625, 0.11288642883300781, 0.029052734375, 0.24874114990234375, 0.3028411865234375, 0.2117786407470703, 0.5303955078125, 0.06751537322998047, 0.5588207244873047, 0.07576942443847656, 0.5313835144042969, -0.042633056640625, 0.6701622009277344, 0.20678329467773438, 0.1317596435546875, 0.26879119873046875, 0.08474159240722656, 0.5650100708007812, 1.1927490234375, 0.8679656982421875, -0.42797088623046875, 0.8580703735351562, -0.5108642578125, -0.13689613342285156, 0.12550735473632812, 0.08724594116210938, 0.548614501953125, 0.16454505920410156, 0.038665771484375, -0.253814697265625, 0.24610137939453125, 0.314727783203125, 0.13897132873535156, 1.46661376953125, -1.0700149536132812, -0.3612823486328125, -0.030426025390625, 0.3307647705078125, -0.881103515625, 0.07669830322265625, -0.772613525390625, 0.08199882507324219, -0.244232177734375, -0.1427326202392578, 0.431121826171875, -0.22971343994140625, -0.2232666015625, -0.10121917724609375, -0.31793212890625, 0.31847381591796875, 0.08294677734375, 0.4240550994873047, -0.13170623779296875, 0.23508262634277344, 0.4700775146484375, 0.19370079040527344, 0.25611114501953125, -0.09064483642578125, 0.8536300659179688, 0.3138084411621094, -0.2757091522216797, -0.28501129150390625, -0.5433235168457031, -0.26706695556640625, -1.56982421875, -0.4662437438964844, 0.019176483154296875, -0.10132026672363281, 0.6057052612304688, 0.0296173095703125, -0.2561187744140625, 0.0710601806640625, -0.3173484802246094, 0.6028366088867188, -0.0311126708984375, -0.3883209228515625, -0.078094482421875, 0.190948486328125, 0.723724365234375, -0.6866302490234375, -0.5074348449707031, -0.81353759765625, -0.004425048828125, 0.00243377685546875, 0.0712127685546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000032.npy"}
{"epoch": 0.09696969696969697, "step": 33, "batch_size": 128, "mean": 0.10725614428520203, "std": 0.5393900275230408, "min": -1.24200439453125, "p10": -0.4589399337768554, "median": 0.04214763641357422, "p90": 0.7397121429443357, "max": 2.38494873046875, "pos_frac": 0.5546875, "sample": [-0.22332286834716797, 0.6231651306152344, 0.3597869873046875, 0.26799774169921875, -0.24566650390625, -0.12942123413085938, 0.0465240478515625, -1.24200439453125, 0.12008857727050781, 0.13267135620117188, -0.250213623046875, 0.2493896484375, -0.170867919921875, -0.34603118896484375, 0.3201141357421875, 1.118865966796875, -0.10153961181640625, -0.704071044921875, 0.26318359375, 0.43460845947265625, -0.22065353393554688, 0.98175048828125, 0.8620338439941406, 0.14320755004882812, -0.5161361694335938, 0.3119354248046875, 1.1692352294921875, -0.11673164367675781, -0.8668441772460938, 0.4756927490234375, 0.1011810302734375, 0.366607666015625, -0.2390003204345703, 0.53460693359375, -0.0724029541015625, 0.15105056762695312, 0.6944046020507812, 0.0045013427734375, 0.4494132995605469, -0.020921707153320312, 0.6284332275390625, 0.079681396484375, -1.1655693054199219, -0.0659942626953125, -0.060398101806640625, -0.0550689697265625, 0.02748870849609375, -0.3820343017578125, 0.6896457672119141, 0.283843994140625, -0.00263214111328125, 0.3156585693359375, -0.22495651245117188, 0.27906036376953125, -0.4109535217285156, -0.654388427734375, 0.573272705078125, -0.07040023803710938, 0.6811065673828125, 0.91802978515625, 1.3582000732421875, -1.226593017578125, 1.6478271484375, -0.10623645782470703, 0.06947517395019531, 2.38494873046875, 0.03809547424316406, -0.1822376251220703, 0.026254653930664062, -0.5685806274414062, -0.15523910522460938, -0.039562225341796875, -0.03714752197265625, 0.00678253173828125, 0.800506591796875, -0.01511383056640625, -0.07711029052734375, 0.08351516723632812, -0.16452789306640625, 0.13520431518554688, -0.19426345825195312, 0.48531532287597656, 0.3604774475097656, 0.046199798583984375, -0.5671672821044922, 0.7136573791503906, -0.1544342041015625, 0.16965103149414062, 0.31219482421875, 0.23271751403808594, -0.42162322998046875, -0.31352996826171875, -0.055782318115234375, 0.903106689453125, -0.124755859375, -0.00542449951171875, -0.43442726135253906, 0.5964164733886719, -0.399017333984375, 0.0167083740234375, 0.29248046875, -0.2525520324707031, -0.2640228271484375, -0.6751022338867188, -0.1680755615234375, 0.4589805603027344, -0.3274879455566406, 0.9261817932128906, -0.01651763916015625, 0.18062591552734375, 0.1537628173828125, 0.3699760437011719, -0.612884521484375, -1.0701408386230469, 0.107696533203125, -0.0469207763671875, 0.37171173095703125, -0.39762115478515625, 0.5537185668945312, 0.5776557922363281, 0.0153350830078125, 0.2020721435546875, -0.7012481689453125, 0.8635711669921875, 0.06021881103515625, 0.3352184295654297, 0.5041427612304688, 1.04351806640625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000033.npy"}
{"epoch": 0.1, "step": 34, "batch_size": 128, "mean": 0.03575011342763901, "std": 0.49251511693000793, "min": -1.913177490234375, "p10": -0.5878757476806641, "median": 0.10413837432861328, "p90": 0.5704925537109375, "max": 1.28533935546875, "pos_frac": 0.59375, "sample": [0.5393295288085938, -0.49609375, 0.19096755981445312, -0.19367599487304688, -0.584442138671875, 0.1120147705078125, 0.32825660705566406, 0.6195716857910156, 0.2155170440673828, 0.12454605102539062, 0.23215866088867188, 0.6113510131835938, -1.0370330810546875, 0.31734466552734375, -0.9288787841796875, 0.914031982421875, 0.5580825805664062, 0.4239845275878906, 0.23084259033203125, -0.006191253662109375, -0.038562774658203125, -0.04120063781738281, -0.4266242980957031, 0.20135116577148438, 0.1746978759765625, 0.22105026245117188, 0.37117767333984375, -0.11749267578125, 0.10515022277832031, -0.1812744140625, 0.22673797607421875, -0.24558258056640625, 0.202056884765625, -0.17177581787109375, 0.4384613037109375, 0.3422565460205078, -0.22348785400390625, -0.7229995727539062, 0.3171844482421875, 0.03424263000488281, 0.1222686767578125, 0.1858978271484375, 0.36635589599609375, -0.1919403076171875, 0.52166748046875, 0.1950244903564453, -0.08592605590820312, 0.18975067138671875, -0.0060882568359375, 0.10312652587890625, -0.19365692138671875, 0.12430572509765625, -0.07039260864257812, 0.47681427001953125, -0.0131683349609375, -0.016902923583984375, 0.03597259521484375, -0.25762176513671875, -0.429290771484375, -0.7972183227539062, 0.078033447265625, -0.229827880859375, 0.50994873046875, 0.24268722534179688, 0.6947021484375, 0.414886474609375, 0.012014389038085938, 0.5034332275390625, -0.5853729248046875, 0.5706100463867188, 0.02437591552734375, -0.5715217590332031, -0.6008758544921875, -0.5254173278808594, -0.6171722412109375, -0.7505569458007812, -0.241302490234375, -0.07217597961425781, -1.913177490234375, -0.10570526123046875, 0.036731719970703125, 0.14800643920898438, -0.16457366943359375, 0.00870513916015625, 0.1387176513671875, 0.03856658935546875, 0.3729743957519531, 0.28011131286621094, -0.21259307861328125, 0.407958984375, 1.2818450927734375, 0.21067047119140625, 0.1529693603515625, 0.36197662353515625, 0.7437515258789062, -0.8700408935546875, 0.7756977081298828, -0.14495849609375, 0.96234130859375, -1.326263427734375, 0.18711090087890625, 0.3993988037109375, -0.38074493408203125, -0.02536773681640625, 0.5704421997070312, 0.20711326599121094, 0.270843505859375, -0.5937156677246094, 0.7736053466796875, -0.1860504150390625, -0.361663818359375, 0.08254528045654297, -0.2840576171875, -0.11937332153320312, 1.28533935546875, 0.10555267333984375, -0.931182861328125, 0.0811614990234375, 0.10918426513671875, 0.057407379150390625, 0.15402984619140625, 0.13714599609375, 0.674896240234375, -0.17238998413085938, 0.3782997131347656, -0.1332855224609375, 0.7756805419921875, -1.422119140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000034.npy"}
{"epoch": 0.10303030303030303, "step": 35, "batch_size": 128, "mean": 0.12881217896938324, "std": 0.541138768196106, "min": -1.4449615478515625, "p10": -0.4453521728515625, "median": 0.15484333038330078, "p90": 0.7797294616699219, "max": 1.957611083984375, "pos_frac": 0.6015625, "sample": [-0.05863189697265625, -0.2952308654785156, 1.0040283203125, 0.6537017822265625, 0.9352264404296875, 0.8033256530761719, -0.2956275939941406, 0.1383953094482422, 0.31194496154785156, 0.5361480712890625, 0.443878173828125, -0.1316070556640625, 0.284576416015625, -0.31284332275390625, -0.095184326171875, 0.24495315551757812, 0.159088134765625, 0.178375244140625, 0.25150299072265625, -1.168121337890625, 0.3547096252441406, 0.3419971466064453, -0.5775146484375, -0.28227996826171875, -0.02086639404296875, -0.7199020385742188, -0.14449310302734375, 0.434234619140625, -1.4449615478515625, 0.045246124267578125, 0.37084197998046875, -0.0208587646484375, 0.36710357666015625, -0.25054168701171875, -0.050079345703125, 0.7765960693359375, 0.3489265441894531, 0.7870407104492188, -0.12071609497070312, 0.362213134765625, 0.209991455078125, 0.2778511047363281, -0.1374359130859375, 0.3042449951171875, 0.0906524658203125, -0.4484710693359375, 0.4185829162597656, -0.60125732421875, 0.1763153076171875, 0.17116165161132812, 0.1471099853515625, 0.42534637451171875, 0.6425399780273438, 0.3277435302734375, 0.4953956604003906, -0.10804176330566406, 0.1704559326171875, 0.6633071899414062, 0.7427597045898438, -0.4180316925048828, 1.957611083984375, -0.5591888427734375, -0.17437744140625, 0.13491249084472656, 0.43829345703125, -0.4178047180175781, 0.9550247192382812, 0.164306640625, 0.15059852600097656, -0.8298892974853516, -0.014759063720703125, 0.6014785766601562, 0.9655838012695312, -0.74749755859375, -0.07712554931640625, -0.06932449340820312, 1.75860595703125, 0.50775146484375, 0.26117515563964844, 0.21936607360839844, 0.34635162353515625, -0.14384078979492188, -0.3091888427734375, 0.707489013671875, 0.3530731201171875, -0.3111572265625, 0.3725414276123047, 0.5037193298339844, 0.017663955688476562, 0.16507720947265625, -0.23130035400390625, 1.0290451049804688, -0.06322097778320312, -1.39739990234375, -0.14873123168945312, -0.03548622131347656, 0.00817108154296875, -0.055751800537109375, 0.393218994140625, 0.34450531005859375, 0.071197509765625, 0.34101104736328125, 0.14508628845214844, -0.36421966552734375, -0.40995025634765625, -0.17510223388671875, 0.08819580078125, 0.837005615234375, 0.3342094421386719, 1.450653076171875, -0.2565498352050781, 0.486297607421875, 0.45395851135253906, -0.41596221923828125, -0.30474281311035156, 0.8889694213867188, 0.48557281494140625, -0.37097930908203125, 0.434722900390625, 1.048095703125, -0.494110107421875, -0.4440155029296875, 0.24504852294921875, 0.020351409912109375, -1.3326873779296875, -0.10370635986328125, 0.0009555816650390625, -0.63568115234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000035.npy"}
{"epoch": 0.10606060606060606, "step": 36, "batch_size": 128, "mean": 0.16065537929534912, "std": 0.5889185667037964, "min": -2.345550537109375, "p10": -0.47116622924804685, "median": 0.14964675903320312, "p90": 0.8909980773925781, "max": 1.5071792602539062, "pos_frac": 0.65625, "sample": [0.08065032958984375, 0.11283493041992188, 0.4572277069091797, -0.039707183837890625, 0.04376220703125, -0.7815017700195312, 0.4066886901855469, -0.6306610107421875, 0.7969894409179688, 1.1170692443847656, 0.5941410064697266, 0.8121795654296875, -0.596954345703125, 0.32598876953125, -0.2928962707519531, 0.3708648681640625, -0.38120269775390625, -0.47997283935546875, -0.2101573944091797, 0.1190643310546875, 0.3237190246582031, 0.9900131225585938, 1.0681934356689453, -0.5155563354492188, 0.1428375244140625, 0.025964736938476562, 0.9238872528076172, 1.0852241516113281, -0.7237701416015625, 0.1687469482421875, 0.6604232788085938, 1.0782814025878906, -0.277557373046875, 0.82952880859375, 0.047061920166015625, -1.2787628173828125, 0.00778961181640625, 0.5678329467773438, -1.166534423828125, 0.10964393615722656, 0.5070934295654297, 0.6930694580078125, 0.8132476806640625, -0.09694480895996094, 0.21495819091796875, -0.07474517822265625, -2.345550537109375, -0.1632843017578125, -0.4673919677734375, -0.79302978515625, 0.38067626953125, 0.37384033203125, -0.03993988037109375, -0.4332418441772461, -0.8355712890625, 0.1707305908203125, 0.055789947509765625, 0.14971160888671875, -0.28618621826171875, 0.44301414489746094, 0.29038238525390625, -0.3006782531738281, 1.1184539794921875, -0.194427490234375, 0.7997360229492188, -0.0726776123046875, 0.29644775390625, 0.82080078125, -0.26666259765625, 0.0389862060546875, 0.15996360778808594, 1.5071792602539062, 0.4405975341796875, -0.4293403625488281, 0.5335540771484375, -0.056182861328125, 0.1363525390625, 0.0077533721923828125, 0.6229400634765625, 0.48029136657714844, -0.3887939453125, 0.6645965576171875, 0.8880386352539062, -0.1957855224609375, 0.2129802703857422, -0.04182243347167969, 0.0587921142578125, 0.03894805908203125, 0.19805145263671875, 0.04717254638671875, -1.268096923828125, 1.455902099609375, 1.0962371826171875, -0.03281211853027344, 1.2074356079101562, -0.09548568725585938, -0.3435993194580078, 0.5324172973632812, 0.3982353210449219, 0.176788330078125, -0.11231231689453125, 0.24269866943359375, 0.11586761474609375, 0.1495819091796875, -0.146240234375, 0.4810028076171875, -0.11703681945800781, -0.4329376220703125, -0.16016387939453125, 0.7117156982421875, 0.8979034423828125, 0.5051708221435547, 0.8989677429199219, 0.17501449584960938, 0.715423583984375, 0.15003204345703125, 0.224029541015625, -0.06624603271484375, 0.747589111328125, 0.735076904296875, 0.08057022094726562, 0.5291786193847656, -1.3323516845703125, 0.4978485107421875, 0.19103240966796875, 0.17633819580078125, 0.14000701904296875, -0.13416290283203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000036.npy"}
{"epoch": 0.10909090909090909, "step": 37, "batch_size": 128, "mean": 0.11037015914916992, "std": 0.6310659050941467, "min": -1.8463134765625, "p10": -0.6459224700927734, "median": 0.05354881286621094, "p90": 0.8694114685058594, "max": 2.38470458984375, "pos_frac": 0.5703125, "sample": [-0.10260963439941406, 0.009525299072265625, 0.628753662109375, -0.8418369293212891, -0.3824748992919922, -0.726654052734375, -0.5645980834960938, -0.055408477783203125, -0.6694793701171875, 0.21832656860351562, -1.0152435302734375, 0.05718994140625, 0.8805999755859375, -0.11931228637695312, 0.0070934295654296875, -0.2218475341796875, 0.15269088745117188, 0.07666778564453125, -0.6185302734375, 0.14923667907714844, 0.7498664855957031, 0.26416015625, -0.2290191650390625, -0.7767105102539062, -0.020755767822265625, -0.9585304260253906, -0.8421478271484375, -0.12481689453125, 0.021730422973632812, -0.0100555419921875, 0.8646163940429688, 0.22133636474609375, -1.8463134765625, 0.3717479705810547, -0.026092529296875, 0.6161880493164062, -0.48722267150878906, -0.6232147216796875, -0.7596149444580078, -0.03394317626953125, 2.38470458984375, 0.7863311767578125, -0.6357460021972656, -0.5201988220214844, -0.028203964233398438, 0.32281494140625, 0.100372314453125, 0.24995040893554688, -1.167266845703125, 1.334197998046875, 1.698974609375, -0.6533966064453125, -0.6363067626953125, 0.19766998291015625, 0.35991668701171875, -0.30133819580078125, -0.0625762939453125, 0.363311767578125, -0.25701904296875, -0.441253662109375, -0.1593170166015625, 0.18685150146484375, 1.484893798828125, -0.26008033752441406, 0.0089874267578125, 0.35536956787109375, -0.31890869140625, 0.5686416625976562, 0.2412872314453125, -0.38742828369140625, -0.2608184814453125, 1.04327392578125, -0.740692138671875, 1.0863265991210938, 0.4963531494140625, -0.3010406494140625, 0.1697540283203125, 1.5103607177734375, 0.7547225952148438, 0.5770072937011719, 0.5945720672607422, -0.10737037658691406, -0.5004310607910156, -0.022457122802734375, -0.34665489196777344, 0.2534332275390625, -0.6563949584960938, -0.1252765655517578, 0.3449668884277344, 0.04195404052734375, 0.068511962890625, -0.4618511199951172, -0.08237457275390625, 0.3641357421875, 0.32564544677734375, 0.12213897705078125, 0.15018844604492188, 0.5222797393798828, 0.5101375579833984, 0.0155029296875, 1.2725372314453125, 0.45782470703125, -0.23520469665527344, 0.2588691711425781, 1.0583648681640625, -0.05608558654785156, -0.6427192687988281, 0.7368183135986328, 1.4211273193359375, -0.5404052734375, 0.008264541625976562, 0.4037189483642578, 0.2327880859375, -0.3437156677246094, 0.6604423522949219, 0.6239509582519531, 0.3118705749511719, 1.527252197265625, 0.4354095458984375, -0.17052459716796875, 0.245849609375, 0.9342041015625, 0.0486907958984375, 0.6360626220703125, 0.5101509094238281, 0.049907684326171875, 0.3779125213623047, 0.50958251953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000037.npy"}
{"epoch": 0.11212121212121212, "step": 38, "batch_size": 128, "mean": 0.15855847299098969, "std": 0.5921036005020142, "min": -2.699432373046875, "p10": -0.46338119506835934, "median": 0.15355968475341797, "p90": 0.8857147216796875, "max": 1.949737548828125, "pos_frac": 0.6484375, "sample": [-0.027523040771484375, 0.254669189453125, -0.435638427734375, 0.4123497009277344, 1.1732177734375, -0.00711822509765625, -0.21131515502929688, 0.8634033203125, -0.32021331787109375, -0.036762237548828125, -0.505279541015625, -0.220672607421875, 1.189117431640625, -0.08635711669921875, 0.4125213623046875, 1.085235595703125, -0.06134796142578125, 0.7407608032226562, 0.6839370727539062, 0.36902618408203125, 1.949737548828125, -0.37042236328125, 0.9103240966796875, 0.13205528259277344, -0.09451866149902344, 0.05457878112792969, 0.4699993133544922, 0.5263442993164062, -0.5034236907958984, 0.6063613891601562, 0.153594970703125, 0.46929931640625, 0.5372238159179688, 0.698699951171875, -0.45397186279296875, 0.176361083984375, 0.03661155700683594, 0.6338272094726562, 0.04630088806152344, 0.5184459686279297, 0.1712799072265625, -0.9526443481445312, 0.22988128662109375, 0.0068359375, 0.6629409790039062, 0.06055450439453125, 0.19901084899902344, 0.7635307312011719, 0.8203659057617188, 0.15030670166015625, -0.2668609619140625, 0.31308937072753906, 1.049163818359375, 0.24083709716796875, -0.0131683349609375, 0.23419189453125, 0.563232421875, -0.2845878601074219, -0.3595123291015625, -0.4872856140136719, -0.39234352111816406, 0.25391197204589844, 0.620574951171875, 0.4820098876953125, -1.6419525146484375, 0.6243553161621094, 0.0751495361328125, -0.4524688720703125, 0.0810546875, -0.40457725524902344, 0.36713409423828125, 0.09351730346679688, -0.20644378662109375, 0.3487720489501953, 0.9217529296875, -0.15832901000976562, -0.0556793212890625, 0.6321372985839844, -0.6963272094726562, 1.2527008056640625, -0.930206298828125, -0.3502082824707031, 0.009487152099609375, -0.22846221923828125, 0.950897216796875, 0.4678192138671875, -0.4853630065917969, -0.8126983642578125, 0.12120437622070312, 0.0184326171875, 0.3051300048828125, 0.30609130859375, 0.06612777709960938, 0.33946990966796875, 0.9711761474609375, 0.06036376953125, 0.011661529541015625, 0.6736297607421875, 0.8089675903320312, 1.2575759887695312, 0.18095016479492188, -0.9264450073242188, 1.174407958984375, -0.4853363037109375, -2.699432373046875, -0.197418212890625, 0.21598052978515625, 0.3826713562011719, 0.15352439880371094, 0.23850059509277344, -0.35858726501464844, -0.021566390991210938, 0.2707023620605469, 0.5427780151367188, 0.07826995849609375, 0.2893829345703125, 0.9220046997070312, -0.1136932373046875, 0.4937591552734375, -0.23142242431640625, -0.1857433319091797, -0.7010097503662109, -0.24871826171875, 0.41400146484375, 0.16667938232421875, 0.10322189331054688, 0.8751678466796875, -0.2137928009033203], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000038.npy"}
{"epoch": 0.11515151515151516, "step": 39, "batch_size": 128, "mean": 0.17837262153625488, "std": 0.6479672193527222, "min": -1.5714492797851562, "p10": -0.562249755859375, "median": 0.13478946685791016, "p90": 0.9752712249755859, "max": 2.0238037109375, "pos_frac": 0.59375, "sample": [0.1342639923095703, 0.7181358337402344, -0.37111663818359375, -0.4552955627441406, 0.9848175048828125, 0.3977813720703125, -0.7821578979492188, -0.33589744567871094, 0.321807861328125, 1.4118576049804688, 0.13531494140625, 1.7344284057617188, 0.34148597717285156, 0.1131591796875, 1.180633544921875, 1.024993896484375, -0.40428924560546875, 0.1713104248046875, 0.13391494750976562, -0.57562255859375, 0.32225799560546875, 0.034061431884765625, 0.25623512268066406, -0.1616973876953125, 1.54620361328125, 0.7862815856933594, 1.4878463745117188, -0.09485626220703125, 0.175262451171875, 0.8938922882080078, 0.9535369873046875, 0.4949531555175781, -0.5231857299804688, 0.0346527099609375, 0.9357986450195312, 0.3587532043457031, -0.21671485900878906, -0.12126922607421875, 0.2535858154296875, 0.548583984375, 0.27008056640625, 0.8238372802734375, 0.20167160034179688, 0.6002883911132812, -0.09762191772460938, -0.21429443359375, 0.14781570434570312, -0.5240554809570312, 0.5054931640625, -0.03809356689453125, -0.086761474609375, -0.10823631286621094, -0.29701995849609375, 2.0238037109375, 0.09418678283691406, -0.3131847381591797, -0.7153701782226562, -0.2462921142578125, 0.4504241943359375, -0.3861274719238281, 0.320709228515625, 0.6837921142578125, 0.05488395690917969, -0.5565185546875, 0.9711799621582031, -0.666900634765625, 0.8577175140380859, 1.3852558135986328, -0.10240554809570312, 0.7469234466552734, -0.21360015869140625, -0.0325927734375, -0.42169952392578125, 0.7758636474609375, -0.71209716796875, 0.5855941772460938, -0.13735198974609375, 0.322265625, -0.865081787109375, -0.2856788635253906, -0.7737998962402344, 0.017303466796875, 0.21390533447265625, -0.0567169189453125, -0.0025787353515625, 0.25507354736328125, 0.6004848480224609, 0.9686374664306641, 0.4800148010253906, 0.2304534912109375, -0.4501018524169922, 0.8205642700195312, -0.127166748046875, -1.173187255859375, 0.425750732421875, -0.056659698486328125, -0.008653640747070312, 1.097381591796875, 0.466949462890625, -1.2616119384765625, 0.07607460021972656, -0.0502471923828125, -0.048126220703125, -1.5714492797851562, -0.045684814453125, 0.0980987548828125, -0.4570655822753906, -0.9715728759765625, 0.9266510009765625, -1.11541748046875, 0.9897365570068359, 0.2145843505859375, 0.16093826293945312, 0.39083099365234375, -0.3739471435546875, 0.4262237548828125, 0.45583343505859375, 0.25774383544921875, -0.9973602294921875, 0.3828468322753906, 0.06937408447265625, -0.274200439453125, 0.5692615509033203, 1.479217529296875, -0.40020751953125, 0.017177581787109375, 1.759918212890625, 0.551910400390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000039.npy"}
{"epoch": 0.11818181818181818, "step": 40, "batch_size": 128, "mean": 0.2836909592151642, "std": 0.8651589751243591, "min": -1.8647689819335938, "p10": -0.7455043792724609, "median": 0.23894119262695312, "p90": 1.3550338745117188, "max": 3.752777099609375, "pos_frac": 0.671875, "sample": [0.1669445037841797, 0.0095977783203125, -1.047616958618164, 0.3514728546142578, -0.5728569030761719, 0.19169998168945312, -0.5891532897949219, -0.3063220977783203, 1.1265487670898438, 0.4468708038330078, -0.7076034545898438, -0.2505073547363281, 1.1096076965332031, 2.16033935546875, 0.2713623046875, 0.198822021484375, 1.3506546020507812, 1.0705108642578125, 0.6605148315429688, 0.471282958984375, 0.20478057861328125, 0.8028488159179688, -0.7760658264160156, 1.9197616577148438, 0.20125770568847656, 0.4051971435546875, 0.31098365783691406, -0.9714279174804688, 1.6617279052734375, 0.026641845703125, 0.3171844482421875, 3.752777099609375, 0.364654541015625, 0.25370025634765625, -0.3726387023925781, 0.40423583984375, 0.39769744873046875, 1.8402976989746094, -1.4029922485351562, 0.30249786376953125, 0.5726776123046875, -0.9059581756591797, 2.3322067260742188, 1.072967529296875, 0.48486328125, 0.9322128295898438, -0.07568359375, -0.21538162231445312, -0.4239959716796875, -0.59698486328125, 0.3935089111328125, -0.21461105346679688, -0.05416107177734375, 0.19664382934570312, 0.9159469604492188, 0.2500419616699219, -0.688079833984375, 0.2313995361328125, 0.24648284912109375, 0.9957237243652344, -0.958404541015625, 0.49567413330078125, 1.035369873046875, 0.02864837646484375, -0.08278465270996094, 1.264852523803711, -0.6147003173828125, 0.51861572265625, 0.4824504852294922, 1.7077178955078125, 0.20627975463867188, -0.25652503967285156, -0.37908935546875, -0.21880340576171875, 0.21654701232910156, 0.09645652770996094, 0.2804450988769531, -1.0453872680664062, 0.2641410827636719, 0.22284698486328125, 0.36902618408203125, 0.04115104675292969, -0.9698638916015625, 0.347076416015625, -0.2258148193359375, 0.8027496337890625, 1.8460235595703125, -0.7324066162109375, -1.8647689819335938, -0.08331298828125, 1.484344482421875, -0.3624458312988281, 1.54486083984375, 0.08947372436523438, -0.5324497222900391, -0.4005565643310547, -1.083587646484375, -0.6276092529296875, 0.8185501098632812, 2.5902633666992188, -0.26630401611328125, -0.5108489990234375, 0.04804039001464844, 1.3510284423828125, 0.6148834228515625, 1.272024154663086, 1.3643798828125, 0.0829315185546875, 0.4957122802734375, 0.77630615234375, -0.15613746643066406, 0.9101104736328125, 1.87335205078125, 0.16066741943359375, -1.3211441040039062, 1.2466583251953125, 0.00829315185546875, -0.33358001708984375, 0.051025390625, 0.1387786865234375, 0.353668212890625, -0.8358592987060547, 0.6452960968017578, -1.1308135986328125, 0.5962982177734375, 0.29937744140625, 0.601531982421875, 0.45660400390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000040.npy"}
{"epoch": 0.12121212121212122, "step": 41, "batch_size": 128, "mean": 0.2562437653541565, "std": 0.6967384219169617, "min": -1.2764358520507812, "p10": -0.5373203277587889, "median": 0.25029468536376953, "p90": 1.0735246658325195, "max": 3.37225341796875, "pos_frac": 0.640625, "sample": [0.05168342590332031, 1.0717754364013672, 0.4275474548339844, -0.7321624755859375, 0.5804977416992188, -1.0537490844726562, 0.3501853942871094, 0.4597015380859375, -0.17743492126464844, 0.28403472900390625, 2.0632858276367188, 0.7027912139892578, 0.6188411712646484, -0.4694404602050781, 0.06082916259765625, 0.405853271484375, 0.35507965087890625, 0.25965118408203125, -1.2764358520507812, -0.264739990234375, -0.1410980224609375, 3.37225341796875, 1.077606201171875, 1.24420166015625, 1.2074737548828125, -0.821319580078125, 1.2919197082519531, -0.223724365234375, 1.0592803955078125, -0.9093017578125, -0.37964439392089844, 0.9997100830078125, -0.053656578063964844, -0.1083831787109375, 0.8259124755859375, 0.2071685791015625, 0.3390350341796875, 0.9949188232421875, -0.033084869384765625, 0.21239471435546875, 0.46142578125, 0.7474632263183594, -0.00754547119140625, -0.3416786193847656, 0.001766204833984375, -0.631622314453125, 0.42292022705078125, -1.1640625, -0.451416015625, 0.6178054809570312, 0.25344085693359375, -0.12160491943359375, 0.1407928466796875, -0.1576690673828125, 0.7104549407958984, 0.5353012084960938, -0.7809925079345703, -0.023855209350585938, 0.07950592041015625, 1.3401527404785156, -0.5059623718261719, -0.4803009033203125, -0.8782882690429688, -0.41936492919921875, 0.4448966979980469, 0.8079948425292969, -0.21575164794921875, -0.3832969665527344, 1.0197219848632812, 0.18776702880859375, 0.7193164825439453, 1.0237007141113281, 0.42977142333984375, 0.5169029235839844, 0.2705039978027344, 0.34725189208984375, -0.2071666717529297, 0.169708251953125, -0.199859619140625, -0.22582435607910156, 1.8145675659179688, 0.04936981201171875, 0.5673980712890625, 0.6974334716796875, 1.3230705261230469, 0.171600341796875, 0.3458251953125, 1.0931587219238281, 0.15386199951171875, -0.325225830078125, 1.0489578247070312, 0.2524890899658203, 0.831939697265625, 0.6412582397460938, -0.6104888916015625, -0.48526954650878906, 1.7160797119140625, 0.5851688385009766, -0.31688690185546875, -0.08438491821289062, 0.4200592041015625, 0.43643951416015625, 0.467529296875, 0.5890922546386719, 0.24810028076171875, 1.6764373779296875, 1.1136531829833984, -0.0308380126953125, 0.47345542907714844, 0.3199424743652344, -0.09165000915527344, 0.5152111053466797, 0.040771484375, -0.26407623291015625, 0.9196319580078125, -0.9487266540527344, 0.5834999084472656, -0.44242095947265625, 0.6193046569824219, -0.2569465637207031, 0.0911865234375, 0.14701080322265625, 0.340789794921875, 0.12663841247558594, -0.8490676879882812, -0.902679443359375, -0.06754112243652344, 0.122711181640625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000041.npy"}
{"epoch": 0.12424242424242424, "step": 42, "batch_size": 128, "mean": 0.3442216217517853, "std": 0.8011794686317444, "min": -1.7593917846679688, "p10": -0.4231063842773437, "median": 0.2638072967529297, "p90": 1.3575881958007812, "max": 2.7323074340820312, "pos_frac": 0.6640625, "sample": [0.895477294921875, 1.5579071044921875, -0.9272346496582031, 0.13650131225585938, 0.7633628845214844, -0.43975830078125, 0.7073287963867188, 0.0878143310546875, 0.8852920532226562, -0.14252471923828125, 1.0104713439941406, 0.0946044921875, 0.14002227783203125, -0.35132598876953125, 2.0331039428710938, 0.1500835418701172, -0.3340110778808594, -1.5234260559082031, -0.34528350830078125, 0.6711177825927734, 0.21108055114746094, 0.1945953369140625, 0.4398059844970703, -1.7593917846679688, 0.6351776123046875, 1.0299949645996094, 2.7323074340820312, 0.5649852752685547, 1.9306373596191406, 0.38436126708984375, 0.39093780517578125, 1.113718032836914, 0.4402313232421875, 1.3040771484375, 0.9426498413085938, 0.41250038146972656, -1.2852783203125, 0.2841796875, 0.9337501525878906, 0.5078811645507812, 0.185699462890625, -0.361846923828125, 2.2215423583984375, -0.6673583984375, 0.21511077880859375, 0.09347915649414062, -0.0365142822265625, -0.22834014892578125, 0.03826713562011719, 1.1444931030273438, -0.2258148193359375, 0.2670402526855469, 0.0989227294921875, -0.38486480712890625, 0.23804092407226562, 1.3853378295898438, 0.6326503753662109, 0.5807952880859375, 1.1896286010742188, -0.102874755859375, 1.3158721923828125, 0.2605743408203125, 0.7876319885253906, -1.233978271484375, 1.6456222534179688, 0.473968505859375, -0.05777168273925781, 0.7713546752929688, 1.4795074462890625, 1.2635993957519531, 1.2655563354492188, -0.30892181396484375, 0.4248390197753906, -0.24359130859375, 1.7098312377929688, -0.2723197937011719, 0.5276870727539062, 0.1370697021484375, -0.12627792358398438, 0.922454833984375, -0.13993072509765625, 1.9507293701171875, -1.1795806884765625, 0.5264244079589844, 0.825164794921875, -0.010524749755859375, 0.09214019775390625, -1.219451904296875, 0.47257232666015625, -0.18375396728515625, -0.1581268310546875, -0.076995849609375, 0.14894866943359375, 1.3456954956054688, 0.9703178405761719, -1.5288238525390625, -0.26609039306640625, -0.6163330078125, 0.2231597900390625, 1.0050010681152344, 1.0130386352539062, -0.4159698486328125, -0.31041717529296875, 0.4045906066894531, -0.2187042236328125, -0.330108642578125, -0.3714447021484375, 1.4274444580078125, 0.1428070068359375, 0.5558853149414062, 0.3243408203125, -0.1174774169921875, 0.12261962890625, 2.0382232666015625, 0.7916755676269531, -0.21731948852539062, 1.5883407592773438, 0.4672050476074219, 0.3752288818359375, 0.8742218017578125, 0.51507568359375, 0.006481170654296875, -0.123443603515625, -0.1468353271484375, 1.0268898010253906, 0.5841560363769531, -0.7231712341308594, -0.9073333740234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000042.npy"}
{"epoch": 0.12727272727272726, "step": 43, "batch_size": 128, "mean": 0.3339724540710449, "std": 0.7677590250968933, "min": -1.6433563232421875, "p10": -0.7437580108642577, "median": 0.39202880859375, "p90": 1.281098175048828, "max": 2.5646896362304688, "pos_frac": 0.7265625, "sample": [0.8107166290283203, 0.1202392578125, 0.7892131805419922, 0.9008769989013672, 0.29195404052734375, -0.29445648193359375, 0.7926101684570312, 0.42518043518066406, 1.0779495239257812, 0.916290283203125, 0.989044189453125, 0.05649566650390625, -0.24005126953125, 0.5393943786621094, -0.92657470703125, 0.8813209533691406, 0.602264404296875, 1.868438720703125, 0.5054130554199219, 0.051883697509765625, 1.3783798217773438, 0.7474269866943359, 0.349884033203125, 0.31876564025878906, 0.39054107666015625, 0.5969562530517578, 0.4312095642089844, -1.3641510009765625, 0.36092376708984375, -0.2042388916015625, 0.3378639221191406, 0.569854736328125, 0.15945053100585938, 1.15155029296875, 1.3292694091796875, 0.033294677734375, 0.9545307159423828, 0.9107818603515625, 0.5393505096435547, 0.6306190490722656, 0.04077339172363281, 0.2464447021484375, 0.14769363403320312, 0.397552490234375, 1.5720138549804688, 0.07309532165527344, -0.490875244140625, 1.0131702423095703, -0.9920730590820312, 1.1097564697265625, -0.52777099609375, 0.9033851623535156, 2.5646896362304688, 1.1541385650634766, 1.94232177734375, 0.48026466369628906, -0.8645095825195312, -1.0777206420898438, 1.4660415649414062, 0.3194732666015625, 1.5297698974609375, -0.7117156982421875, -0.22689056396484375, 0.39351654052734375, 1.3002281188964844, 0.181671142578125, -0.6565628051757812, 0.07569313049316406, 1.0891265869140625, -0.4844627380371094, -0.1012115478515625, 0.312347412109375, -0.13304519653320312, -0.16333770751953125, -1.6433563232421875, 0.4702301025390625, 1.129364013671875, 0.39534759521484375, -0.2955207824707031, 0.6125526428222656, 0.09576416015625, -1.0319290161132812, 1.10638427734375, 0.6837310791015625, -0.45900726318359375, 0.24974822998046875, -1.1499862670898438, -0.5598602294921875, 0.414703369140625, -0.218841552734375, -0.6253662109375, 0.8712081909179688, 0.0425262451171875, -0.2317028045654297, 0.6690177917480469, 0.5187225341796875, -1.2325286865234375, 0.02060699462890625, 0.12467384338378906, -0.6847915649414062, 1.3285980224609375, -0.27130889892578125, -0.9570159912109375, 1.02978515625, 1.61669921875, 0.18051719665527344, -0.889892578125, 0.49864959716796875, 1.148834228515625, 0.19583702087402344, 1.1049003601074219, 1.7735748291015625, -0.8185234069824219, 1.347494125366211, 0.6760025024414062, -0.5005168914794922, 0.10626983642578125, 0.5804977416992188, 1.2728996276855469, 0.4005699157714844, -1.2602310180664062, 0.9845199584960938, 0.6279182434082031, -0.018802642822265625, 0.2325897216796875, 0.1725940704345703, 0.5877876281738281, 0.6630783081054688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000043.npy"}
{"epoch": 0.1303030303030303, "step": 44, "batch_size": 128, "mean": 0.27228787541389465, "std": 1.0035978555679321, "min": -3.215301513671875, "p10": -0.918968391418457, "median": 0.2759361267089844, "p90": 1.6057586669921875, "max": 3.250518798828125, "pos_frac": 0.6171875, "sample": [0.24491119384765625, 0.77783203125, 1.7673110961914062, 0.4633464813232422, -0.0158538818359375, -0.3636627197265625, 1.669281005859375, -0.6353988647460938, -1.33697509765625, 1.7364158630371094, 0.7934417724609375, 0.03801727294921875, 0.2676239013671875, -0.9220352172851562, 0.9952564239501953, -0.4557342529296875, 1.1926193237304688, -0.55206298828125, 0.7804718017578125, -1.4163932800292969, 1.2991485595703125, 1.3546257019042969, 1.042531967163086, 0.4303016662597656, -0.10601997375488281, -0.1482391357421875, 1.650726318359375, 0.1168975830078125, 1.5341796875, -0.2978363037109375, -0.93621826171875, -0.1947021484375, 1.4472122192382812, 0.8735275268554688, -0.4449653625488281, -0.6173419952392578, 0.22178268432617188, -1.3457107543945312, -1.910736083984375, 2.1861572265625, 1.8018264770507812, -0.78656005859375, 0.5581512451171875, -0.6221771240234375, 1.4107666015625, -0.6871509552001953, -0.6199722290039062, -0.245086669921875, 0.1103973388671875, 1.223541259765625, 0.8961334228515625, -0.5745697021484375, 0.7098617553710938, -0.17205429077148438, 0.12918853759765625, 1.2439689636230469, 0.08939743041992188, 0.23546218872070312, 3.250518798828125, 1.6295166015625, -1.07086181640625, 0.482940673828125, 0.6834716796875, 0.1757049560546875, 1.8176422119140625, 0.2238311767578125, -0.7477054595947266, -0.8824615478515625, 1.60150146484375, 1.7366714477539062, 1.569122314453125, 0.14500045776367188, 0.37441253662109375, -0.7420501708984375, -1.07464599609375, -0.884307861328125, 0.4518394470214844, 0.9817314147949219, 0.2537803649902344, 0.18324851989746094, 1.321136474609375, 0.6989593505859375, -0.08123779296875, 0.454925537109375, -0.2960357666015625, 1.4744071960449219, 0.26071929931640625, 0.676727294921875, 0.5223846435546875, 0.4245948791503906, 1.07025146484375, 1.0113372802734375, 1.615692138671875, 1.768310546875, 0.67852783203125, -0.9176540374755859, -0.3104400634765625, -0.2795276641845703, 0.5523490905761719, 1.0473709106445312, 0.7201690673828125, 0.7641410827636719, -0.3644905090332031, 0.28424835205078125, -1.2240257263183594, -0.06374359130859375, -0.03707122802734375, 2.0165252685546875, -0.21112060546875, -0.6547451019287109, -3.215301513671875, -1.7385635375976562, -0.02579498291015625, 0.880706787109375, 1.0608081817626953, 0.46486854553222656, -1.2411041259765625, 0.7615203857421875, 0.41632080078125, 0.3631134033203125, 0.7276992797851562, 0.32071495056152344, 0.7830810546875, 0.4762535095214844, -0.57415771484375, -2.5230712890625, -0.35003089904785156, -0.7006587982177734], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000044.npy"}
{"epoch": 0.13333333333333333, "step": 45, "batch_size": 128, "mean": 0.43691137433052063, "std": 0.8619186878204346, "min": -1.9591865539550781, "p10": -0.5629096984863281, "median": 0.4069499969482422, "p90": 1.5702262878417967, "max": 3.04595947265625, "pos_frac": 0.7109375, "sample": [1.7201271057128906, 0.427947998046875, 0.2779827117919922, 0.49935150146484375, 0.3679656982421875, 1.255523681640625, -0.82537841796875, -0.026220321655273438, 0.5449867248535156, 0.797943115234375, -0.394683837890625, -1.1888465881347656, 1.8283309936523438, -0.5504150390625, -0.28780364990234375, 1.877105712890625, -0.5527439117431641, 0.5404548645019531, 1.769744873046875, -0.0609130859375, 1.3291397094726562, 0.6083183288574219, 0.3798828125, 1.3711700439453125, 1.6037826538085938, -0.759796142578125, 0.0941009521484375, 1.3675575256347656, 0.19843673706054688, 0.6067543029785156, 0.45855140686035156, -1.2138442993164062, 0.7125053405761719, 1.2597808837890625, 0.5494060516357422, 1.4620208740234375, 0.2549591064453125, 0.5414581298828125, 0.12047958374023438, -0.2385692596435547, -0.038219451904296875, 3.04595947265625, 0.3526458740234375, 0.2789306640625, -0.1929149627685547, 1.0257110595703125, -0.44268798828125, -0.8609161376953125, 0.9808521270751953, 0.35518646240234375, 2.391765594482422, 2.272907257080078, 0.4359302520751953, 0.06018257141113281, 0.6868820190429688, 1.0955810546875, 0.9678897857666016, -0.3142585754394531, -1.4204826354980469, -1.9591865539550781, 1.65032958984375, -0.06073760986328125, 0.30783653259277344, 0.6198978424072266, -0.46372222900390625, 1.1747913360595703, 0.1233673095703125, 0.4332733154296875, 0.9432315826416016, 0.3699989318847656, 0.9678688049316406, 1.7158737182617188, 0.05329132080078125, 0.8777618408203125, -1.3937759399414062, 0.20139312744140625, 1.3276481628417969, -0.05596923828125, 1.41143798828125, 1.5659255981445312, 1.0205860137939453, 1.64166259765625, 0.6830406188964844, 1.476837158203125, -0.40337371826171875, -0.050689697265625, 2.5655517578125, 0.7769908905029297, 0.33867645263671875, -0.7654800415039062, -1.066192626953125, 0.42713165283203125, 1.005859375, -0.0891876220703125, 0.3192310333251953, 0.3663330078125, -1.5724945068359375, -0.27691650390625, 0.8545875549316406, 0.8639373779296875, 0.9454002380371094, 0.3867683410644531, 0.7631454467773438, 0.00119781494140625, 0.79791259765625, 0.5689182281494141, 0.6561660766601562, 0.558197021484375, -0.1885204315185547, 0.34104156494140625, 0.088714599609375, -0.16840362548828125, -0.04335784912109375, 1.58026123046875, 0.7897491455078125, -0.3492431640625, 1.34503173828125, -0.0002079010009765625, 0.591400146484375, -0.4349021911621094, 0.3170318603515625, -0.5866298675537109, -1.1223678588867188, 0.5023307800292969, 0.7868118286132812, 0.12194061279296875, 0.2620048522949219, 0.08013916015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000045.npy"}
{"epoch": 0.13636363636363635, "step": 46, "batch_size": 128, "mean": 0.44267454743385315, "std": 1.1222342252731323, "min": -2.0766372680664062, "p10": -0.7900165557861327, "median": 0.29041290283203125, "p90": 1.7423454284667967, "max": 4.71942138671875, "pos_frac": 0.640625, "sample": [1.25140380859375, 1.2279510498046875, 0.0030269622802734375, -0.5236587524414062, 0.32366943359375, 2.2039566040039062, 0.3613128662109375, 0.9147720336914062, -0.3325347900390625, 0.07490158081054688, 1.1636199951171875, -2.0766372680664062, 0.7314186096191406, 0.6165504455566406, 0.5048751831054688, -0.17053794860839844, -1.385833740234375, 0.7457656860351562, 1.9571456909179688, -0.4952125549316406, -0.4858856201171875, 1.2854690551757812, -1.0716285705566406, -1.8964767456054688, 0.3924674987792969, 0.2571563720703125, -0.5216865539550781, -0.23836135864257812, 0.035221099853515625, -0.5048036575317383, 1.5777244567871094, 1.610483169555664, 1.0538368225097656, 3.2039642333984375, 4.71942138671875, 0.16204833984375, -0.03394508361816406, -0.25261688232421875, 0.6193675994873047, -0.9813461303710938, -0.3035888671875, 1.1344356536865234, 0.9642391204833984, -0.8721065521240234, -1.9150848388671875, 0.5552825927734375, -0.3653411865234375, -0.9787368774414062, 2.3064956665039062, 1.1992416381835938, -0.2031707763671875, -0.2918853759765625, -0.7082157135009766, -0.23979949951171875, 0.07052230834960938, 1.7706451416015625, 1.9949188232421875, -0.08337593078613281, 1.785430908203125, 0.14068603515625, -1.0627975463867188, 0.2377910614013672, 0.3739166259765625, 0.6263904571533203, 0.9120101928710938, 0.5456199645996094, 0.25591278076171875, 0.0001354217529296875, -0.16284942626953125, -0.2664337158203125, 0.16106224060058594, 0.4768810272216797, 0.8592681884765625, -0.4261035919189453, 1.6429405212402344, -0.22747421264648438, 2.157501220703125, 0.16543197631835938, -0.05931663513183594, 1.0241928100585938, 1.9291877746582031, -0.23313522338867188, 1.595001220703125, 0.15703964233398438, -0.3145751953125, 1.18267822265625, -0.9599151611328125, 0.5866050720214844, 1.6290397644042969, -0.71649169921875, 0.4799003601074219, 3.760284423828125, 0.253387451171875, 0.12007522583007812, 1.6534271240234375, -1.4033775329589844, 1.5103988647460938, 0.0037822723388671875, -0.37302398681640625, 0.41309165954589844, 0.8924350738525391, -1.2729644775390625, 1.4473896026611328, 0.5080814361572266, 1.1633758544921875, 0.08483505249023438, -0.6691856384277344, -0.5723648071289062, -1.62310791015625, 1.2663726806640625, -0.7548351287841797, 1.4325675964355469, -0.53564453125, 0.41204071044921875, 1.496368408203125, 1.27362060546875, 0.8979148864746094, -0.17522048950195312, 1.7302169799804688, 1.7282333374023438, 2.7194290161132812, 1.3590164184570312, 0.08287239074707031, 0.554840087890625, 2.9160804748535156, -0.12000656127929688, 0.34062957763671875, -0.4150657653808594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000046.npy"}
{"epoch": 0.1393939393939394, "step": 47, "batch_size": 128, "mean": 0.4064600467681885, "std": 1.241542935371399, "min": -3.1708602905273438, "p10": -0.9282115936279297, "median": 0.34842491149902344, "p90": 1.6483551025390621, "max": 4.8576202392578125, "pos_frac": 0.6484375, "sample": [-0.04919624328613281, 0.909271240234375, -0.6818389892578125, 0.968658447265625, -0.6227531433105469, 0.7280330657958984, -1.3324127197265625, -0.5065689086914062, -0.14629364013671875, 1.144500732421875, -0.011432647705078125, -2.3022537231445312, 0.16423797607421875, 2.6319122314453125, -0.7654953002929688, 0.2150421142578125, -0.6674785614013672, 1.7457351684570312, -1.1693572998046875, -0.4636077880859375, 0.3539772033691406, -0.09069252014160156, -1.0462112426757812, 0.7171478271484375, 1.466796875, 0.6176853179931641, 2.0292816162109375, 0.6703777313232422, 1.353424072265625, 1.6066207885742188, -0.7395668029785156, -1.2046356201171875, 0.10797882080078125, 1.0066184997558594, -0.7319831848144531, 0.7692012786865234, 3.9816436767578125, 0.4395599365234375, 0.06082725524902344, -0.3523082733154297, 0.6683197021484375, -0.313812255859375, 1.4256439208984375, -0.054271697998046875, -0.2731056213378906, 0.3142890930175781, -0.422943115234375, 0.7212867736816406, 0.217559814453125, 0.5934829711914062, 0.726104736328125, 0.06382942199707031, -0.22144126892089844, -1.23028564453125, 0.49498939514160156, -0.2255096435546875, 2.18695068359375, -0.9282302856445312, -0.9328536987304688, 0.543243408203125, 2.323535919189453, -1.6567001342773438, -0.9282035827636719, 0.4077339172363281, 1.1768341064453125, -0.07798004150390625, 1.122100830078125, 1.0547351837158203, 0.4235191345214844, -3.0446510314941406, 0.050933837890625, 0.03433799743652344, -0.6255722045898438, -0.3650627136230469, 1.1523284912109375, -0.1735992431640625, 1.367818832397461, 0.6820602416992188, 1.4181404113769531, 0.6433639526367188, -0.2623786926269531, 0.0250091552734375, 1.2772598266601562, 0.6614494323730469, 1.3801727294921875, 0.0902252197265625, 1.0813865661621094, -0.4007110595703125, 1.3153839111328125, 0.7114067077636719, 4.8576202392578125, 0.09604454040527344, 0.0628662109375, 0.0841522216796875, 1.2538623809814453, 0.4206504821777344, 0.232666015625, 0.6072921752929688, 0.2792530059814453, 2.4706268310546875, -0.7171630859375, -3.1708602905273438, 0.7378997802734375, 1.5857658386230469, 1.5151481628417969, 0.34287261962890625, 0.5199985504150391, 0.9002571105957031, -1.1985397338867188, 1.1298675537109375, 1.4006843566894531, -0.11692619323730469, 2.511402130126953, 2.0407180786132812, -0.580902099609375, 2.95977783203125, 1.592071533203125, 1.5999984741210938, 0.6423263549804688, 2.86627197265625, -3.10125732421875, 1.054342269897461, 3.4078369140625, 0.31832313537597656, 0.01787567138671875, -0.5686111450195312, -0.9124851226806641, -0.13741111755371094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000047.npy"}
{"epoch": 0.14242424242424243, "step": 48, "batch_size": 128, "mean": 0.4789579212665558, "std": 1.3291993141174316, "min": -2.5449447631835938, "p10": -0.9605899810791014, "median": 0.38762378692626953, "p90": 2.1090789794921876, "max": 4.8410491943359375, "pos_frac": 0.625, "sample": [-0.0171051025390625, -1.1909637451171875, -0.3919219970703125, -0.6658897399902344, -0.4109954833984375, -0.031110763549804688, 3.3766326904296875, 0.3901710510253906, -1.26580810546875, -1.0674896240234375, -0.22498703002929688, 0.0854644775390625, 1.0304412841796875, 1.5258941650390625, -0.04241752624511719, -0.39768218994140625, 0.5563621520996094, 0.5650711059570312, -0.3127727508544922, 0.39650726318359375, 2.7148513793945312, -0.2780494689941406, -0.8618316650390625, 1.5206222534179688, 0.7691478729248047, 2.565399169921875, 0.248779296875, 2.0239105224609375, -0.22158241271972656, 0.7112083435058594, 1.760955810546875, 2.105316162109375, 1.5813217163085938, -0.3690338134765625, -0.7433090209960938, -0.374786376953125, -0.407501220703125, -0.5526504516601562, 0.1229705810546875, -0.4634895324707031, -1.4285736083984375, 0.4472503662109375, 0.9691162109375, 1.2718048095703125, -1.1931991577148438, -2.029022216796875, 0.21644973754882812, 0.5185928344726562, 0.7879180908203125, 0.03115081787109375, 1.6841812133789062, 0.8466873168945312, -0.6751708984375, 1.8049392700195312, 0.302825927734375, 0.07973480224609375, -0.8669681549072266, 1.2376327514648438, 0.38507652282714844, 1.1377716064453125, -0.11059761047363281, 1.0782489776611328, 1.818328857421875, -0.8657569885253906, 0.8071060180664062, -0.4554901123046875, 2.1709136962890625, 0.1995697021484375, 4.8410491943359375, -1.840667724609375, 0.6612930297851562, -0.2403087615966797, -2.11077880859375, -1.3171768188476562, 0.6871986389160156, -0.7510337829589844, 0.28218841552734375, 1.5219650268554688, 2.6365203857421875, -2.3871841430664062, 3.71893310546875, 2.3148269653320312, -0.2735137939453125, 0.252655029296875, -2.5449447631835938, 0.1654052734375, -1.3709869384765625, 0.44293975830078125, 0.7365531921386719, -0.4549140930175781, 1.3169975280761719, -0.5792427062988281, 0.40573883056640625, 2.8897628784179688, 3.6238555908203125, 1.0458831787109375, 0.5174026489257812, -0.0059661865234375, 2.11785888671875, 0.20056533813476562, 0.8791427612304688, 3.008331298828125, 1.2494430541992188, 1.1922225952148438, -0.47609710693359375, 0.5669059753417969, 1.8677520751953125, -0.5986099243164062, 0.37451171875, -0.12160491943359375, 1.7755165100097656, 1.9148101806640625, 1.74151611328125, 0.8055076599121094, 0.4566783905029297, 0.40702056884765625, 4.54132080078125, 0.0347442626953125, 1.1955223083496094, 0.499847412109375, -0.7710914611816406, -0.12896728515625, 0.45938682556152344, 0.0952911376953125, 1.7375030517578125, 0.9334068298339844, -0.9147758483886719, -1.8516654968261719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000048.npy"}
{"epoch": 0.14545454545454545, "step": 49, "batch_size": 128, "mean": 0.2816643714904785, "std": 1.3598649501800537, "min": -3.4961090087890625, "p10": -1.445374298095703, "median": 0.27379322052001953, "p90": 1.8092002868652344, "max": 4.5582275390625, "pos_frac": 0.6015625, "sample": [-3.4961090087890625, -1.3210983276367188, -0.023050308227539062, 2.5835037231445312, 0.0762481689453125, 3.6470260620117188, -0.9461135864257812, -0.1620330810546875, 0.08822822570800781, 0.5763320922851562, 0.15370559692382812, 0.1558399200439453, 0.10964202880859375, 1.1735992431640625, 0.39249610900878906, -0.22994613647460938, 1.1609115600585938, 0.712738037109375, 0.5645599365234375, 0.12078857421875, 2.4456710815429688, 2.4201126098632812, -0.5406131744384766, 0.875701904296875, 3.548980712890625, -1.3651885986328125, -0.5822467803955078, 0.1626129150390625, -0.5588226318359375, -0.4852333068847656, -0.790618896484375, 0.1295318603515625, -0.6141014099121094, -3.122344970703125, -1.6478748321533203, -0.4000701904296875, -0.351043701171875, 1.1686363220214844, -1.887054443359375, -3.0269622802734375, 1.6948013305664062, -0.08182525634765625, -0.721923828125, 2.5150375366210938, -1.2304725646972656, 4.3881988525390625, 0.9219970703125, -1.2355194091796875, 0.8855972290039062, -2.5374832153320312, 4.5582275390625, 0.7953338623046875, -0.5515365600585938, -0.09911727905273438, 1.8817214965820312, 1.5272750854492188, 1.6699981689453125, -0.259552001953125, 1.0798091888427734, -0.10485076904296875, 0.01682281494140625, 0.31574249267578125, 1.2527694702148438, -0.35677528381347656, -0.11293792724609375, 0.25314903259277344, 1.8050689697265625, 0.6204032897949219, 0.5123291015625, 0.7965774536132812, 0.3302764892578125, 0.5885772705078125, -0.4657020568847656, 0.06812667846679688, 0.2944374084472656, -0.47556304931640625, 1.4234695434570312, 2.4096221923828125, -1.490325927734375, 2.113037109375, 1.6056900024414062, 1.292022705078125, -0.0775299072265625, 0.12743377685546875, 0.8632183074951172, 1.0084037780761719, 0.4296112060546875, -1.1570892333984375, 0.452056884765625, 1.5860595703125, -0.613616943359375, 1.8188400268554688, -1.4261093139648438, -0.7678203582763672, -1.936126708984375, 1.4298934936523438, -0.371337890625, 2.2259445190429688, -1.6114044189453125, -1.6825313568115234, 0.400421142578125, 0.4336051940917969, -2.0948867797851562, -0.25308990478515625, 0.3612518310546875, -0.4609966278076172, 1.1733283996582031, -0.27100372314453125, 1.0414390563964844, 0.5267333984375, 0.40692138671875, 1.101959228515625, 0.5733261108398438, -0.00032806396484375, -1.9135284423828125, 0.5982913970947266, 0.9425125122070312, -1.6182918548583984, 0.5557041168212891, 1.2361602783203125, 0.5402870178222656, 0.175506591796875, 0.42949676513671875, 1.802490234375, 1.3498001098632812, 0.6290664672851562, -0.30596160888671875, -0.2139434814453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000049.npy"}
{"epoch": 0.1484848484848485, "step": 50, "batch_size": 128, "mean": 0.41620010137557983, "std": 1.7633202075958252, "min": -4.7321014404296875, "p10": -1.1530733108520506, "median": 0.22049713134765625, "p90": 2.20543098449707, "max": 8.39471435546875, "pos_frac": 0.578125, "sample": [3.4065322875976562, 2.7013702392578125, -0.6858978271484375, 4.6032257080078125, 0.7810897827148438, 0.4354705810546875, -0.7476806640625, -0.5613365173339844, -0.5065803527832031, -0.3902091979980469, -0.6688690185546875, 0.89208984375, 0.887725830078125, -0.3730659484863281, -1.0322685241699219, -0.8990478515625, 0.07217025756835938, 0.99310302734375, 0.9029998779296875, 0.2306671142578125, 0.06566619873046875, -1.1129131317138672, -0.22884368896484375, 2.059385299682617, -0.6753692626953125, 0.7570533752441406, -0.027551651000976562, 0.5132465362548828, 0.9947891235351562, 0.33800506591796875, -0.58160400390625, 0.6956253051757812, -0.4260406494140625, 2.0412940979003906, 1.390716552734375, -0.3408966064453125, 0.2582378387451172, 1.6615638732910156, -0.3775901794433594, 0.4385223388671875, -2.290496826171875, -0.9367103576660156, 4.12176513671875, -2.484954833984375, -0.8841629028320312, 3.2283096313476562, 0.4449119567871094, -0.6805419921875, 0.5167350769042969, -3.209320068359375, 2.336956024169922, 2.89056396484375, -0.025653839111328125, 1.1932601928710938, 2.084339141845703, 0.6055259704589844, -0.9996223449707031, 0.8748397827148438, -0.02985382080078125, -0.8222198486328125, 0.029682159423828125, -0.3795318603515625, -1.6264419555664062, -1.5062789916992188, -1.273040771484375, -0.4096221923828125, -1.2467803955078125, 0.7262191772460938, 1.1956329345703125, -0.016265869140625, 0.2029132843017578, 0.9175682067871094, -0.32806396484375, -0.4250030517578125, 0.6042728424072266, 0.2065582275390625, -0.14756393432617188, 0.2103271484375, -1.4478721618652344, 0.059291839599609375, -0.6269149780273438, 0.15298843383789062, 0.3386802673339844, 0.4430084228515625, 2.1490631103515625, -0.027065277099609375, -3.9925384521484375, 2.0237197875976562, -0.5068817138671875, -0.11028289794921875, 1.659820556640625, 4.1021728515625, 0.42740631103515625, 1.4762020111083984, 1.0881500244140625, 0.8367385864257812, 1.3941574096679688, 1.6557998657226562, 1.7159576416015625, 0.834442138671875, 0.6017608642578125, 4.20220947265625, 0.6104164123535156, 1.4621734619140625, -1.4431190490722656, 2.1065444946289062, -0.5490474700927734, 2.556549072265625, 0.4382648468017578, -4.163299560546875, -0.4215431213378906, -0.5568389892578125, 0.4950714111328125, 5.77484130859375, -1.8078689575195312, 0.5309867858886719, 0.03095245361328125, -0.3401336669921875, 0.5459480285644531, -0.001674652099609375, -1.0750656127929688, 4.4923248291015625, 1.8561134338378906, 0.3801116943359375, 8.39471435546875, -0.0517578125, -4.7321014404296875, 0.13800048828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000050.npy"}
{"epoch": 0.15151515151515152, "step": 51, "batch_size": 128, "mean": 0.3919285535812378, "std": 1.545856237411499, "min": -4.179443359375, "p10": -1.2282344818115234, "median": 0.46160125732421875, "p90": 2.17946662902832, "max": 6.56634521484375, "pos_frac": 0.6171875, "sample": [-0.5488853454589844, -0.7905769348144531, 0.6220321655273438, -0.4907379150390625, 2.0901260375976562, 1.7993927001953125, 1.2635040283203125, 0.11408042907714844, 2.032390594482422, 0.5014801025390625, -0.9235382080078125, 0.012134552001953125, 1.0268325805664062, 1.3648147583007812, 1.4859771728515625, 0.27733612060546875, 1.7384605407714844, -0.03227996826171875, -0.41709136962890625, -0.8462543487548828, -0.02387237548828125, -1.6333847045898438, -0.6209144592285156, 1.3023757934570312, 0.047840118408203125, 0.5943031311035156, 3.0575332641601562, 0.22466278076171875, 2.735452651977539, 1.1543140411376953, 1.7192230224609375, -0.2959136962890625, 0.7331695556640625, -0.4391365051269531, -0.030710220336914062, 0.4609222412109375, -4.179443359375, 1.1736412048339844, 1.8957176208496094, 0.27291107177734375, 2.2987403869628906, -0.9003028869628906, 1.316162109375, 0.9765129089355469, 0.25257301330566406, 2.5796680450439453, 0.18297195434570312, 1.372314453125, -1.20697021484375, 0.5821304321289062, 0.5179405212402344, 0.49102783203125, 2.1283493041992188, 0.60028076171875, 2.0008010864257812, -3.638519287109375, 3.816741943359375, 1.1936912536621094, -0.16391754150390625, 0.7275657653808594, 0.6883544921875, 0.8923721313476562, 3.491455078125, 0.1091766357421875, 1.1998291015625, -1.083251953125, -1.183065414428711, 0.8376388549804688, -2.6764068603515625, -0.49152374267578125, -1.889404296875, 0.9543647766113281, -1.2778511047363281, 2.3051910400390625, -1.0838775634765625, 2.3636932373046875, -1.15216064453125, 1.071695327758789, -0.9387969970703125, 0.38532257080078125, -0.626068115234375, 0.9807205200195312, -0.6484298706054688, 0.33791351318359375, 1.2518539428710938, 3.460174560546875, 3.7245941162109375, -1.8152618408203125, 0.4622802734375, -0.6980247497558594, 0.5342864990234375, 1.3177337646484375, 1.2758941650390625, -1.7192192077636719, -0.5869026184082031, -1.2910041809082031, -2.7276687622070312, 0.3446998596191406, 1.3033981323242188, 1.1311798095703125, -0.7965011596679688, 0.6563949584960938, 1.6776123046875, -1.8199005126953125, 1.2410335540771484, 0.8900203704833984, 0.2331390380859375, 2.3545455932617188, -3.0949783325195312, 0.5363616943359375, 1.557037353515625, -0.5534896850585938, -0.1552295684814453, -1.0261077880859375, -2.5588912963867188, -0.37718963623046875, -0.6107711791992188, -0.2797050476074219, -0.02951812744140625, 1.3164634704589844, -0.2612342834472656, 6.56634521484375, -1.0946006774902344, -1.169525146484375, 0.7352828979492188, 1.2915725708007812, 2.601287841796875, 0.24684715270996094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000051.npy"}
{"epoch": 0.15454545454545454, "step": 52, "batch_size": 128, "mean": 0.6274353265762329, "std": 1.8086669445037842, "min": -4.875358581542969, "p10": -1.364111328125, "median": 0.693023681640625, "p90": 2.5824390411376945, "max": 7.337188720703125, "pos_frac": 0.671875, "sample": [-1.717071533203125, -1.0035209655761719, 1.1194915771484375, -2.3973846435546875, 0.21509170532226562, 1.2271080017089844, 0.4312744140625, -0.7137794494628906, 0.4956817626953125, -1.6244697570800781, -0.576385498046875, 2.4221572875976562, 1.0712814331054688, 1.60205078125, -0.2969932556152344, 0.5813522338867188, 4.396820068359375, -0.6131095886230469, 1.6247215270996094, 1.213165283203125, -2.8950881958007812, 0.64471435546875, -0.9444427490234375, 0.15073776245117188, 2.9257965087890625, -0.01992034912109375, 3.068328857421875, 1.7060928344726562, -1.9140472412109375, 0.8302650451660156, -1.424652099609375, -0.7915725708007812, 1.0668411254882812, 2.9737815856933594, -0.6279525756835938, 0.8400955200195312, -1.1151809692382812, 0.7413330078125, -0.4074554443359375, 2.162647247314453, 0.25991058349609375, 1.0693511962890625, -0.2614479064941406, 3.373016357421875, -0.8949966430664062, 1.4833126068115234, 0.5263824462890625, 2.2183914184570312, 1.7691726684570312, 0.8201332092285156, 0.43357276916503906, -0.9873046875, 0.7660446166992188, 0.44598388671875, 2.762584686279297, -1.0096206665039062, -0.55328369140625, -0.7391471862792969, -0.5452022552490234, 1.1224365234375, 0.9101524353027344, 1.1259536743164062, 2.087432861328125, 0.9803466796875, 4.98992919921875, 1.0720977783203125, 1.4127120971679688, -0.5794296264648438, -0.43321990966796875, 1.1950607299804688, -0.6167564392089844, 0.9057464599609375, 0.776123046875, 0.5373001098632812, 1.2379379272460938, 2.8386764526367188, -3.8046875, -2.752349853515625, 1.5814208984375, 0.90960693359375, 0.09171485900878906, 0.7440395355224609, 1.8538398742675781, 0.6263370513916016, 0.41327667236328125, -0.8224601745605469, 1.1657562255859375, 1.4238471984863281, -3.3083419799804688, -0.6665496826171875, 0.5694122314453125, 1.1975460052490234, 2.203094482421875, 1.1356430053710938, 0.20361709594726562, -1.4936904907226562, 2.0001754760742188, -1.0308837890625, -0.008052825927734375, -4.875358581542969, 7.337188720703125, 2.11187744140625, 0.01318359375, -1.0337295532226562, 2.169677734375, -0.46460914611816406, 0.13618087768554688, -2.0558853149414062, 1.3371353149414062, 0.2085437774658203, 0.2585620880126953, 1.0371513366699219, 0.49303436279296875, -1.4929981231689453, 5.82391357421875, 0.75433349609375, 1.8677253723144531, 4.61419677734375, 1.5381622314453125, 0.7560195922851562, 0.7516441345214844, -0.713470458984375, 0.40302085876464844, 3.6728515625, -1.338165283203125, 1.86688232421875, 2.5052337646484375, 5.469947814941406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000052.npy"}
{"epoch": 0.15757575757575756, "step": 53, "batch_size": 128, "mean": 0.5087804198265076, "std": 1.9526575803756714, "min": -6.938140869140625, "p10": -1.6503231048583984, "median": 0.3281211853027344, "p90": 2.7065055847167963, "max": 7.0133209228515625, "pos_frac": 0.6171875, "sample": [0.05982780456542969, 1.4241561889648438, 0.33953857421875, 1.5522613525390625, 0.40006256103515625, 2.308816909790039, -2.0864715576171875, 1.411529541015625, -0.24529457092285156, -2.2601852416992188, -0.2703132629394531, 0.5068016052246094, 1.627532958984375, -0.5813331604003906, 0.8754234313964844, -1.99114990234375, -0.947418212890625, 1.220977783203125, -3.5843582153320312, 5.4431304931640625, 0.5830669403076172, 0.5310211181640625, 0.255096435546875, -0.07355308532714844, 0.2554931640625, -0.3886566162109375, -0.63580322265625, 0.29180145263671875, 0.1603546142578125, 3.107635498046875, 6.8897857666015625, -0.46370697021484375, -1.7424240112304688, 1.3812446594238281, 1.3997039794921875, 1.7766551971435547, -0.2782478332519531, 2.9694061279296875, -0.0759124755859375, 2.2224044799804688, 1.6383514404296875, 1.4791259765625, -0.7391281127929688, -4.0875091552734375, 0.6171417236328125, -3.22314453125, -1.2432937622070312, 0.3646697998046875, -0.22405242919921875, -0.744354248046875, -0.9430465698242188, 1.1265220642089844, -4.4575042724609375, 2.04705810546875, 3.930908203125, -0.8062057495117188, 2.352924346923828, 2.0366592407226562, -0.07433319091796875, 1.268136978149414, 0.500762939453125, 3.5511245727539062, -0.1910247802734375, 2.84967041015625, -1.8841171264648438, 2.4972915649414062, 1.21746826171875, 1.9103889465332031, -1.3452682495117188, -0.34838104248046875, 3.373464584350586, 1.291311264038086, 0.5364189147949219, 2.6451492309570312, 0.41950225830078125, -0.472900390625, -0.4030723571777344, 0.20923233032226562, -3.063323974609375, -0.413604736328125, -1.1787338256835938, -0.47589111328125, 2.2780685424804688, 2.0634727478027344, 0.18429183959960938, -0.18488311767578125, 3.0043106079101562, 7.0133209228515625, -6.938140869140625, 1.1665840148925781, -0.66888427734375, 0.23452377319335938, 0.31670379638671875, 1.5254478454589844, 1.1323928833007812, 2.9620132446289062, -0.0808868408203125, 0.10245132446289062, 0.811767578125, 2.4038925170898438, 1.9739456176757812, -0.03925323486328125, 0.7398853302001953, -1.6108512878417969, -0.5091400146484375, -0.3455085754394531, 0.5739402770996094, -0.2280731201171875, 0.17835617065429688, 4.960540771484375, -0.8590431213378906, -0.7916088104248047, 1.854736328125, 1.8219757080078125, 1.7309284210205078, 1.1159210205078125, -2.0239334106445312, 0.9774703979492188, 1.2749309539794922, 0.42133331298828125, 0.2213592529296875, 0.23145675659179688, -0.5230693817138672, 3.2591400146484375, -2.3500823974609375, 0.042255401611328125, 0.15734100341796875, 0.6291999816894531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000053.npy"}
{"epoch": 0.1606060606060606, "step": 54, "batch_size": 128, "mean": 0.45940831303596497, "std": 2.0675790309906006, "min": -6.1021728515625, "p10": -1.9347843170166015, "median": 0.4635887145996094, "p90": 2.8512565612792966, "max": 6.98828125, "pos_frac": 0.609375, "sample": [0.6633987426757812, -1.25555419921875, 1.123931884765625, -1.3090705871582031, 1.8734207153320312, 1.1238327026367188, -6.1021728515625, 1.332183837890625, -0.8672027587890625, 0.9061813354492188, 2.1770496368408203, 0.7596282958984375, -0.42757415771484375, 2.4145660400390625, 3.2490921020507812, 1.0572834014892578, 0.4678802490234375, 2.8319320678710938, 0.36156463623046875, 0.82281494140625, 2.431974411010742, 0.6050643920898438, -0.0329437255859375, 3.63385009765625, 0.9936351776123047, -4.344696044921875, 3.3812942504882812, 0.8856964111328125, -1.2947864532470703, -1.6229114532470703, -0.17014694213867188, 0.367950439453125, -0.4486579895019531, 0.9137802124023438, -3.9877166748046875, 0.4776191711425781, 0.35223388671875, 0.121734619140625, 1.7047538757324219, -1.6251850128173828, 1.32611083984375, -3.76409912109375, 1.123495101928711, -1.5340003967285156, -0.8974761962890625, 0.14031982421875, 2.4897079467773438, -2.3360862731933594, 5.9756011962890625, 2.8963470458984375, -1.935791015625, -0.9081344604492188, 1.3055477142333984, 0.3675689697265625, 0.8192710876464844, 2.4758758544921875, 0.4511260986328125, 4.9129486083984375, 1.6612319946289062, -0.493743896484375, -0.90167236328125, -3.772918701171875, -0.6967391967773438, -2.0004119873046875, 0.6735153198242188, 1.9911117553710938, 3.75848388671875, 1.6237564086914062, -0.21416854858398438, 2.1580581665039062, 1.72802734375, 1.5564346313476562, -1.3111114501953125, 0.2254619598388672, 0.6399726867675781, -0.030759811401367188, 3.07281494140625, -0.5321426391601562, -0.411376953125, -0.7076416015625, 2.0735855102539062, 0.2523345947265625, 2.2797393798828125, 1.0173606872558594, 0.3700981140136719, -0.4617576599121094, -1.9343528747558594, 0.584716796875, 0.7417221069335938, -1.1989212036132812, -1.7940673828125, 1.1722640991210938, -2.174285888671875, 0.45929718017578125, 4.019416809082031, -0.048919677734375, 2.3937835693359375, -2.0093460083007812, 2.3982391357421875, 2.4877395629882812, -0.022064208984375, -0.4317951202392578, 2.6844635009765625, 1.0247230529785156, 0.08643341064453125, -0.9224166870117188, 1.3883781433105469, -3.348236083984375, 3.8070068359375, -0.0058441162109375, -5.6024627685546875, -1.0719852447509766, -0.5282211303710938, 3.4505538940429688, 0.8168106079101562, -0.3606376647949219, -2.3548126220703125, 1.1409454345703125, -0.04398345947265625, 0.7402534484863281, 0.17325210571289062, 6.98828125, 0.6820755004882812, -0.8076095581054688, 0.24347686767578125, 1.3362274169921875, -0.5031585693359375, 5.145721435546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000054.npy"}
{"epoch": 0.16363636363636364, "step": 55, "batch_size": 128, "mean": 0.6902722716331482, "std": 2.2904021739959717, "min": -7.8722076416015625, "p10": -1.3870391845703123, "median": 0.49947357177734375, "p90": 3.3373519897460935, "max": 8.980987548828125, "pos_frac": 0.65625, "sample": [-0.28336334228515625, 5.9315948486328125, 3.938323974609375, 0.6478118896484375, 2.20440673828125, 2.585216522216797, 2.327472686767578, -2.063079833984375, 1.5333538055419922, 0.1848468780517578, -1.0760459899902344, -0.07474517822265625, 0.2902488708496094, 2.3099727630615234, -1.1936492919921875, 0.27666759490966797, 0.19736480712890625, -0.39345550537109375, -0.7298622131347656, 1.2972049713134766, -0.2712059020996094, 3.456756591796875, 0.02605438232421875, 1.48736572265625, -2.862762451171875, -1.079864501953125, 0.7860908508300781, 1.0998458862304688, 1.9158782958984375, 2.493927001953125, 3.8608322143554688, 8.637092590332031, 1.0441741943359375, 1.8427963256835938, -0.1468677520751953, 0.71942138671875, -0.0801544189453125, 0.4131011962890625, -0.6802215576171875, 0.298797607421875, 0.0409698486328125, 8.980987548828125, 0.8921318054199219, -0.246002197265625, 1.9840068817138672, 6.581329345703125, 1.5162639617919922, -1.25115966796875, 0.033588409423828125, 2.8107070922851562, 1.2029380798339844, 2.0565032958984375, -0.18499755859375, -0.7371864318847656, -0.6680831909179688, 0.5955486297607422, 4.935529708862305, 0.7617950439453125, 3.2861785888671875, 2.169830322265625, -1.1810073852539062, 0.12042427062988281, -1.50677490234375, 2.0588531494140625, 0.40541648864746094, -0.3250389099121094, 1.8153076171875, 0.8596954345703125, -0.5450305938720703, 4.160377502441406, 1.82733154296875, -2.4330825805664062, 0.8417587280273438, 3.8082351684570312, -0.10682296752929688, -1.2475109100341797, 1.40338134765625, -0.9506683349609375, 1.8592414855957031, 0.9155426025390625, 0.573883056640625, 0.6169166564941406, -2.9960403442382812, 0.387847900390625, -1.335723876953125, 3.2741546630859375, -0.7316131591796875, 0.9267997741699219, -2.637115478515625, -7.4777069091796875, 0.534881591796875, 0.4640655517578125, 0.811370849609375, 0.4434661865234375, 1.1356735229492188, 3.926971435546875, -2.5279541015625, -0.9069061279296875, 1.5448265075683594, 1.828237533569336, 0.7134971618652344, 2.8639068603515625, 0.32546234130859375, -0.9112777709960938, 0.037906646728515625, 0.27988433837890625, 0.09354591369628906, 3.1527328491210938, 0.31988525390625, -2.1553726196289062, -2.7851791381835938, 0.44268798828125, 3.6372451782226562, 1.3156661987304688, -0.818450927734375, -0.40203094482421875, -0.38498687744140625, 0.9708328247070312, 2.963603973388672, -2.9101181030273438, 3.7102813720703125, 1.2163047790527344, 1.0001678466796875, -0.294281005859375, -1.5253524780273438, 0.6732559204101562, -0.570648193359375, -7.8722076416015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000055.npy"}
{"epoch": 0.16666666666666666, "step": 56, "batch_size": 128, "mean": 0.6960806846618652, "std": 2.1661880016326904, "min": -4.541496276855469, "p10": -1.5718040466308594, "median": 0.6047945022583008, "p90": 3.317914581298828, "max": 6.186248779296875, "pos_frac": 0.6171875, "sample": [-0.07552337646484375, -0.1393890380859375, -0.7041778564453125, 4.0189971923828125, 0.583984375, 3.32330322265625, -0.206268310546875, 2.2356948852539062, -0.5260295867919922, 0.5501308441162109, -0.8960189819335938, -0.35959434509277344, 6.161308288574219, 2.3995361328125, 5.4335174560546875, 5.252246856689453, -3.59881591796875, -3.5243301391601562, 0.7682380676269531, -1.7925872802734375, -0.4911956787109375, 2.1484222412109375, -1.0942840576171875, 0.8095626831054688, -1.5452041625976562, 0.4023113250732422, 2.2266006469726562, 0.796905517578125, 1.7940540313720703, -1.0403690338134766, -1.6463546752929688, 0.44266510009765625, 0.30635833740234375, 0.6245651245117188, 1.3428077697753906, -1.216522216796875, -1.6442604064941406, 0.472076416015625, -4.541496276855469, -0.7535400390625, 5.061065673828125, 1.1702842712402344, 1.3291015625, 1.6694259643554688, 0.6169548034667969, -4.08673095703125, -0.4169197082519531, -1.316183090209961, 2.6548080444335938, 3.081512451171875, 3.3156051635742188, -0.2460346221923828, -1.5734329223632812, -0.3427448272705078, 0.5822219848632812, 2.9751205444335938, -1.5270538330078125, 2.96612548828125, -0.83447265625, 0.23662567138671875, 6.186248779296875, -1.57110595703125, 1.4180755615234375, -0.42205810546875, 2.696807861328125, 2.3810577392578125, 4.0722503662109375, 2.5171432495117188, 0.3645668029785156, 0.30823516845703125, 5.328330993652344, 0.9590568542480469, -0.8502044677734375, 1.5467987060546875, -0.8188400268554688, 0.48734283447265625, 1.0955772399902344, -0.2805023193359375, 1.5328254699707031, 2.962432861328125, -0.7537689208984375, 0.6000900268554688, 1.9860649108886719, -1.3442230224609375, 0.6094989776611328, -3.3922271728515625, 1.5260200500488281, 1.394317626953125, -0.12833023071289062, 1.1963539123535156, -1.2234153747558594, 0.046543121337890625, -1.2759552001953125, 0.7663726806640625, 2.3339385986328125, -1.01422119140625, 1.6865005493164062, 0.9312095642089844, 0.598388671875, 2.907012939453125, -3.0920562744140625, 0.5182838439941406, 1.035970687866211, -3.40509033203125, 1.0084457397460938, 1.551666259765625, 4.040046691894531, 0.7459182739257812, 4.974395751953125, 1.161264419555664, 2.356109619140625, -0.9623947143554688, -1.066781997680664, 0.671905517578125, 1.2750167846679688, 0.961151123046875, 1.0156326293945312, 4.2204742431640625, 1.7313919067382812, 3.0216827392578125, 1.3297977447509766, -0.9682540893554688, -2.5675277709960938, -0.121734619140625, -0.8948898315429688, -4.296539306640625, -0.07934188842773438, 5.95697021484375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000056.npy"}
{"epoch": 0.1696969696969697, "step": 57, "batch_size": 128, "mean": 0.4161216616630554, "std": 2.3148062229156494, "min": -6.526611328125, "p10": -2.647469329833984, "median": 0.3656339645385742, "p90": 3.2960144042968738, "max": 8.479644775390625, "pos_frac": 0.6015625, "sample": [-1.8207244873046875, -3.4853286743164062, -2.80975341796875, 1.116109848022461, -0.1893310546875, 1.8676071166992188, 1.128335952758789, -4.133460998535156, 0.20958518981933594, 2.5314903259277344, 1.5525588989257812, -1.8784236907958984, -3.0742034912109375, 1.6770782470703125, 4.414588928222656, -2.154022216796875, 0.7399539947509766, 0.38350677490234375, 0.18131256103515625, 2.223745346069336, -6.526611328125, 1.6944007873535156, 1.586944580078125, -0.472503662109375, 0.5889167785644531, 0.96832275390625, 1.0638275146484375, -3.9001083374023438, 0.6477413177490234, 4.2832183837890625, 0.2297821044921875, -0.7679443359375, -2.6852645874023438, 0.7509384155273438, 0.53448486328125, -0.8291893005371094, -2.7952804565429688, -0.3345184326171875, -1.6431503295898438, -1.1583251953125, 0.32138633728027344, 0.158660888671875, 1.3363571166992188, 1.822122573852539, 3.1781654357910156, -0.6220073699951172, 1.2041664123535156, 8.479644775390625, 0.5556449890136719, 2.2731075286865234, -1.1813507080078125, 1.306976318359375, -1.5477447509765625, 4.9480743408203125, 0.24412155151367188, 0.10633087158203125, 1.2432174682617188, -0.2638530731201172, 1.5658187866210938, -0.1479644775390625, -0.49993896484375, -0.3490142822265625, -1.7519340515136719, 1.6616249084472656, -3.9992752075195312, -3.2169189453125, 0.6196784973144531, 3.6121139526367188, 1.2225170135498047, 1.3762588500976562, 1.7892036437988281, 5.3937835693359375, 0.9160690307617188, -1.559173583984375, -0.22100067138671875, -0.7176780700683594, 0.7058830261230469, -2.6312713623046875, 4.3602447509765625, -2.186878204345703, 3.740081787109375, 0.25579833984375, -0.7299652099609375, 1.1976470947265625, 2.9430084228515625, 0.05905914306640625, 0.6132068634033203, -2.120391845703125, -0.6116485595703125, -3.062755584716797, 0.37851715087890625, -4.3784027099609375, 1.2686710357666016, 3.11602783203125, 1.1105308532714844, 2.443511962890625, 1.3147048950195312, -1.3186016082763672, -2.3604259490966797, -0.4065132141113281, 1.47760009765625, -0.8663330078125, -0.14625930786132812, 1.5159988403320312, 0.18115997314453125, 3.6867218017578125, 3.0775890350341797, 0.9598350524902344, 0.7038650512695312, 2.080974578857422, -1.5368633270263672, 0.3527507781982422, 6.77239990234375, 3.570995330810547, 4.1268157958984375, 0.04355049133300781, 2.92999267578125, 0.8145503997802734, 5.5175628662109375, -0.12741851806640625, 0.2940082550048828, -0.6880569458007812, -2.9198837280273438, -2.429412841796875, 1.5409622192382812, -0.25431060791015625, -0.02651214599609375, -0.06224632263183594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000057.npy"}
{"epoch": 0.17272727272727273, "step": 58, "batch_size": 128, "mean": 1.0063899755477905, "std": 2.2546346187591553, "min": -4.6082916259765625, "p10": -1.745657730102539, "median": 0.7123050689697266, "p90": 3.3331146240234366, "max": 7.440582275390625, "pos_frac": 0.6796875, "sample": [-0.893768310546875, -1.7379112243652344, 2.000762939453125, -0.8973541259765625, 7.3292083740234375, -0.3424072265625, 2.3329238891601562, 1.3292579650878906, 0.4242515563964844, 1.61224365234375, 1.5847320556640625, 2.0812225341796875, 1.1899948120117188, 2.695953369140625, 1.2558517456054688, -0.36486053466796875, 0.4433708190917969, 3.6989898681640625, 0.28249359130859375, 0.9181976318359375, 0.7046279907226562, 2.2064990997314453, -2.5706634521484375, 0.6814384460449219, -0.849639892578125, 0.684661865234375, 0.1297588348388672, -0.5358257293701172, 1.5761833190917969, -0.7313728332519531, -0.11783599853515625, -0.19890785217285156, -1.76373291015625, 7.1559906005859375, 0.8623580932617188, -0.6409378051757812, 2.8422012329101562, -3.8419952392578125, -2.5459976196289062, -1.95135498046875, -1.7149944305419922, 0.4263172149658203, 2.4828567504882812, 0.4259300231933594, -4.6082916259765625, 2.656280517578125, 0.070404052734375, -0.1814746856689453, 0.9539108276367188, -1.9415969848632812, 1.6783523559570312, 1.6611328125, 5.289741516113281, 0.3972492218017578, -0.5504608154296875, 0.5075569152832031, 0.672149658203125, 0.023593902587890625, 2.1960601806640625, -0.21582794189453125, 3.0208282470703125, -0.10546112060546875, 1.0530662536621094, 5.6745452880859375, 4.375297546386719, -0.6663436889648438, -0.09617996215820312, 2.934600830078125, 2.3662567138671875, 3.238311767578125, 2.3387908935546875, 1.1660633087158203, 0.5800304412841797, 0.03145599365234375, 0.5090045928955078, 3.1004791259765625, -2.442718505859375, 0.16944122314453125, 1.4963531494140625, 2.060394287109375, -1.8317832946777344, 0.7199821472167969, 3.5543212890625, -1.08978271484375, 1.8921737670898438, 3.1461334228515625, 0.24466705322265625, -0.9836654663085938, 0.162200927734375, 0.07450103759765625, 0.8031806945800781, 2.697174072265625, 1.3001174926757812, -3.5456466674804688, -0.21570205688476562, 2.452239990234375, 1.3175697326660156, 2.985687255859375, 2.3790969848632812, -1.146738052368164, -4.3067779541015625, 5.487922668457031, -0.06866645812988281, -0.7178936004638672, -0.3980064392089844, 1.8880043029785156, 2.0639190673828125, 1.6879653930664062, 6.247161865234375, 0.39630126953125, 3.0866317749023438, -2.093585968017578, 2.1736793518066406, 3.8217391967773438, 0.7974452972412109, -0.35363006591796875, 6.4346923828125, 1.6127090454101562, 5.8856048583984375, -1.9452056884765625, -0.7988815307617188, 1.8690719604492188, 2.1917591094970703, 7.440582275390625, 2.2077789306640625, 0.08047294616699219, 2.8972625732421875, -0.7575855255126953], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000058.npy"}
{"epoch": 0.17575757575757575, "step": 59, "batch_size": 128, "mean": 0.7120788097381592, "std": 2.00346040725708, "min": -5.2532501220703125, "p10": -1.4713012695312497, "median": 0.6299209594726562, "p90": 3.3467010498046874, "max": 6.9926910400390625, "pos_frac": 0.6328125, "sample": [0.31984710693359375, 1.7636947631835938, 2.1362533569335938, 0.00054168701171875, 4.1398162841796875, 0.7168941497802734, 3.0746116638183594, 0.7105998992919922, -1.3460235595703125, 1.1451263427734375, 0.82208251953125, 2.2263107299804688, 1.9832534790039062, 2.1771888732910156, 1.3462867736816406, -0.7374649047851562, 3.0195579528808594, 0.41213035583496094, 4.55682373046875, 0.210601806640625, 2.0911407470703125, 3.93646240234375, 0.49146270751953125, 1.6622467041015625, -0.305877685546875, 2.8429107666015625, 1.9464569091796875, -1.0536651611328125, 3.4092483520507812, -0.7273178100585938, 6.9926910400390625, -1.7447586059570312, -3.0465087890625, 1.5363922119140625, 0.6062278747558594, -0.2738151550292969, 1.282958984375, -2.5121994018554688, -0.7156829833984375, -0.18100738525390625, -0.6386756896972656, -3.644550323486328, -0.7167205810546875, -1.20135498046875, 3.4530105590820312, 3.3334999084472656, -0.2069549560546875, 3.3344879150390625, 1.725921630859375, -0.95703125, 1.3840789794921875, 1.56011962890625, -1.1958503723144531, -4.222686767578125, 0.5179061889648438, 0.8428115844726562, -1.6557426452636719, 1.3474006652832031, -5.2532501220703125, 0.8784942626953125, 1.2731285095214844, 0.18975067138671875, 0.5488185882568359, -0.4449138641357422, -0.3267059326171875, 1.0015792846679688, 1.2271881103515625, -0.56170654296875, -3.664224624633789, -3.107086181640625, 1.3112411499023438, 1.6464061737060547, -0.171875, 1.7154769897460938, -0.5225296020507812, -1.3627815246582031, -2.0002288818359375, 3.3751983642578125, 2.0499744415283203, 0.408966064453125, 3.2051620483398438, -1.3109817504882812, 1.3438720703125, -1.268280029296875, 0.4941558837890625, 2.9100875854492188, 4.4237518310546875, -1.1590499877929688, 1.7027740478515625, 4.149505615234375, 1.65985107421875, -1.4213752746582031, 1.207183837890625, 5.8517303466796875, 0.537628173828125, 0.994384765625, 2.0041866302490234, -0.016468048095703125, 2.407611846923828, -0.7113494873046875, 0.6536140441894531, 2.686351776123047, 0.41303062438964844, -1.4212417602539062, 0.01842498779296875, -0.5409393310546875, -2.2726974487304688, -0.3780975341796875, 1.2018890380859375, -0.09281158447265625, 1.7008247375488281, 1.1827659606933594, -0.49976348876953125, 3.70367431640625, -0.36092376708984375, 1.4580078125, -1.5877952575683594, 2.7211685180664062, -0.45418548583984375, 3.5509033203125, 0.3581390380859375, 1.6099777221679688, 1.8680477142333984, 4.101360321044922, 0.5747718811035156, -1.6273231506347656, 0.19376373291015625, -0.8053131103515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000059.npy"}
{"epoch": 0.1787878787878788, "step": 60, "batch_size": 128, "mean": 0.8127189874649048, "std": 2.240835428237915, "min": -6.6298980712890625, "p10": -1.851050567626953, "median": 0.5748481750488281, "p90": 3.528167724609375, "max": 8.092697143554688, "pos_frac": 0.625, "sample": [-3.292573928833008, 1.36090087890625, -1.3540210723876953, -1.0188789367675781, -0.49623870849609375, -1.0423698425292969, 0.5602951049804688, 0.3651580810546875, 0.2498779296875, -0.7080268859863281, 0.6807003021240234, 1.903045654296875, -0.08099365234375, 2.3729782104492188, -2.0656356811523438, 1.9442386627197266, 2.205324172973633, 1.7379283905029297, 2.11981201171875, 7.08123779296875, 1.4319515228271484, -2.0466156005859375, 0.04890251159667969, 2.6120147705078125, 3.5177764892578125, 1.44232177734375, -3.5599822998046875, -3.7332305908203125, 0.3495635986328125, -0.9109668731689453, 0.3017311096191406, -0.18968963623046875, -0.11649703979492188, 0.15209197998046875, -2.0730934143066406, -0.360504150390625, 0.62579345703125, 5.648414611816406, 3.4879512786865234, -6.6298980712890625, 3.4137496948242188, 2.8088226318359375, -3.1926193237304688, -0.248321533203125, 4.3578033447265625, 8.092697143554688, 2.1694412231445312, -0.47899627685546875, 2.9979591369628906, 0.17462158203125, -0.45331764221191406, 0.609954833984375, -1.4442214965820312, -3.5641212463378906, 4.2708740234375, 1.6000137329101562, 1.521209716796875, 4.1301422119140625, 2.446990966796875, -0.15082550048828125, 1.315896987915039, 2.32568359375, -0.34384918212890625, 0.46787261962890625, 3.75799560546875, -0.17246055603027344, -0.4291114807128906, 3.09613037109375, 1.4147491455078125, -0.20259857177734375, 3.9765968322753906, 0.37786102294921875, 2.588390350341797, -1.1277961730957031, 2.3621826171875, 2.4519729614257812, 2.682647705078125, -0.7446670532226562, 2.2804908752441406, 0.9924392700195312, 4.27288818359375, -0.8702659606933594, 2.9703826904296875, -0.10863113403320312, 0.8593788146972656, 0.1353473663330078, 1.693145751953125, -3.6120223999023438, 1.2111282348632812, -0.39908599853515625, -0.9342670440673828, 0.2277202606201172, 1.5956764221191406, 0.36202239990234375, -0.11084938049316406, -0.02837371826171875, 0.007854461669921875, -1.5461349487304688, 3.1548309326171875, 1.3975410461425781, 1.9309272766113281, 2.5648422241210938, 0.7695083618164062, 4.7501068115234375, -0.461761474609375, 0.5419540405273438, 2.428009033203125, -1.9296798706054688, 2.0162734985351562, 3.5524139404296875, 5.394813537597656, -1.817352294921875, 0.37982177734375, 1.6278419494628906, -0.54229736328125, 4.1703643798828125, -0.4914054870605469, -1.5381927490234375, -2.381040573120117, 0.5894012451171875, 1.610321044921875, -1.3602828979492188, -1.2763118743896484, -2.1948623657226562, 1.6160202026367188, 1.7494544982910156, 1.9519824981689453, 1.44580078125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000060.npy"}
{"epoch": 0.18181818181818182, "step": 61, "batch_size": 128, "mean": 1.0470447540283203, "std": 2.858016014099121, "min": -4.63494873046875, "p10": -2.0736976623535157, "median": 0.7266721725463867, "p90": 4.278765106201171, "max": 10.576019287109375, "pos_frac": 0.6484375, "sample": [0.6042327880859375, 6.06719970703125, 0.32801055908203125, -0.39985084533691406, 2.1426734924316406, 7.134002685546875, 4.864795684814453, -1.843475341796875, 1.2967033386230469, 2.795562744140625, 2.7045364379882812, -2.0841064453125, 8.873992919921875, -1.0857620239257812, 3.1437225341796875, 0.4851531982421875, -0.6160964965820312, 1.3895797729492188, 1.9128341674804688, 1.049652099609375, -4.28082275390625, 2.1411285400390625, 2.228759765625, 0.09186935424804688, -3.2989349365234375, 0.5261936187744141, 2.599710464477539, 1.2069129943847656, 1.3644485473632812, 1.7287788391113281, -0.91253662109375, 6.745147705078125, 0.3616905212402344, 4.431510925292969, -0.32889556884765625, -1.5291366577148438, 4.2133026123046875, 3.1297683715820312, -2.060760498046875, -0.28272438049316406, 3.3474884033203125, 1.1822357177734375, 0.04360198974609375, -1.0411872863769531, -0.5037460327148438, 0.8294601440429688, -0.9112796783447266, 5.0126953125, -0.7384262084960938, -0.9652824401855469, 0.5295562744140625, 2.1694793701171875, 0.6870803833007812, -1.7614822387695312, -2.556427001953125, 8.664604187011719, -0.18900299072265625, -0.388397216796875, 0.4867839813232422, 1.2719001770019531, 0.44518280029296875, 1.5756111145019531, 3.2760009765625, -2.021940231323242, 8.757003784179688, -2.0935935974121094, 1.20965576171875, 2.007068634033203, 3.8554840087890625, -1.5878143310546875, 9.129913330078125, 5.050224304199219, -2.314483642578125, 1.2389984130859375, -1.9547882080078125, 2.9580860137939453, 0.6861534118652344, 2.3012657165527344, 1.9512672424316406, 1.9534168243408203, -0.8741912841796875, -1.387847900390625, -2.0780715942382812, 1.0836601257324219, 1.1099395751953125, 1.7706985473632812, 0.5681781768798828, 0.21967315673828125, -0.4158477783203125, 0.7884902954101562, -0.3709831237792969, -1.4872322082519531, 0.11053657531738281, -0.20087432861328125, -2.0718231201171875, 0.7662639617919922, -3.224191665649414, 4.094507217407227, 2.028858184814453, -0.717193603515625, 0.06952095031738281, 2.2882537841796875, 1.4334125518798828, 10.576019287109375, 1.2122993469238281, 4.1683807373046875, -2.1944236755371094, -3.0485305786132812, 2.1111297607421875, 2.3416099548339844, -4.63494873046875, 1.5843887329101562, -1.5887603759765625, -2.3059921264648438, -1.9484405517578125, 3.691925048828125, -4.363899230957031, 1.2535438537597656, 0.7773475646972656, 0.55303955078125, 1.3721237182617188, 0.4639854431152344, -1.4174957275390625, 2.681934356689453, -0.9983253479003906, 0.3587608337402344, 1.5892333984375, 9.851943969726562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000061.npy"}
{"epoch": 0.18484848484848485, "step": 62, "batch_size": 128, "mean": 0.9685189723968506, "std": 2.840014934539795, "min": -8.518539428710938, "p10": -2.1679439544677734, "median": 0.7586755752563477, "p90": 4.236691284179687, "max": 14.952789306640625, "pos_frac": 0.6953125, "sample": [1.4325141906738281, 3.2141952514648438, 0.7187423706054688, 14.952789306640625, -1.1526317596435547, -1.453338623046875, 1.0522918701171875, 0.234710693359375, 1.7069320678710938, 1.8200759887695312, 0.4165153503417969, 5.432891845703125, -1.8022613525390625, 0.6312484741210938, -1.2997093200683594, 3.0161209106445312, 0.4029884338378906, 3.8559932708740234, 0.03474235534667969, -2.991466522216797, -2.21197509765625, -2.1850204467773438, -0.4244804382324219, 0.6811141967773438, 1.5638923645019531, 1.3992481231689453, -5.181995391845703, 1.0622444152832031, 0.5274581909179688, 1.7423171997070312, 0.209747314453125, 4.2069091796875, 2.2013587951660156, 0.9118366241455078, 0.45650482177734375, 5.527313232421875, -0.7857627868652344, 0.340087890625, 0.12746429443359375, 2.247943878173828, 1.4951324462890625, 5.09564208984375, 0.7560367584228516, -0.8273239135742188, 3.7132568359375, 0.7062759399414062, 1.530984878540039, 4.554107666015625, -0.5209808349609375, -2.2213478088378906, 0.8252010345458984, -2.2979660034179688, 0.6366729736328125, 3.2177581787109375, -1.0448379516601562, -1.1307640075683594, -0.40421104431152344, 1.9820327758789062, -1.13873291015625, 0.6892890930175781, 0.7379264831542969, 1.678680419921875, -4.313690185546875, -3.584686279296875, -0.5356254577636719, 6.2409210205078125, -0.10774612426757812, -2.160625457763672, 1.4776535034179688, 2.1089859008789062, 5.6396026611328125, 2.608245849609375, 0.695587158203125, 0.7711277008056641, -0.4057655334472656, -0.8459529876708984, -8.518539428710938, 1.0839118957519531, 0.4805450439453125, 3.5257415771484375, -6.174285888671875, 3.2974853515625, 0.7903213500976562, 7.5701751708984375, 4.722877502441406, -0.6151199340820312, -0.25897216796875, 1.93212890625, -0.8123645782470703, 0.6503562927246094, 6.314849853515625, 1.4008522033691406, -0.4120979309082031, 0.869964599609375, 0.3936920166015625, 1.4293594360351562, 7.368133544921875, 4.306182861328125, 3.9155502319335938, 2.1268157958984375, 0.6530246734619141, 2.921966552734375, 0.6361503601074219, 1.629730224609375, 0.7613143920898438, 1.8665580749511719, 0.12500381469726562, 2.9234771728515625, -6.4863739013671875, -2.8218421936035156, 1.7277698516845703, 0.9655303955078125, -0.03620147705078125, 2.0095367431640625, 1.5174064636230469, 1.1816558837890625, 0.4758720397949219, 2.8984298706054688, -0.71795654296875, 5.072196960449219, 0.9783477783203125, -0.9240264892578125, -2.954254150390625, 2.0490798950195312, -0.7376518249511719, 2.6268653869628906, -0.46500396728515625, 2.445831298828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000062.npy"}
{"epoch": 0.18787878787878787, "step": 63, "batch_size": 128, "mean": 0.6536204814910889, "std": 2.6191351413726807, "min": -5.847808837890625, "p10": -2.738001251220703, "median": 0.5962438583374023, "p90": 3.632354354858398, "max": 11.908660888671875, "pos_frac": 0.6171875, "sample": [-3.0198822021484375, 3.5493392944335938, 3.6800804138183594, 0.71759033203125, -1.081787109375, -0.5036430358886719, 2.4539108276367188, 0.4886207580566406, 4.28460693359375, -1.78985595703125, 1.9945487976074219, -0.49981117248535156, 2.1077728271484375, 1.8018531799316406, -1.010467529296875, 1.9631271362304688, 3.1674537658691406, 3.86077880859375, 1.9749336242675781, -1.8584365844726562, 1.5574092864990234, -0.34857940673828125, 5.475303649902344, 11.908660888671875, 1.124490737915039, 0.21791648864746094, 1.7566986083984375, 0.8340644836425781, -0.4497642517089844, 1.9136695861816406, -1.2434463500976562, 4.279914855957031, 5.602783203125, -0.08097267150878906, 10.1597900390625, -3.7972793579101562, 0.5609970092773438, -0.10913848876953125, 1.0286121368408203, 3.7723159790039062, -0.5262069702148438, 0.27203369140625, 0.9861907958984375, -2.887981414794922, 0.9517669677734375, 0.28243255615234375, 1.7345657348632812, 3.2445220947265625, -1.59228515625, 2.0042495727539062, 3.6083450317382812, 0.880828857421875, 1.0399513244628906, -4.092887878417969, -0.59765625, 1.9645748138427734, 0.3074798583984375, -0.8104400634765625, 2.919994354248047, 1.0626754760742188, 1.5178108215332031, -1.1840324401855469, -4.2796630859375, -5.847808837890625, 2.358247756958008, -0.1595611572265625, 1.6378765106201172, 0.9587135314941406, 0.6021327972412109, 0.14984130859375, 0.5866165161132812, 0.2271747589111328, -1.8054161071777344, -1.6028671264648438, -0.0070247650146484375, -1.9918022155761719, 2.1057586669921875, 2.1386260986328125, -0.9255294799804688, 1.5735759735107422, -0.9017581939697266, 1.1992568969726562, -4.415952682495117, 0.5903549194335938, -1.8007526397705078, 2.5674896240234375, 1.190237045288086, -3.05670166015625, -1.7875785827636719, -0.6772994995117188, -0.998992919921875, 2.5451889038085938, 0.8634719848632812, 1.5313224792480469, 0.307861328125, -2.8711471557617188, 1.3482894897460938, -0.7485179901123047, 1.9188175201416016, 1.453216552734375, -4.7122955322265625, -0.5454025268554688, 3.86993408203125, 0.2951831817626953, -1.2697124481201172, 0.2110443115234375, 3.1937789916992188, -3.6809921264648438, -1.9924468994140625, 0.14404678344726562, -0.9299354553222656, -4.54266357421875, 3.7000656127929688, 2.1198043823242188, 1.7792778015136719, 2.5833778381347656, 3.6119003295898438, -2.680938720703125, 3.7879562377929688, 5.0919036865234375, 0.3265228271484375, -0.48548126220703125, -0.9741897583007812, -3.5682525634765625, 2.8224945068359375, 1.9389266967773438, -0.8663311004638672, 2.9300384521484375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000063.npy"}
{"epoch": 0.19090909090909092, "step": 64, "batch_size": 128, "mean": 1.19741952419281, "std": 2.853752851486206, "min": -5.4060821533203125, "p10": -1.9298179626464842, "median": 0.9911346435546875, "p90": 4.627633666992187, "max": 11.01361083984375, "pos_frac": 0.6796875, "sample": [-2.1408767700195312, 3.734344482421875, 6.1587677001953125, 0.5756912231445312, -1.77777099609375, 4.2205657958984375, 0.1861724853515625, 2.5946197509765625, 2.2464675903320312, 0.2386932373046875, 2.5809860229492188, 0.21345901489257812, 3.0885772705078125, -1.760101318359375, 0.795257568359375, 1.6489524841308594, 0.05414772033691406, 6.29833984375, -0.07257080078125, 4.993459701538086, 1.7795677185058594, 2.393756866455078, 4.265159606933594, 4.265220642089844, 0.4534912109375, -3.4105224609375, -2.5269012451171875, 3.310821533203125, -0.34389495849609375, 0.010955810546875, 0.5442657470703125, -2.0625152587890625, 0.06157684326171875, 1.4597930908203125, 4.099205017089844, 0.40679931640625, 2.651439666748047, 3.43603515625, -1.8729476928710938, -0.48087310791015625, 1.2067527770996094, -2.9866790771484375, 1.436676025390625, 0.94000244140625, 4.1122589111328125, 1.1213531494140625, 0.17959976196289062, 2.0360679626464844, -3.724987030029297, 3.3237838745117188, 1.6398124694824219, -0.3845062255859375, -1.0840225219726562, 0.8227119445800781, -1.3067703247070312, -0.6687507629394531, -1.6265792846679688, -1.48443603515625, 2.1794815063476562, 1.1596527099609375, 0.9149456024169922, 1.6095237731933594, 3.3696441650390625, -0.28525352478027344, 5.7711181640625, 2.643157958984375, 1.6873550415039062, 1.0336875915527344, 1.0550346374511719, 1.3529701232910156, 1.2873077392578125, 8.657501220703125, -0.06679153442382812, 1.7776870727539062, -4.7796478271484375, 6.791252136230469, 6.9849700927734375, -0.4109210968017578, -0.2897987365722656, 11.01361083984375, 1.5993385314941406, 5.971153259277344, 2.5952529907226562, 2.2093276977539062, -1.3507194519042969, 1.7376461029052734, -5.215522766113281, 2.8973159790039062, -2.9055099487304688, -0.5659427642822266, 1.8851318359375, 0.3764495849609375, 1.849853515625, 0.094696044921875, 0.9485816955566406, -1.2114486694335938, -1.2215385437011719, 8.612396240234375, 1.6089019775390625, -5.4060821533203125, 4.0312957763671875, 0.24673080444335938, 2.7321834564208984, -4.646488189697266, 7.3604278564453125, 1.54693603515625, 0.29064178466796875, 1.7606964111328125, -1.8667221069335938, 0.48810386657714844, 0.2066497802734375, -0.14617538452148438, -0.4257354736328125, 1.1412353515625, -0.2525444030761719, 0.24734878540039062, -3.178314208984375, 4.6190643310546875, -0.5858440399169922, -0.13214111328125, -0.6317214965820312, 3.428638458251953, 7.341705322265625, -1.6403121948242188, -2.347301483154297, 1.8814926147460938, 3.316547393798828, 4.6476287841796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000064.npy"}
{"epoch": 0.19393939393939394, "step": 65, "batch_size": 128, "mean": 0.7975848913192749, "std": 3.253969192504883, "min": -9.9725341796875, "p10": -2.9528278350830073, "median": 0.8165969848632812, "p90": 4.50690803527832, "max": 12.999542236328125, "pos_frac": 0.59375, "sample": [-6.139457702636719, -2.3647289276123047, 1.2741661071777344, -3.638458251953125, -0.2932090759277344, 2.6895523071289062, 3.5211334228515625, 0.3629188537597656, 5.760292053222656, -0.4595603942871094, 0.5085678100585938, 1.2903480529785156, -0.553009033203125, -9.9725341796875, -0.44921112060546875, 2.03521728515625, -2.8357696533203125, 1.5400142669677734, 0.5501480102539062, 5.953254699707031, -5.029304504394531, -0.645416259765625, -2.669178009033203, 1.7742233276367188, 2.8056182861328125, 3.3120574951171875, 1.3089675903320312, 3.8228988647460938, 4.728485107421875, -0.14899063110351562, -1.2038764953613281, -2.5275344848632812, 3.5748138427734375, 4.478328704833984, 2.9797744750976562, -0.1861419677734375, 2.0130691528320312, -2.6154251098632812, -1.2648239135742188, 0.64190673828125, -0.5578060150146484, 12.999542236328125, 1.8565673828125, -2.2117538452148438, 0.7754440307617188, 0.360565185546875, 2.3987350463867188, -4.200569152832031, 2.2695465087890625, -2.1354923248291016, -1.86834716796875, -1.018606185913086, 1.0848159790039062, 1.619760513305664, -4.0030059814453125, 5.145105361938477, 0.9797859191894531, -6.470619201660156, 4.791473388671875, 0.8577499389648438, 1.9968509674072266, -4.2467803955078125, -0.44801902770996094, 1.9536514282226562, -1.4282455444335938, 0.9484367370605469, 3.16876220703125, 5.4588623046875, -0.6734085083007812, -4.586151123046875, 0.37042236328125, 3.8969860076904297, 0.4430198669433594, -6.3495941162109375, -3.520843505859375, 4.5735931396484375, -0.8185863494873047, -2.807373046875, -0.21520233154296875, 2.2975234985351562, 3.774547576904297, 1.187002182006836, 1.8060302734375, 4.041648864746094, 4.026954650878906, 1.1201400756835938, 9.125411987304688, 1.5664215087890625, 3.0720062255859375, -0.7002983093261719, -1.93084716796875, -1.1321830749511719, 1.7313690185546875, 0.6792659759521484, -1.405517578125, 3.3778076171875, 2.848468780517578, 0.9994850158691406, -1.4093475341796875, 4.2258453369140625, 4.7189788818359375, 0.6782512664794922, -3.6707916259765625, 3.693817138671875, 0.12277984619140625, -1.6974945068359375, -1.8681182861328125, -1.7430419921875, -0.36560630798339844, -0.07345962524414062, 4.4106903076171875, 3.2313613891601562, -0.8611221313476562, -3.225963592529297, 1.5848312377929688, -1.6903572082519531, 0.4735984802246094, 1.6346054077148438, 3.8262176513671875, 1.1180763244628906, 8.644866943359375, -0.14993667602539062, 4.7718353271484375, 6.732635498046875, 2.9055519104003906, 1.0618324279785156, -0.1217803955078125, 4.328483581542969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000065.npy"}
{"epoch": 0.19696969696969696, "step": 66, "batch_size": 128, "mean": 1.2050213813781738, "std": 2.8948519229888916, "min": -9.71270751953125, "p10": -1.7115333557128904, "median": 1.064743995666504, "p90": 4.845614242553711, "max": 10.33782958984375, "pos_frac": 0.6875, "sample": [1.2108917236328125, 4.271997451782227, -1.6725044250488281, -1.2025260925292969, -0.9863262176513672, 6.528533935546875, 2.1262874603271484, 1.0900840759277344, 4.105926513671875, 1.9759521484375, 3.2639694213867188, -9.71270751953125, -2.022357940673828, -0.4725799560546875, 10.33782958984375, 0.7244415283203125, 1.524444580078125, -0.5385150909423828, 6.450538635253906, 10.153060913085938, 0.180450439453125, 6.509613037109375, 1.249776840209961, 2.8789749145507812, 0.6983871459960938, 4.49176025390625, 1.0994462966918945, -0.20670700073242188, 2.3821372985839844, -3.6933822631835938, -0.5064163208007812, 6.096443176269531, -1.7582473754882812, 7.257713317871094, 0.37224578857421875, 0.9986591339111328, -0.5638160705566406, 0.41943359375, 0.2485198974609375, 1.9959526062011719, -3.693603515625, -0.4244232177734375, -0.7892379760742188, -1.0032196044921875, -1.0612716674804688, -0.6948165893554688, 0.39544677734375, 4.440093994140625, -0.32743263244628906, 5.392799377441406, -0.48293495178222656, 0.9028167724609375, -0.1142425537109375, 0.16553878784179688, 0.24947547912597656, 0.3953857421875, 1.075979232788086, 1.0535087585449219, 6.02587890625, 2.5481185913085938, 1.28033447265625, 1.0918598175048828, 4.262851715087891, 1.8556251525878906, 0.6749725341796875, 0.7344150543212891, 0.38552093505859375, 3.5211029052734375, -1.398712158203125, -0.10495758056640625, 3.2061538696289062, 0.2277374267578125, -3.3162307739257812, 6.8715057373046875, 3.5706787109375, 0.8755416870117188, -4.483482360839844, -3.79718017578125, 4.812747955322266, -1.83795166015625, -3.1750526428222656, 2.199310302734375, 0.89727783203125, 5.6747894287109375, 4.5058746337890625, 1.3592987060546875, 2.4561328887939453, 1.7740478515625, -0.24851226806640625, 1.6764411926269531, 2.4761390686035156, 2.0928573608398438, -0.46756744384765625, 2.5100059509277344, 2.001310348510742, 2.3617706298828125, -1.38983154296875, -1.6915130615234375, 0.8486194610595703, 1.1949520111083984, 0.7325820922851562, -0.039470672607421875, 1.8239517211914062, 0.04444122314453125, -1.3796920776367188, -1.9110183715820312, 2.0236282348632812, 3.3778762817382812, 1.7353324890136719, -0.8868560791015625, 7.105369567871094, 0.27463531494140625, 1.128377914428711, 1.692535400390625, 3.9524993896484375, 1.1895294189453125, 1.4475841522216797, -1.2030792236328125, 2.9050445556640625, 1.4625740051269531, 4.92230224609375, -1.3591842651367188, 0.1100006103515625, 1.8721542358398438, -2.8308181762695312, 1.294952392578125, -5.5640869140625, 3.471435546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000066.npy"}
{"epoch": 0.2, "step": 67, "batch_size": 128, "mean": 0.9870754480361938, "std": 3.196611166000366, "min": -8.049835205078125, "p10": -2.5778173446655273, "median": 0.580317497253418, "p90": 5.006455993652343, "max": 11.783248901367188, "pos_frac": 0.625, "sample": [-0.674530029296875, -4.320281982421875, 0.3021049499511719, 1.5390701293945312, -0.12106513977050781, -1.2989501953125, 2.0992050170898438, 5.717632293701172, -1.360931396484375, -1.91412353515625, 3.660064697265625, -0.308563232421875, 0.5921058654785156, 0.8955497741699219, -2.281951904296875, 2.709360122680664, -3.8809967041015625, 2.3866233825683594, -3.442340850830078, -0.5512771606445312, 0.27504730224609375, -4.7007598876953125, 3.332611083984375, 5.599456787109375, -3.362274169921875, 5.572257995605469, -0.5210418701171875, 0.327667236328125, -0.16955184936523438, 3.3733062744140625, 1.4408988952636719, -0.7608375549316406, 0.08886337280273438, 0.7923049926757812, -1.3546142578125, 11.783248901367188, 0.892608642578125, -0.120208740234375, -5.1945648193359375, -2.6207275390625, 1.8498306274414062, -1.1666450500488281, 5.4864959716796875, -1.163726806640625, 4.558708190917969, -0.3001422882080078, -1.2205162048339844, -0.2028789520263672, -5.228759765625, 0.3027191162109375, 0.21087646484375, -2.2837257385253906, -0.0026397705078125, 3.7167205810546875, 2.2845230102539062, 3.8847618103027344, 1.8857955932617188, 4.047695159912109, 3.4156723022460938, -0.16324424743652344, 1.1068801879882812, -2.4013519287109375, -0.6727066040039062, -1.3565292358398438, -4.430488586425781, 3.3859634399414062, -1.7845535278320312, 2.404041290283203, 0.35224151611328125, 0.3164558410644531, 1.080718994140625, 2.2610702514648438, 4.049705505371094, 1.9283294677734375, -3.373394012451172, 4.169292449951172, 3.642078399658203, 1.1948814392089844, 2.0780029296875, -1.5912551879882812, 3.7388763427734375, -0.12313079833984375, 0.10055351257324219, -2.559427261352539, 2.377410888671875, 0.8399467468261719, -2.7282791137695312, 2.7307186126708984, 5.11077880859375, 0.45621490478515625, 1.83905029296875, 0.5888442993164062, -0.5343093872070312, 3.0482177734375, 7.2308502197265625, 1.043670654296875, 4.786155700683594, 7.798919677734375, 4.9617462158203125, 2.947376251220703, -1.2390594482421875, -0.5238151550292969, 0.40665245056152344, 0.5063037872314453, 3.3915367126464844, 2.8316116333007812, -1.1317291259765625, 0.31850433349609375, 0.1653118133544922, 0.3850898742675781, -1.4070205688476562, 0.5717906951904297, 9.801666259765625, 2.0742721557617188, -7.69354248046875, 4.0457763671875, 0.8033294677734375, 1.2182884216308594, 6.3528594970703125, -8.049835205078125, 0.5890426635742188, 0.7357101440429688, -0.15458106994628906, 5.871513366699219, 8.843399047851562, 7.12701416015625, 0.7093162536621094, -0.549224853515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000067.npy"}
{"epoch": 0.20303030303030303, "step": 68, "batch_size": 128, "mean": 0.9517187476158142, "std": 3.2600560188293457, "min": -9.133575439453125, "p10": -2.900426387786865, "median": 0.8287143707275391, "p90": 4.880363464355469, "max": 10.215682983398438, "pos_frac": 0.65625, "sample": [-0.5482330322265625, 0.07175445556640625, -3.2629928588867188, 0.6431140899658203, 1.0333518981933594, 1.0306835174560547, -4.286994934082031, -1.6357269287109375, 3.5583038330078125, -1.18585205078125, -0.6289501190185547, 1.6548309326171875, 1.230794906616211, 2.885650634765625, -1.7673912048339844, 3.0955047607421875, 5.3336944580078125, 1.318765640258789, 3.155364990234375, -0.4002265930175781, 0.5711135864257812, -0.1737194061279297, 0.07074737548828125, -0.6063747406005859, 1.83282470703125, 5.673797607421875, -1.0977630615234375, -8.974700927734375, -1.5921783447265625, 1.5038490295410156, 3.572052001953125, 0.9472122192382812, 0.7167987823486328, 3.4577178955078125, 2.9390411376953125, -5.251838684082031, -0.26507568359375, 0.28258323669433594, 1.043731689453125, -5.170963287353516, 2.2263450622558594, 0.5996284484863281, 0.6854171752929688, 3.9554176330566406, 1.6754150390625, 1.2301139831542969, 3.1598434448242188, -3.8128795623779297, 0.05289649963378906, 10.215682983398438, 0.4082355499267578, 3.2044143676757812, 0.7144889831542969, 1.9884490966796875, 0.21947479248046875, -0.2715435028076172, -0.007720947265625, 0.6018505096435547, -2.566282272338867, 3.3574066162109375, 1.39752197265625, 3.4010391235351562, 6.686943054199219, 0.6505393981933594, 4.8385467529296875, 2.7816925048828125, -0.9507408142089844, 2.162342071533203, -4.436309814453125, 4.517040252685547, -1.4695358276367188, -1.4585800170898438, 3.4902000427246094, 2.0576553344726562, 0.625640869140625, -1.7832794189453125, 2.2287750244140625, 2.6533355712890625, 3.0309219360351562, 8.11297607421875, 9.879501342773438, 3.3939132690429688, 7.9795379638671875, 0.07122802734375, -1.1437759399414062, 2.559734344482422, 1.2659072875976562, 0.9238510131835938, -0.423370361328125, 5.5055999755859375, -0.8779563903808594, -5.0071563720703125, 1.3232574462890625, 7.5786285400390625, 1.8733253479003906, 6.5225830078125, -3.0685043334960938, 3.939878463745117, 1.0651397705078125, -9.133575439453125, -0.1726360321044922, -0.558868408203125, -0.751617431640625, 1.72259521484375, -1.4705352783203125, 0.7335777282714844, 4.977935791015625, -1.2783393859863281, 0.9451217651367188, 7.789794921875, 3.51312255859375, -2.431102752685547, -2.82839298248291, -2.3958282470703125, -0.1325836181640625, 2.8755569458007812, 5.239543914794922, 0.5013351440429688, -3.6066818237304688, 3.2561416625976562, -7.268035888671875, 0.4643402099609375, 1.5280132293701172, 1.2847938537597656, 2.2889251708984375, -0.6820220947265625, 0.373260498046875, -3.2728424072265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000068.npy"}
{"epoch": 0.20606060606060606, "step": 69, "batch_size": 128, "mean": 1.435215950012207, "std": 3.4488017559051514, "min": -7.571807861328125, "p10": -1.9402961730957031, "median": 1.2595243453979492, "p90": 4.913960266113281, "max": 18.9632568359375, "pos_frac": 0.71875, "sample": [-0.6248779296875, -2.4476318359375, 3.0388717651367188, 6.65484619140625, 1.1159439086914062, 2.3122634887695312, 7.002544403076172, 3.3426361083984375, -0.0716705322265625, 1.2112579345703125, -0.9681854248046875, 0.9190864562988281, 1.6825942993164062, 1.3652267456054688, 3.1110458374023438, -2.9509429931640625, -1.9210014343261719, 2.5634613037109375, 2.7128067016601562, 0.3280448913574219, 0.7119579315185547, 0.5588645935058594, 1.5769309997558594, -0.01093292236328125, 8.762466430664062, -0.0531005859375, 0.5754489898681641, -1.1408805847167969, -0.4882640838623047, -3.9916610717773438, 1.8042163848876953, 0.8910903930664062, 1.6519393920898438, 18.9632568359375, 1.7583580017089844, 0.6920013427734375, 2.080219268798828, 1.0652999877929688, 2.492206573486328, -0.6773300170898438, -0.17937660217285156, 2.5941162109375, 3.6723175048828125, 6.5895233154296875, 2.1307239532470703, 0.36487579345703125, 2.7157745361328125, -1.4009170532226562, 3.7973060607910156, 1.2663402557373047, 2.1864051818847656, 4.191864013671875, 0.6156959533691406, -2.2283401489257812, -0.8277854919433594, 2.0622997283935547, 0.6206245422363281, -2.0087127685546875, -0.1518402099609375, 2.8629074096679688, 0.908843994140625, 3.2130966186523438, -6.195068359375, 7.322418212890625, 0.460540771484375, 1.4101638793945312, 1.7172889709472656, 1.7216949462890625, 0.7179241180419922, -1.9853172302246094, 4.1602325439453125, -3.517425537109375, 0.7847747802734375, 1.5609169006347656, 2.550567626953125, -1.33026123046875, 2.636615753173828, 1.1586380004882812, 14.248397827148438, 1.5170326232910156, 2.3231353759765625, -0.38509368896484375, 0.6500320434570312, -1.2514686584472656, 2.2662181854248047, -7.372261047363281, -0.4190692901611328, 2.5406455993652344, 4.909309387207031, -1.148406982421875, 0.6649322509765625, 1.2527084350585938, 3.1801300048828125, 0.3868865966796875, 2.91741943359375, 0.23967742919921875, -7.571807861328125, -0.37168312072753906, 1.1755867004394531, -6.240875244140625, 9.299713134765625, 0.37325286865234375, 8.326072692871094, -5.1103668212890625, 2.8979110717773438, 1.5481605529785156, 3.586202621459961, 3.2048873901367188, 1.3560714721679688, 0.16594696044921875, 2.2053070068359375, -0.29709815979003906, 4.924812316894531, 6.003364562988281, 1.2916603088378906, 2.3328018188476562, 0.3260993957519531, -3.6757431030273438, 3.4645137786865234, 1.5835208892822266, 1.2525062561035156, 2.0174007415771484, -1.4871234893798828, -0.5378646850585938, 6.6801300048828125, 2.8577728271484375, 5.213584899902344, -1.375152587890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000069.npy"}
{"epoch": 0.20909090909090908, "step": 70, "batch_size": 128, "mean": 0.9826250076293945, "std": 3.4016666412353516, "min": -10.649002075195312, "p10": -3.2428442001342774, "median": 0.8862190246582031, "p90": 5.135281372070311, "max": 10.69781494140625, "pos_frac": 0.6171875, "sample": [0.01844024658203125, 2.2626590728759766, 1.105926513671875, 2.0149307250976562, -4.31231689453125, 4.986797332763672, 2.012908935546875, 2.8261451721191406, 6.688667297363281, 0.5606422424316406, 3.4162139892578125, 3.234987258911133, -1.8845062255859375, -5.475372314453125, -5.433464050292969, 2.0772056579589844, 1.9466781616210938, 1.62384033203125, -0.3242683410644531, -0.94976806640625, 0.5284461975097656, -0.4607124328613281, 5.76300048828125, 0.8405399322509766, 7.0162811279296875, 6.305995941162109, -3.314056396484375, 1.2511672973632812, 0.7689895629882812, -5.404075622558594, 0.908416748046875, 3.2779159545898438, 1.1244468688964844, -3.283113479614258, -3.4789581298828125, 3.688556671142578, 0.5283451080322266, -2.206602096557617, 2.21575927734375, -1.212738037109375, 0.2723846435546875, 2.598651885986328, 4.10699462890625, -0.7554111480712891, 0.9585380554199219, -2.3984832763671875, 2.2695884704589844, 3.2440452575683594, 0.5313148498535156, -2.0357437133789062, 2.931690216064453, -0.4018898010253906, 0.4558830261230469, -5.606452941894531, 3.013702392578125, -3.1613006591796875, -0.15284347534179688, 3.7278289794921875, -1.5737247467041016, 0.9176483154296875, 2.8562850952148438, 4.933982849121094, 1.7356128692626953, -4.2941436767578125, 6.7149505615234375, 1.868865966796875, -0.9179306030273438, -2.3790817260742188, -1.9229202270507812, 0.3427314758300781, -3.3094863891601562, -10.649002075195312, -0.2995414733886719, -2.03826904296875, 4.049407958984375, 2.523345947265625, 0.054046630859375, 4.1886138916015625, 3.755035400390625, 3.9357986450195312, 3.0755844116210938, -2.6358184814453125, -1.9152393341064453, 10.69781494140625, 2.8426437377929688, 3.0887451171875, 6.16143798828125, 9.153121948242188, 5.9707794189453125, -1.5237808227539062, -3.2255859375, 2.69036865234375, 0.0329132080078125, 0.4702434539794922, 3.9833221435546875, -0.31479454040527344, 1.4448318481445312, 6.0466461181640625, -2.1567230224609375, 2.9360427856445312, -2.7054672241210938, 4.679443359375, 5.4727783203125, -0.7894287109375, -1.0470771789550781, 4.499908447265625, -1.2443256378173828, 2.9560699462890625, 3.1922607421875, -4.626121520996094, -0.87799072265625, -1.02093505859375, 4.482383728027344, 5.955066680908203, 5.368568420410156, 3.999969482421875, -0.6395034790039062, -0.7348537445068359, 5.035301208496094, 1.9764251708984375, 0.8640213012695312, -7.510902404785156, -0.480499267578125, 0.17236328125, -1.0426559448242188, 4.049221038818359, -1.6212844848632812, -0.75396728515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000070.npy"}
{"epoch": 0.21212121212121213, "step": 71, "batch_size": 128, "mean": 1.2390192747116089, "std": 3.259361982345581, "min": -8.380989074707031, "p10": -2.6289440155029293, "median": 1.1285667419433594, "p90": 4.9519401550292965, "max": 13.90972900390625, "pos_frac": 0.6484375, "sample": [-4.416473388671875, -0.7067642211914062, 7.0078887939453125, 0.4659461975097656, 2.76220703125, -0.6960296630859375, 1.7519187927246094, 3.9158935546875, 0.4151802062988281, 0.5790920257568359, -3.4045867919921875, -8.380989074707031, 0.51849365234375, -0.9526901245117188, -0.5771598815917969, 1.93511962890625, 4.1304931640625, 4.05975341796875, 2.9645652770996094, 1.5727272033691406, 3.4856719970703125, 0.239013671875, 5.578264236450195, 5.34906005859375, 0.9317302703857422, 2.58087158203125, -4.1579742431640625, -0.5565090179443359, 2.0511951446533203, 7.92919921875, 1.2077484130859375, 13.90972900390625, 1.4009513854980469, -1.2001628875732422, -0.15961456298828125, -4.276042938232422, 0.4516944885253906, -2.4741878509521484, 6.9152069091796875, 1.854024887084961, 0.852264404296875, 4.0920257568359375, 1.4449920654296875, -2.9495849609375, 2.7632675170898438, -4.6739044189453125, -0.1019439697265625, -4.040657043457031, 4.014808654785156, 2.9922657012939453, 4.309349060058594, 1.4965972900390625, 7.4761199951171875, 2.4464492797851562, -0.18553543090820312, 5.0193328857421875, 0.42362022399902344, 3.0374832153320312, 4.923057556152344, 10.831100463867188, 2.673095703125, -0.2998390197753906, -3.5458831787109375, 1.830657958984375, 2.1923599243164062, -6.041648864746094, 0.8973064422607422, 2.5601806640625, -0.8801651000976562, 3.506805419921875, 2.5749053955078125, -0.20592498779296875, 0.9539947509765625, 1.9625625610351562, -0.8456459045410156, 0.6093044281005859, -0.36965179443359375, 3.195068359375, 1.610483169555664, -1.3431282043457031, 6.824798583984375, 5.132598876953125, -1.1327133178710938, 1.1335678100585938, 0.21082687377929688, -1.9759960174560547, 0.7690582275390625, 2.662639617919922, -0.6084747314453125, 1.728302001953125, 1.1864852905273438, -0.3882560729980469, -0.51275634765625, 8.9942626953125, 2.663818359375, 4.631378173828125, -2.3499717712402344, -1.5620880126953125, 2.3192291259765625, -0.39892578125, 1.123565673828125, -2.836406707763672, -4.120700836181641, 0.82049560546875, 0.8362808227539062, 5.795463562011719, -1.4671669006347656, -0.96307373046875, -2.5400314331054688, 4.472148895263672, -1.2623138427734375, 3.8851470947265625, 4.1951141357421875, 3.9101028442382812, 0.29894256591796875, -0.6241722106933594, -4.137027740478516, -2.1895294189453125, 1.4854164123535156, 2.9567413330078125, 2.8139724731445312, 2.074960708618164, 2.842376708984375, 1.9907150268554688, 0.130401611328125, -1.2377204895019531, -0.3471527099609375, 2.1497344970703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000071.npy"}
{"epoch": 0.21515151515151515, "step": 72, "batch_size": 128, "mean": 0.43176063895225525, "std": 3.8344993591308594, "min": -15.1102294921875, "p10": -3.7776870727539062, "median": 0.33831024169921875, "p90": 4.9487409591674805, "max": 13.533645629882812, "pos_frac": 0.5625, "sample": [-1.4765548706054688, 3.279144287109375, 1.4656906127929688, 4.605297088623047, 0.8277473449707031, 3.43865966796875, 0.3494110107421875, -0.23180389404296875, 0.22196197509765625, -6.1867218017578125, 5.371421813964844, 1.4222049713134766, -3.1896514892578125, 0.8058242797851562, 5.209327697753906, 0.8603553771972656, -2.872100830078125, 0.6864166259765625, 3.1355857849121094, -0.2830238342285156, -4.883697509765625, -0.2867565155029297, -3.2661361694335938, 4.1851654052734375, -0.15142440795898438, 2.456085205078125, -2.4168014526367188, -2.305917739868164, 1.8183059692382812, 4.916385650634766, -5.3897857666015625, 2.2956581115722656, 5.083599090576172, -1.7446098327636719, 3.3496017456054688, -3.6702423095703125, 8.745864868164062, -0.25568389892578125, 5.059654235839844, 0.5209503173828125, 2.592367172241211, 1.0823020935058594, -1.7099838256835938, -3.74163818359375, 0.46961402893066406, -2.9178237915039062, -6.274375915527344, 1.333913803100586, 3.2676620483398438, -15.1102294921875, 1.8429794311523438, 0.191650390625, 0.1297016143798828, 1.5946502685546875, 5.8237457275390625, 3.14385986328125, 4.178993225097656, 1.79547119140625, 1.5422935485839844, 2.214059829711914, 3.474374771118164, -1.6844615936279297, -1.1991996765136719, 0.6551628112792969, 4.44312858581543, 1.8194046020507812, -0.6223678588867188, 0.07135009765625, -0.2508201599121094, 2.2254257202148438, -0.27447509765625, 2.0591278076171875, 5.170707702636719, -1.5815963745117188, -2.775177001953125, 4.6871185302734375, 1.4847412109375, -3.508441925048828, -6.267791748046875, 2.8433380126953125, -2.4244041442871094, -3.3223800659179688, 0.32720947265625, -1.3657608032226562, -0.0998687744140625, 5.024236679077148, 13.533645629882812, 4.365207672119141, 0.7031631469726562, -2.9765472412109375, 0.35378456115722656, -1.8266220092773438, -0.1529083251953125, 3.669900894165039, -5.48211669921875, 3.573688507080078, 1.5207653045654297, -0.18593978881835938, -4.054962158203125, 4.035697937011719, -3.165088653564453, 0.0517425537109375, -1.676025390625, -1.0572357177734375, -3.3220462799072266, -2.3444442749023438, -4.300506591796875, 7.27899169921875, 0.1524658203125, -0.26578330993652344, 7.638702392578125, -3.97772216796875, 7.156402587890625, 0.7010955810546875, -5.087677001953125, 10.341659545898438, -1.786376953125, -3.8618011474609375, 0.2120819091796875, -1.0218544006347656, 2.8924636840820312, 2.8409881591796875, -0.30953216552734375, -10.850730895996094, 3.01226806640625, -1.0751819610595703, 0.6154537200927734, -2.4609031677246094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000072.npy"}
{"epoch": 0.21818181818181817, "step": 73, "batch_size": 128, "mean": 0.7568708658218384, "std": 4.003836631774902, "min": -12.551300048828125, "p10": -3.482269287109375, "median": 1.0080175399780273, "p90": 5.299250793457031, "max": 20.65594482421875, "pos_frac": 0.6875, "sample": [1.2851791381835938, 3.688199996948242, 0.3480968475341797, 5.3193206787109375, -0.7676239013671875, -10.435302734375, 3.8731231689453125, -2.9555587768554688, 0.3005828857421875, 6.0277099609375, -5.552101135253906, 2.445343017578125, 2.7053909301757812, 1.819314956665039, -2.0619945526123047, -1.5478515625, 2.47821044921875, 6.044670104980469, -12.551300048828125, 2.341817855834961, 1.254037857055664, 1.0029411315917969, 3.0081787109375, 2.204212188720703, 0.14302444458007812, -0.523468017578125, 2.0454139709472656, 1.0130939483642578, 1.5342998504638672, 0.5239944458007812, -3.479522705078125, 20.65594482421875, 7.0566864013671875, -7.5742950439453125, 0.6020355224609375, 0.22649383544921875, 0.3920001983642578, 1.9652099609375, 5.393251419067383, 4.0573577880859375, 3.22467041015625, 0.374664306640625, 2.64227294921875, 2.51824951171875, -1.72015380859375, -1.2553520202636719, 3.6984519958496094, -0.9251022338867188, -6.860404968261719, 1.4426078796386719, 6.087028503417969, 1.1864910125732422, -1.7201976776123047, -3.488677978515625, 0.2601051330566406, 1.8556804656982422, 3.0960540771484375, -2.7191619873046875, -9.107955932617188, 2.04791259765625, 5.2906494140625, 0.0052642822265625, -4.4699249267578125, 0.3615875244140625, 6.950065612792969, -1.5695571899414062, 0.0211944580078125, 0.9525909423828125, 4.717620849609375, 0.08060073852539062, 0.7038040161132812, 0.6726303100585938, 4.024320602416992, -2.6902313232421875, 1.7090396881103516, -1.75347900390625, 4.181396484375, 0.8406753540039062, -2.23388671875, -0.24666213989257812, 7.4003753662109375, 0.7979736328125, 3.5970458984375, 0.1819305419921875, 1.485321044921875, -0.8415489196777344, -4.1819915771484375, 1.2031307220458984, 1.5826454162597656, -1.2083911895751953, 1.3196372985839844, -3.38580322265625, 3.278257369995117, -4.255851745605469, -0.7380428314208984, 0.363372802734375, 0.9559173583984375, 2.5519638061523438, -6.978363037109375, -8.388725280761719, 5.371368408203125, 0.9895133972167969, -3.1401290893554688, 1.59222412109375, -3.0258312225341797, -0.9386024475097656, 2.432485580444336, -1.36724853515625, -5.620216369628906, 1.5280532836914062, 1.0842666625976562, -1.5917816162109375, 4.9095611572265625, 1.445465087890625, 1.557647705078125, 5.475654602050781, 1.1570396423339844, 3.3418350219726562, 1.535074234008789, 2.076641082763672, 9.59979248046875, -2.8519287109375, 1.7162952423095703, 0.17504501342773438, 5.031467437744141, -0.085662841796875, 1.517486572265625, 5.734104156494141], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000073.npy"}
{"epoch": 0.22121212121212122, "step": 74, "batch_size": 128, "mean": 1.0515202283859253, "std": 3.512601852416992, "min": -14.43267822265625, "p10": -2.6536521911621094, "median": 1.030867576599121, "p90": 5.614778137207031, "max": 10.24945068359375, "pos_frac": 0.6015625, "sample": [1.139404296875, 1.6868705749511719, 3.2660598754882812, -1.8145675659179688, -1.82867431640625, 2.1433944702148438, -2.1428451538085938, 2.8373565673828125, 4.136569976806641, -14.43267822265625, 1.0677776336669922, -0.085174560546875, 0.5801239013671875, 0.7115249633789062, -0.3639087677001953, 0.9217529296875, 5.22247314453125, -4.409175872802734, 3.3898239135742188, 0.99395751953125, 1.6196575164794922, 3.4401092529296875, 6.365760803222656, -0.37799644470214844, 5.458915710449219, -1.73211669921875, -4.100166320800781, 2.2919082641601562, -1.280120849609375, 0.17749786376953125, -6.85382080078125, 1.7475967407226562, -1.4369354248046875, -2.165822982788086, 2.79827880859375, 4.5565643310546875, 9.657669067382812, 4.93133544921875, -2.2101821899414062, -2.6056442260742188, 0.7083969116210938, 1.1888771057128906, -0.6263427734375, -0.9710216522216797, -1.1120948791503906, 1.1708641052246094, -1.123779296875, -1.9247589111328125, 3.0692596435546875, 5.0967254638671875, -5.0386199951171875, 5.121358871459961, 1.7989635467529297, -0.3834190368652344, 5.533622741699219, -1.3139457702636719, -0.1963043212890625, -3.2782135009765625, -0.9654998779296875, -2.000732421875, -1.6995487213134766, 1.3533172607421875, 4.967546463012695, -0.4632682800292969, 7.12921142578125, 1.1401824951171875, -1.6136054992675781, -1.8537349700927734, 2.795928955078125, -0.47321319580078125, 1.3737640380859375, -0.31787109375, 1.4130802154541016, -2.194488525390625, 6.311195373535156, 0.6558799743652344, -1.6379432678222656, -2.090728759765625, 8.505508422851562, 0.33489036560058594, 2.4796104431152344, -3.8090591430664062, 1.3243255615234375, 6.333686828613281, 0.3623542785644531, -0.021030426025390625, -5.115013122558594, -0.03875923156738281, 6.353691101074219, -4.307060241699219, 1.7936973571777344, 6.2052459716796875, 1.238149642944336, 1.987152099609375, 0.5192737579345703, -0.07137298583984375, 3.4847412109375, 2.7367935180664062, -0.3146553039550781, 2.63348388671875, 2.1333389282226562, 1.892822265625, 5.7107696533203125, -3.4377593994140625, 5.915092468261719, 3.6542282104492188, 3.2541275024414062, 1.3612022399902344, 9.114608764648438, -0.6776504516601562, 1.5903396606445312, 10.24945068359375, 4.577674865722656, 0.6941909790039062, 4.50091552734375, 5.746849060058594, -4.624715805053711, 2.6064109802246094, 1.6931915283203125, 0.25737762451171875, 5.573638916015625, 1.1810836791992188, -0.277374267578125, -3.3603286743164062, 0.8466892242431641, 2.948822021484375, -1.2319869995117188, -2.7656707763671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000074.npy"}
{"epoch": 0.22424242424242424, "step": 75, "batch_size": 128, "mean": 1.410144329071045, "std": 3.5535190105438232, "min": -9.789710998535156, "p10": -2.632626724243164, "median": 1.1953325271606445, "p90": 6.292250061035156, "max": 12.88409423828125, "pos_frac": 0.671875, "sample": [0.42214202880859375, 6.051177978515625, 4.386760711669922, 1.9770698547363281, -5.2755279541015625, 2.7578163146972656, -2.1395416259765625, 2.2816410064697266, 0.7553634643554688, -0.17430877685546875, 2.2384109497070312, 0.6385040283203125, 2.461496353149414, 7.0872802734375, 3.456308364868164, -2.4427947998046875, 7.2460174560546875, 5.027740478515625, 5.113075256347656, 3.8375625610351562, 2.2383346557617188, -7.104377746582031, 3.3144073486328125, -2.9082260131835938, -1.0202560424804688, -0.00429534912109375, -0.9884414672851562, 0.3055419921875, -1.59527587890625, -0.55279541015625, -0.48685455322265625, 1.9312896728515625, 1.4297294616699219, 2.595998764038086, 2.82244873046875, -1.3701305389404297, 1.177032470703125, -0.09563255310058594, -4.9302215576171875, 0.15195846557617188, 3.0312633514404297, 1.6911067962646484, 0.051898956298828125, 1.4107017517089844, 0.8585567474365234, 0.4626655578613281, 0.13391876220703125, 2.602996826171875, -1.6070556640625, 6.2644500732421875, -1.3431873321533203, 7.99859619140625, 0.2288970947265625, 1.33038330078125, 1.0578460693359375, 0.5673332214355469, 0.81787109375, 6.513496398925781, 1.8027572631835938, 1.4164409637451172, -0.5698585510253906, 1.3125381469726562, -4.530445098876953, -0.6365432739257812, -1.264556884765625, 0.08484077453613281, 9.4041748046875, 7.573692321777344, -9.789710998535156, -0.6682891845703125, 0.6914634704589844, -1.3892326354980469, 1.6385002136230469, 5.902923583984375, -4.7321319580078125, -4.612579345703125, 2.7432403564453125, -0.04882049560546875, 12.88409423828125, -2.9249114990234375, 4.043113708496094, 5.357601165771484, 3.2657470703125, -4.0629730224609375, -2.1393966674804688, 0.8471584320068359, 5.343727111816406, 4.6612396240234375, -1.6655693054199219, 8.890762329101562, 2.948150634765625, -0.6766281127929688, 4.935523986816406, -4.367561340332031, 1.213632583618164, 6.8129425048828125, -0.59124755859375, 3.7674636840820312, 0.03418731689453125, 3.5938796997070312, 0.38763427734375, -0.8114395141601562, -1.7361030578613281, 0.10477447509765625, 3.0625762939453125, 1.5970115661621094, 2.550943374633789, 0.83905029296875, -0.5792007446289062, 3.034130096435547, 1.4098739624023438, 6.0972747802734375, 6.35711669921875, -0.11532211303710938, 3.4522476196289062, 2.3755340576171875, 8.623374938964844, -2.7085723876953125, 0.9742012023925781, 1.90704345703125, -2.600078582763672, -1.3592300415039062, 5.394611358642578, -5.659637451171875, 1.983062744140625, 6.3621826171875, 3.79998779296875, 6.567909240722656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000075.npy"}
{"epoch": 0.22727272727272727, "step": 76, "batch_size": 128, "mean": 1.7543277740478516, "std": 3.6995604038238525, "min": -7.7888031005859375, "p10": -1.579293441772461, "median": 1.3716487884521484, "p90": 6.035638427734373, "max": 16.26812744140625, "pos_frac": 0.6953125, "sample": [0.8343658447265625, 2.878101348876953, 4.178789138793945, 2.603626251220703, 2.0618858337402344, -0.9088115692138672, 16.26812744140625, -1.3589591979980469, 2.7979698181152344, -0.7352504730224609, -1.0538177490234375, 0.6370887756347656, -1.6098175048828125, 0.7264556884765625, 2.48150634765625, 0.68011474609375, -1.99981689453125, 7.1674652099609375, -0.5084609985351562, -1.1686553955078125, 7.507789611816406, -0.01047515869140625, 2.1241455078125, 15.945144653320312, 3.6654739379882812, 3.8947296142578125, -1.172821044921875, 4.5045928955078125, -1.0914936065673828, 0.105804443359375, -1.4858283996582031, 1.9155921936035156, 1.53314208984375, 0.09554862976074219, 7.104789733886719, 0.6497611999511719, 3.36407470703125, -4.57525634765625, -0.2577037811279297, 2.019237518310547, 2.6464996337890625, -5.673973083496094, -1.5021705627441406, 1.5810508728027344, -0.8375873565673828, -1.579803466796875, 0.9999580383300781, -1.3350067138671875, 0.05872917175292969, 2.1103134155273438, 0.6909751892089844, 2.5047683715820312, 3.121776580810547, 7.467018127441406, -0.59625244140625, 2.3473358154296875, 2.5067520141601562, 3.6901912689208984, 0.2828330993652344, 1.395294189453125, 1.0355148315429688, 3.8737335205078125, -0.20440101623535156, 3.6787567138671875, 1.1458511352539062, 1.9075164794921875, 3.070171356201172, 0.338104248046875, -3.1920318603515625, 5.535919189453125, 5.7315521240234375, 1.5781478881835938, 1.2830619812011719, 1.7914657592773438, -0.020330429077148438, 0.32022857666015625, -7.650794982910156, 3.9690208435058594, 0.635345458984375, -2.399139404296875, 0.4064979553222656, -1.143646240234375, -0.530731201171875, 5.7933349609375, 0.2909202575683594, -7.7888031005859375, 5.040412902832031, -0.42919921875, 4.6602630615234375, 9.868148803710938, -4.8162994384765625, 2.8189239501953125, 1.4215679168701172, 11.830635070800781, 4.0058746337890625, 2.6960983276367188, -0.4425506591796875, -2.751953125, 1.5589027404785156, 4.948699951171875, 0.7629814147949219, -1.922210693359375, 3.2828750610351562, 0.614013671875, -2.2330360412597656, 0.09013938903808594, 1.7808609008789062, 1.3534660339355469, -0.2840118408203125, 0.7603530883789062, 1.8158817291259766, 3.3199234008789062, -1.0828170776367188, 3.183530807495117, 6.60101318359375, 7.704929351806641, 5.431854248046875, 6.842319488525391, 2.8450164794921875, -0.7903289794921875, 6.957130432128906, 4.075965881347656, 1.0312957763671875, 12.407455444335938, -1.3947486877441406, -1.5790748596191406, 1.38983154296875, 2.0376930236816406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000076.npy"}
{"epoch": 0.23030303030303031, "step": 77, "batch_size": 128, "mean": 0.9833223223686218, "std": 3.7695183753967285, "min": -11.217269897460938, "p10": -2.9745357513427733, "median": 0.6032571792602539, "p90": 5.621718406677246, "max": 11.09039306640625, "pos_frac": 0.625, "sample": [0.5909595489501953, 3.9248275756835938, -2.9838790893554688, -0.727508544921875, 3.4833412170410156, 0.6155548095703125, -3.7670974731445312, -2.970531463623047, 3.9523391723632812, 0.3671417236328125, -0.18363189697265625, 5.591045379638672, 0.4444999694824219, -0.9124488830566406, -0.4254264831542969, -1.9073486328125, -1.6901741027832031, 0.5063858032226562, 2.6999664306640625, -1.0202522277832031, -3.8804550170898438, 4.532630920410156, 10.5428466796875, 1.2537612915039062, 6.256919860839844, -2.3983325958251953, 1.83111572265625, 1.8530845642089844, -0.7300777435302734, -3.2173995971679688, 0.5406284332275391, -2.243579864501953, 1.1899871826171875, 1.3389053344726562, -0.000873565673828125, 0.6211433410644531, -0.13414764404296875, 11.09039306640625, 5.7438507080078125, 6.5667724609375, 5.005901336669922, -0.9243888854980469, 0.3576183319091797, -0.17546653747558594, 0.20038986206054688, 0.10239410400390625, 0.4426116943359375, 4.298063278198242, -5.8690948486328125, -0.4807167053222656, 5.8993072509765625, -2.9428482055664062, 2.6837501525878906, 0.6627998352050781, 5.149314880371094, -2.6028213500976562, -5.444915771484375, 0.1637420654296875, 8.388442993164062, 7.178131103515625, 1.693603515625, 1.110250473022461, 2.518817901611328, 5.207313537597656, -8.36037826538086, -4.210052490234375, -2.808746337890625, -1.1600189208984375, 3.0652923583984375, 4.067605972290039, -1.8912200927734375, 0.25459861755371094, -1.8605194091796875, -0.5459117889404297, 1.5239410400390625, -2.8091659545898438, -0.0324554443359375, 5.235668182373047, 4.560146331787109, 2.937286376953125, -0.8695831298828125, 1.3922767639160156, 3.851503372192383, -0.19379806518554688, 0.2899436950683594, 2.1778087615966797, 1.6208782196044922, 0.17002105712890625, 1.7995338439941406, 2.6495132446289062, 7.4814910888671875, 2.4972610473632812, -0.7670936584472656, 1.7139511108398438, 8.9210205078125, 4.856170654296875, -11.217269897460938, -5.988739013671875, 3.402149200439453, -0.5280952453613281, -0.3168182373046875, 10.92156982421875, 0.8261032104492188, 0.5156974792480469, -5.575469970703125, 0.7827510833740234, 5.002197265625, -7.082557678222656, 0.5860595703125, -8.063766479492188, 1.4767589569091797, 1.7004985809326172, 0.69635009765625, 0.8314208984375, -0.7519340515136719, -2.506153106689453, 2.9305877685546875, 4.5833892822265625, -1.5389480590820312, 0.5833110809326172, 0.8865318298339844, 0.9475040435791016, 5.693288803100586, 9.216045379638672, -0.6007461547851562, 2.9333667755126953, -1.0958061218261719, 2.091888427734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000077.npy"}
{"epoch": 0.23333333333333334, "step": 78, "batch_size": 128, "mean": 1.6098604202270508, "std": 4.123882293701172, "min": -11.84002685546875, "p10": -2.711335754394531, "median": 1.5161495208740234, "p90": 5.908187103271484, "max": 19.210372924804688, "pos_frac": 0.6640625, "sample": [-0.20121002197265625, -0.0979156494140625, 7.3924560546875, 3.3402938842773438, -1.526275634765625, 4.4865264892578125, 1.5795135498046875, 2.1980838775634766, -1.1847076416015625, 5.7754974365234375, 1.5937728881835938, 3.680868148803711, 4.081169128417969, -2.269102096557617, -0.5864105224609375, -0.8723583221435547, 6.281877517700195, 0.5634613037109375, 5.624931335449219, 4.35101318359375, -1.0692214965820312, 5.006248474121094, -8.147262573242188, -0.48714447021484375, 4.6447296142578125, 2.9996871948242188, 4.1030426025390625, -11.84002685546875, 0.6561450958251953, -2.1440277099609375, 0.2243509292602539, -2.2553863525390625, -0.33303260803222656, 6.190238952636719, 3.6198959350585938, 0.7503986358642578, 1.9362411499023438, 4.835395812988281, -0.42919921875, 0.73638916015625, 5.3052825927734375, 3.7156410217285156, -6.890777587890625, 6.901557922363281, -0.4327278137207031, 5.665508270263672, -6.6566925048828125, 2.1743831634521484, 2.7498931884765625, -0.432342529296875, -4.014259338378906, 1.233551025390625, 7.3740081787109375, -0.00318145751953125, 0.11486434936523438, 3.466552734375, 1.4712409973144531, -0.6360893249511719, -7.380043029785156, -2.2728805541992188, 6.327491760253906, -0.531890869140625, 1.9377632141113281, 0.9320602416992188, -2.6331634521484375, -2.3867740631103516, -0.5467376708984375, -4.565223693847656, 6.6409149169921875, 4.941009521484375, 2.7652835845947266, -0.00986480712890625, 5.2350616455078125, 4.094886779785156, 1.1160163879394531, 2.366851806640625, 0.9611587524414062, 1.0742931365966797, 3.4347305297851562, 0.531463623046875, 3.2243270874023438, -3.48785400390625, 8.662994384765625, 19.210372924804688, -5.276519775390625, -5.8748321533203125, -2.0363388061523438, 5.035728454589844, 1.873321533203125, 1.203125, 3.2780189514160156, 9.320098876953125, 3.480010986328125, -1.5160446166992188, 5.261802673339844, 0.26026153564453125, 3.7330780029296875, 0.8264636993408203, 0.16742706298828125, 1.5610580444335938, -4.3517913818359375, 7.763641357421875, 2.766937255859375, 4.859310150146484, -2.89373779296875, 4.384712219238281, -1.2782421112060547, 2.3814239501953125, 0.06465911865234375, 0.885406494140625, -0.716949462890625, 5.7873077392578125, 1.1485214233398438, 4.133819580078125, 0.3443450927734375, 2.0292510986328125, 3.190919876098633, 1.8092575073242188, -3.467266082763672, -0.6044235229492188, -2.4693450927734375, 10.34405517578125, 3.3355712890625, 2.5352783203125, 3.2972049713134766, 13.813125610351562, 2.091968536376953, -0.37709808349609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000078.npy"}
{"epoch": 0.23636363636363636, "step": 79, "batch_size": 128, "mean": 1.076951265335083, "std": 3.7651093006134033, "min": -10.881675720214844, "p10": -2.9132844924926755, "median": 0.7163152694702148, "p90": 5.037140655517577, "max": 16.583587646484375, "pos_frac": 0.6328125, "sample": [4.664207458496094, 3.7734203338623047, -1.3692626953125, 3.3712692260742188, -1.2651596069335938, -2.8378067016601562, 5.78729248046875, 0.464630126953125, -4.766899108886719, -3.3631744384765625, 3.678112030029297, 2.9209728240966797, -1.2992210388183594, 6.136589050292969, 7.7490692138671875, 2.4920730590820312, 2.6785354614257812, 4.547199249267578, 2.5899581909179688, 3.4944992065429688, -1.4700088500976562, 3.365724563598633, 3.0591659545898438, 3.1570587158203125, 0.6537418365478516, 1.420318603515625, 2.0590972900390625, -1.6978569030761719, 0.163726806640625, 0.7788887023925781, 2.606351852416992, 4.187625885009766, -1.6015625, 1.6532974243164062, -4.1261749267578125, 1.66552734375, 0.2440948486328125, 1.2225971221923828, -5.173927307128906, 0.12334442138671875, 1.4094696044921875, 6.976066589355469, -2.4986038208007812, 0.4627704620361328, -0.5582351684570312, -0.26394081115722656, 2.7899951934814453, -0.5308837890625, 2.8497314453125, 2.0847930908203125, -1.16229248046875, 5.3517913818359375, -6.4223175048828125, -2.3555545806884766, 0.27043914794921875, 1.2682857513427734, 2.229694366455078, 0.26053619384765625, -3.6117630004882812, -1.2267475128173828, 6.6604766845703125, 0.2166290283203125, 0.0316314697265625, -0.3944854736328125, -7.6040802001953125, 1.1886978149414062, -0.6240081787109375, 1.5951271057128906, 0.6146774291992188, 2.1690025329589844, 6.995147705078125, 9.744735717773438, 16.583587646484375, 1.8709144592285156, -0.2038726806640625, 7.946170806884766, -0.05010223388671875, -1.6389923095703125, 3.8451175689697266, 2.9263763427734375, 2.8946533203125, -10.881675720214844, 3.5955123901367188, 4.111423492431641, 4.902290344238281, -0.3278045654296875, -0.7098293304443359, -6.6888275146484375, 14.946502685546875, 0.9359397888183594, -0.1477813720703125, 0.0247039794921875, -0.8009510040283203, -0.03435516357421875, 1.6040096282958984, -0.1533050537109375, 2.12255859375, -3.6186370849609375, 1.7716217041015625, 2.4039459228515625, -1.238525390625, 0.2198028564453125, -1.318359375, -1.5906753540039062, 3.129423141479492, 4.544898986816406, 0.0514373779296875, -1.0684967041015625, 4.16357421875, -3.2967071533203125, 1.940032958984375, -2.1938629150390625, 8.617294311523438, 5.7151336669921875, -2.545074462890625, 2.527069091796875, 0.6511917114257812, 1.5106945037841797, -0.4212512969970703, 1.4341354370117188, -3.0893993377685547, -2.2847213745117188, 1.6737747192382812, -1.0559539794921875, 0.9483261108398438, 0.3289337158203125, 0.16313743591308594, -6.54937744140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000079.npy"}
{"epoch": 0.23939393939393938, "step": 80, "batch_size": 128, "mean": 1.3143092393875122, "std": 3.188249349594116, "min": -9.192489624023438, "p10": -3.109318161010742, "median": 1.4148054122924805, "p90": 5.114297866821289, "max": 9.305633544921875, "pos_frac": 0.7265625, "sample": [1.0196189880371094, 2.7550926208496094, 2.6651535034179688, 6.410820007324219, 1.61651611328125, -6.862205505371094, 5.505950927734375, 4.597450256347656, 0.78521728515625, 6.6435089111328125, 1.8189315795898438, 0.5713653564453125, 3.0246047973632812, -3.2726516723632812, 2.782796859741211, 0.2703399658203125, 3.6279258728027344, -2.5306015014648438, 3.1253509521484375, 1.6688613891601562, 1.749725341796875, 4.185432434082031, 3.5308151245117188, 0.9395751953125, 2.9997787475585938, 2.6945419311523438, 0.29157447814941406, 6.888515472412109, 5.155727386474609, 0.332244873046875, 0.008203506469726562, -9.192489624023438, 4.240283966064453, 3.1855316162109375, -4.5828857421875, 2.238494873046875, 4.7983245849609375, 0.2035675048828125, -1.1024703979492188, -0.025005340576171875, 4.6366729736328125, 1.3572273254394531, 0.41778564453125, -4.753440856933594, 0.1885223388671875, 6.192298889160156, -3.5764236450195312, 8.615203857421875, 2.508129119873047, -1.1093292236328125, 6.491691589355469, -0.9714813232421875, 2.7138023376464844, 2.4367599487304688, 0.33905792236328125, 2.4284095764160156, 1.236318588256836, -4.0518035888671875, -0.042179107666015625, 1.7403640747070312, 2.3414077758789062, 0.7699127197265625, -3.8757705688476562, -0.20207977294921875, 0.8026256561279297, -3.6284637451171875, 1.0804595947265625, 2.3743896484375, -0.47196006774902344, 0.32286834716796875, 2.5823211669921875, -0.2202167510986328, 1.764425277709961, -8.01986312866211, 0.07273101806640625, 0.9973258972167969, -2.0998611450195312, -3.8378219604492188, 2.5632247924804688, 3.0081329345703125, 5.0965423583984375, 3.2569618225097656, -0.2705650329589844, -1.8978290557861328, 3.079254150390625, -0.7360000610351562, 0.5432014465332031, 1.0382232666015625, 1.4723834991455078, 2.485809326171875, 4.1982421875, -0.3213539123535156, 3.063079833984375, 4.275135040283203, -2.94293212890625, 0.49672698974609375, 2.837749481201172, 1.1844482421875, 8.640380859375, -1.3792266845703125, 4.409921646118164, 1.6356353759765625, 0.9732303619384766, 5.704673767089844, 3.703401565551758, 0.9107551574707031, 5.4764251708984375, -0.029815673828125, 3.5003890991210938, 1.5011062622070312, 2.567230224609375, 4.297721862792969, 0.09845733642578125, 3.1703834533691406, -0.27567291259765625, -3.039318084716797, -1.9091472625732422, 3.4182891845703125, -2.4860801696777344, -2.9035415649414062, 0.004474639892578125, 3.0886688232421875, -3.3369522094726562, 1.1443710327148438, 3.5766143798828125, 5.449474334716797, 9.305633544921875, -3.729888916015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000080.npy"}
{"epoch": 0.24242424242424243, "step": 81, "batch_size": 128, "mean": 1.4938733577728271, "std": 3.8567862510681152, "min": -14.487579345703125, "p10": -2.7096786499023438, "median": 1.5635480880737305, "p90": 5.203166198730469, "max": 13.450790405273438, "pos_frac": 0.7265625, "sample": [1.9908981323242188, -4.443244934082031, 2.8796844482421875, 4.57086181640625, 0.7357635498046875, 0.3565177917480469, 8.583038330078125, 4.465995788574219, 1.7410736083984375, -6.944160461425781, 1.0392608642578125, -0.1059722900390625, -0.14374923706054688, 4.361968994140625, 2.114349365234375, -4.771781921386719, 3.0843467712402344, -6.665061950683594, 0.5569610595703125, -3.6068077087402344, -1.8134918212890625, 3.23663330078125, -1.5454559326171875, 2.8573837280273438, 5.1804656982421875, 4.167198181152344, 3.113842010498047, 3.3620948791503906, 3.9342613220214844, 3.9414215087890625, 1.0416946411132812, 1.0164470672607422, 3.47344970703125, -0.421478271484375, -2.8070907592773438, -3.4981231689453125, 0.09866523742675781, 2.59161376953125, 4.100410461425781, 3.262317657470703, 3.9894027709960938, -1.7294197082519531, 2.9516372680664062, -1.3866329193115234, -3.4670944213867188, 11.636711120605469, 1.3900604248046875, 2.470733642578125, 1.8857841491699219, 0.41357421875, -0.6237010955810547, 0.791656494140625, 2.6439170837402344, -1.1989402770996094, -0.3604888916015625, 1.6636619567871094, 2.3336639404296875, -0.04451560974121094, 3.2674331665039062, 8.64617919921875, 6.342060089111328, -2.9396286010742188, 0.6403560638427734, 1.4438056945800781, 4.20068359375, -2.0798568725585938, -11.927703857421875, 1.1258316040039062, 7.7874298095703125, 0.38726806640625, 1.1269645690917969, 4.3560028076171875, 12.06640625, 1.4533576965332031, 8.941619873046875, 4.275848388671875, 2.2220535278320312, 3.0401229858398438, 2.399791717529297, 0.24822998046875, 2.3755645751953125, 3.5848617553710938, -14.487579345703125, 3.803619384765625, 2.0026473999023438, 3.518918991088867, 0.35182762145996094, -1.5017471313476562, 0.5509414672851562, 0.7527694702148438, 1.2168502807617188, 2.4039993286132812, 5.356208801269531, 1.509256362915039, 4.11126708984375, 2.98992919921875, -6.2337493896484375, 3.009206771850586, -2.4184417724609375, -0.7954692840576172, 0.9071292877197266, 13.450790405273438, 0.7677841186523438, 5.385044097900391, 0.3943901062011719, 2.3132553100585938, 2.22412109375, 7.337272644042969, -2.6679306030273438, 5.256134033203125, 1.6178398132324219, 3.51214599609375, -3.1379470825195312, -2.590362548828125, 7.118522644042969, 2.4248828887939453, 1.2084064483642578, -0.070404052734375, 3.8632354736328125, 0.2376556396484375, 2.321249008178711, -2.171009063720703, -0.3533477783203125, 1.4787406921386719, -1.2044219970703125, 0.36185264587402344, 4.570217132568359, -0.9168014526367188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000081.npy"}
{"epoch": 0.24545454545454545, "step": 82, "batch_size": 128, "mean": 1.5039454698562622, "std": 3.2631330490112305, "min": -12.205665588378906, "p10": -2.068355941772461, "median": 1.2937335968017578, "p90": 5.545811462402344, "max": 9.746551513671875, "pos_frac": 0.6796875, "sample": [-0.12930774688720703, 4.056739807128906, -1.991790771484375, 2.5917129516601562, 3.0603408813476562, 2.7073211669921875, -2.074382781982422, 5.4211578369140625, -0.7405776977539062, -2.1968612670898438, 6.4062347412109375, 4.1779022216796875, -0.245574951171875, 5.607505798339844, 2.554218292236328, -0.32839202880859375, 4.442047119140625, -0.198272705078125, 3.139068603515625, 5.993965148925781, -0.667572021484375, 7.423431396484375, -1.7518348693847656, 0.7811908721923828, 0.49505615234375, -4.617645263671875, -5.474174499511719, 3.4774036407470703, 3.0434722900390625, 0.9933223724365234, 0.5631484985351562, -4.0518646240234375, 2.7125396728515625, -1.39520263671875, 0.02964019775390625, 5.849945068359375, 6.733985900878906, 2.374114990234375, 0.0795440673828125, -1.5529708862304688, -1.6406116485595703, 4.059242248535156, -0.7570075988769531, 5.169380187988281, -0.45762062072753906, -5.814476013183594, 1.20556640625, -0.1746349334716797, 2.2423667907714844, 5.3188629150390625, -1.6493377685546875, 0.9506874084472656, -1.6878662109375, -1.9172115325927734, 0.8998832702636719, -1.0328598022460938, 0.4459991455078125, 0.869964599609375, 4.776866912841797, 1.580209732055664, 2.072673797607422, 2.1519508361816406, 0.6483001708984375, 0.7694511413574219, -0.09320068359375, 0.5340843200683594, 2.0786972045898438, -0.16568756103515625, 0.23116302490234375, 7.481300354003906, 5.78680419921875, 5.519371032714844, -0.11327743530273438, 0.8984870910644531, 7.52276611328125, 2.325498580932617, 1.3912906646728516, -2.752899169921875, 2.6933212280273438, 5.0765533447265625, -0.7752628326416016, 3.1434898376464844, 2.129558563232422, -2.0657730102539062, 7.994991302490234, 3.1696853637695312, 3.4658241271972656, -12.205665588378906, 9.49078369140625, 2.752368927001953, 9.746551513671875, -3.111370086669922, 2.4784793853759766, -4.21605110168457, -0.9398345947265625, -2.6149749755859375, 4.7281036376953125, 1.9933967590332031, -1.883087158203125, -2.440692901611328, 1.8249130249023438, 1.2915267944335938, 4.0640411376953125, 3.144439697265625, 0.15199851989746094, 1.6845951080322266, 4.2335205078125, 2.4540786743164062, -0.95025634765625, 0.10804939270019531, 1.2959403991699219, 3.955394744873047, 4.522544860839844, -2.5281295776367188, 2.102020263671875, 5.753009796142578, 0.23743820190429688, 0.7154560089111328, 5.1718597412109375, -0.34870338439941406, 0.1472015380859375, 4.063484191894531, 4.6625518798828125, -0.7022285461425781, 3.5835647583007812, 4.180412292480469, 0.8767223358154297, 2.226421356201172], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000082.npy"}
{"epoch": 0.24848484848484848, "step": 83, "batch_size": 128, "mean": 1.6977624893188477, "std": 3.734921932220459, "min": -7.4894561767578125, "p10": -2.0194381713867187, "median": 1.2292547225952148, "p90": 6.446568870544434, "max": 14.432167053222656, "pos_frac": 0.7109375, "sample": [7.1143646240234375, 2.2911911010742188, 3.4322261810302734, 3.57177734375, 2.5792770385742188, -1.0864715576171875, 1.1668510437011719, -7.4894561767578125, -0.5718231201171875, 0.13452529907226562, -1.9798851013183594, -0.33306884765625, -0.581329345703125, -2.1117286682128906, 3.2873077392578125, 0.26453208923339844, 7.2104949951171875, 1.7973785400390625, 1.1726341247558594, -5.282958984375, 0.6831817626953125, 0.2693347930908203, 5.194370269775391, -0.2501373291015625, 1.5178890228271484, -1.4763221740722656, 8.862747192382812, 4.01007080078125, -7.204551696777344, 2.987987518310547, 0.5263996124267578, -6.150321960449219, 2.8516464233398438, 0.9759998321533203, 0.6382484436035156, 1.7074851989746094, 4.945838928222656, -1.593973159790039, 0.5028228759765625, 0.2698516845703125, 11.2398681640625, 3.1653404235839844, 1.8309974670410156, 0.5174789428710938, 3.0405311584472656, 3.9075775146484375, 3.0939102172851562, -0.4544410705566406, -1.077993392944336, -1.3273544311523438, 5.74822998046875, 6.466928482055664, 3.104461669921875, -3.0921783447265625, 1.5265998840332031, 1.7002525329589844, 6.437843322753906, 1.338165283203125, 1.8956432342529297, 2.9319000244140625, 10.756202697753906, 4.47283935546875, 14.432167053222656, -1.6094818115234375, 6.3251800537109375, 2.21270751953125, 0.25780487060546875, 3.3684005737304688, 1.4050140380859375, -0.6365089416503906, -5.538215637207031, 7.976261138916016, 3.4957656860351562, 0.6171245574951172, -1.0646209716796875, 0.06669998168945312, -0.9604167938232422, 2.8607254028320312, 0.4996604919433594, 0.8999176025390625, 0.4476737976074219, -0.657562255859375, 6.52630615234375, 0.30425262451171875, 7.288398742675781, -2.1429595947265625, 5.120765686035156, 0.9325942993164062, 1.2561092376708984, 6.281272888183594, 3.3793487548828125, -1.6277351379394531, 10.029541015625, 1.2024002075195312, 0.6359539031982422, 0.056797027587890625, 3.4072265625, -0.80181884765625, 2.470195770263672, 2.378814697265625, 2.5591087341308594, -4.0784759521484375, 1.3426971435546875, 5.42578125, 2.776691436767578, -0.24308204650878906, -0.29517364501953125, 12.27349853515625, 3.3286819458007812, 5.6546783447265625, -2.5899658203125, -5.0826416015625, 3.391754150390625, 3.6510009765625, -4.988258361816406, -1.7484359741210938, 6.745849609375, -1.6282730102539062, -0.8156967163085938, -4.306049346923828, 4.938690185546875, 0.2570953369140625, 4.768928527832031, 0.6827926635742188, 4.0024261474609375, 0.21973419189453125, -1.1975078582763672, 0.0947723388671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000083.npy"}
{"epoch": 0.2515151515151515, "step": 84, "batch_size": 128, "mean": 1.2717559337615967, "std": 3.661672592163086, "min": -9.159378051757812, "p10": -3.259578704833984, "median": 0.9588441848754883, "p90": 5.840610504150391, "max": 12.246826171875, "pos_frac": 0.6640625, "sample": [3.2841415405273438, 2.5335006713867188, 0.116302490234375, 6.029396057128906, -0.43253326416015625, 3.6359786987304688, 5.990081787109375, -0.0758056640625, 3.3311996459960938, 7.5801849365234375, 3.7518157958984375, -5.376365661621094, -1.1730308532714844, 0.1161651611328125, 1.548788070678711, -3.1610336303710938, -0.574315071105957, 2.1252288818359375, 2.0035476684570312, 6.0636444091796875, 6.894657135009766, 8.920310974121094, 3.3386993408203125, -1.6037063598632812, 0.5776138305664062, 4.300079345703125, 0.8922004699707031, 0.7108230590820312, 2.6834964752197266, 5.408119201660156, 2.579986572265625, 3.7150421142578125, 0.8366031646728516, 5.8335723876953125, 1.6065864562988281, 1.5160560607910156, 0.8246688842773438, 0.3175048828125, -2.8726272583007812, 0.5321140289306641, -0.6027183532714844, -9.159378051757812, 0.133544921875, 0.2655830383300781, -0.905517578125, 3.7369384765625, 4.549049377441406, 2.7786693572998047, 0.9202556610107422, 6.243072509765625, -1.3782119750976562, 3.9409637451171875, 5.4812469482421875, 0.6679229736328125, -0.844757080078125, 4.034248352050781, -2.5521926879882812, -3.4895172119140625, -0.1236114501953125, 5.857032775878906, 3.4377517700195312, -0.7085132598876953, -0.5585174560546875, -3.1123619079589844, 0.9974327087402344, -4.938270568847656, 4.757717132568359, -2.1840057373046875, 0.5003433227539062, -0.17284393310546875, 0.3684844970703125, -0.16909027099609375, 0.2224578857421875, 3.4563751220703125, 2.8073883056640625, 1.5353546142578125, 8.423049926757812, 0.15959739685058594, -0.5045623779296875, 3.691591262817383, 11.315139770507812, 4.143089294433594, 3.8178844451904297, -4.5123443603515625, 12.246826171875, -3.6106529235839844, -1.2423095703125, -0.1746673583984375, 1.5396881103515625, 3.707935333251953, 5.994293212890625, 1.2289237976074219, 0.4379005432128906, -0.9134654998779297, 0.07942962646484375, 2.3396377563476562, -4.448089599609375, -5.6682891845703125, -2.2464370727539062, -0.7138481140136719, 3.1585464477539062, 5.460273742675781, 1.2662353515625, 1.0355472564697266, -2.049549102783203, 4.160652160644531, 3.5646286010742188, 1.8873634338378906, -4.803680419921875, 0.21466064453125, -2.9634170532226562, -4.941154479980469, 1.0407695770263672, 2.73626708984375, -5.773063659667969, 5.086299896240234, 4.317893981933594, -5.904144287109375, 11.09259033203125, 1.028411865234375, -1.15802001953125, -3.0696067810058594, -6.043479919433594, 0.2240009307861328, 1.84979248046875, 3.4617843627929688, 2.7819366455078125, -0.08812713623046875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000084.npy"}
{"epoch": 0.2545454545454545, "step": 85, "batch_size": 128, "mean": 1.119603157043457, "std": 3.7214748859405518, "min": -10.571121215820312, "p10": -3.4196647644042963, "median": 1.1048583984375, "p90": 5.94258804321289, "max": 8.608978271484375, "pos_frac": 0.6796875, "sample": [-0.5231704711914062, 1.7247390747070312, 4.558387756347656, -0.2866973876953125, 5.2140960693359375, 0.9416351318359375, 6.455413818359375, -0.9637851715087891, 3.3395614624023438, -3.3143768310546875, 2.3519821166992188, 1.1586112976074219, 2.0899658203125, 5.8855438232421875, 1.9592132568359375, 2.4719314575195312, 1.3149032592773438, -2.075418472290039, 1.0359458923339844, 0.3994789123535156, 8.468677520751953, -7.147956848144531, 3.277172088623047, -0.5241851806640625, 7.3512115478515625, 7.9955902099609375, -1.2804031372070312, 2.2199020385742188, 1.6623611450195312, -9.132919311523438, -8.33526611328125, 3.7524566650390625, 0.6639175415039062, 0.02779388427734375, -2.37890625, -0.6556282043457031, 7.18695068359375, 3.6824951171875, -4.236907958984375, -3.7831459045410156, -2.4641189575195312, 7.75921630859375, 0.705902099609375, 1.6818466186523438, -1.7847175598144531, 3.250804901123047, 1.348073959350586, -3.9925689697265625, -2.591411590576172, 5.337345123291016, 0.41269683837890625, 1.4313583374023438, 1.7633018493652344, -1.6388168334960938, 2.7293853759765625, -10.571121215820312, 5.1832427978515625, -2.5861053466796875, -1.018331527709961, 5.233013153076172, -3.3250656127929688, 5.444267272949219, 2.6315155029296875, 0.16771697998046875, 3.151947021484375, 0.24256134033203125, 0.16074562072753906, -1.5028915405273438, 1.0511054992675781, 0.6661529541015625, 0.6003932952880859, 3.0775222778320312, -1.5938301086425781, 3.0729827880859375, -3.6623992919921875, -2.2625350952148438, -1.2887535095214844, 6.2192230224609375, 0.21628952026367188, 0.5886859893798828, 4.800525665283203, 0.6101779937744141, -2.1704673767089844, 1.7629661560058594, -3.7701873779296875, 0.9459304809570312, 6.075691223144531, 3.233184814453125, 2.618072509765625, 1.6980743408203125, -1.8772811889648438, -2.2350540161132812, 2.054607391357422, 0.20892333984375, -5.581928253173828, 3.2169418334960938, -1.6302490234375, 4.450490951538086, -2.9018325805664062, 0.36185646057128906, 3.4461593627929688, 7.535858154296875, 3.0670166015625, 0.655548095703125, -3.124176025390625, 1.641876220703125, -3.6403961181640625, 4.723335266113281, -4.201292037963867, 7.875022888183594, 4.287055969238281, 5.5413665771484375, 3.4516983032226562, 0.1635456085205078, 0.90814208984375, -1.0631484985351562, 7.014720916748047, 8.608978271484375, 3.586517333984375, 8.0487060546875, 1.3070526123046875, 2.2331809997558594, -1.1475868225097656, 0.3431262969970703, 2.0037612915039062, 4.802951812744141, 3.6975173950195312, -6.723579406738281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000085.npy"}
{"epoch": 0.25757575757575757, "step": 86, "batch_size": 128, "mean": 1.6025245189666748, "std": 3.9474587440490723, "min": -8.703903198242188, "p10": -2.507826805114746, "median": 1.240264892578125, "p90": 5.112394714355468, "max": 18.042083740234375, "pos_frac": 0.7109375, "sample": [3.8167877197265625, 0.3336334228515625, 1.1300811767578125, 4.852691650390625, 1.7020378112792969, 5.868125915527344, 4.717586517333984, 2.0611724853515625, 1.5204830169677734, 1.9601554870605469, 1.0849227905273438, 0.1911163330078125, 0.8163509368896484, 1.9680633544921875, -3.158161163330078, 18.042083740234375, 1.3917694091796875, -0.962890625, 0.5608749389648438, 1.1259384155273438, -5.720634460449219, -7.4112701416015625, 4.9098052978515625, 11.359176635742188, -1.84912109375, 4.855926513671875, 3.2968482971191406, 5.3466644287109375, 1.4149761199951172, -0.4676361083984375, -1.0125083923339844, 5.7393798828125, 0.6035976409912109, 2.0348968505859375, -0.10029411315917969, -6.994140625, 4.020786285400391, 1.8533248901367188, 7.279052734375, 3.683563232421875, 3.1683387756347656, -0.4260406494140625, -7.9653778076171875, -1.8946990966796875, 4.160064697265625, -2.518674850463867, 3.785327911376953, -0.024749755859375, -5.610481262207031, 0.17911529541015625, 4.778423309326172, 3.9632186889648438, 3.949981689453125, 8.274246215820312, 3.8650665283203125, 1.0589752197265625, 3.45123291015625, 0.5366477966308594, -1.660797119140625, -8.703903198242188, 2.296600341796875, -0.34096527099609375, -0.5268630981445312, -1.5460205078125, -4.400306701660156, 2.7637901306152344, 0.7770538330078125, 1.6569061279296875, -4.30078125, 0.9905796051025391, 0.17828941345214844, 0.8233833312988281, 6.415077209472656, 2.0353546142578125, -2.5031776428222656, 4.39532470703125, 8.356178283691406, -0.2228240966796875, -0.9562740325927734, 0.8671188354492188, 4.6154327392578125, 14.12030029296875, -0.010570526123046875, -0.23250198364257812, 2.6224327087402344, 1.66412353515625, 3.2391510009765625, 0.12699508666992188, 1.7650604248046875, 0.16790390014648438, -5.024570465087891, 5.8306427001953125, 3.5195865631103516, -3.4655628204345703, 2.4861412048339844, 4.92254638671875, 5.011993408203125, 3.3584823608398438, 14.946990966796875, 0.32759857177734375, -1.85223388671875, 1.0375328063964844, -2.268585205078125, 4.1027374267578125, -0.7677764892578125, 1.12371826171875, -1.229644775390625, 4.4221954345703125, -1.6295623779296875, -2.6282501220703125, 2.7585906982421875, 2.8634185791015625, 0.7448711395263672, 2.635692596435547, 1.0019569396972656, 3.786529541015625, 2.7979278564453125, 1.2574615478515625, 1.2230682373046875, 2.0914535522460938, -1.4150238037109375, -1.7557525634765625, 0.9627857208251953, 4.049184799194336, 0.2651214599609375, 0.397186279296875, 3.924407958984375, 6.242362976074219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000086.npy"}
{"epoch": 0.2606060606060606, "step": 87, "batch_size": 128, "mean": 2.1159305572509766, "std": 4.438603401184082, "min": -8.013992309570312, "p10": -2.028190517425537, "median": 1.404489517211914, "p90": 7.530275726318358, "max": 20.17583465576172, "pos_frac": 0.671875, "sample": [-2.980051040649414, -7.406669616699219, -5.635829925537109, 1.6183547973632812, 0.5950546264648438, 7.1086273193359375, -0.2898826599121094, 0.30092620849609375, 5.878570556640625, -0.31183624267578125, 0.7044868469238281, 0.3018512725830078, 9.551254272460938, 1.6867904663085938, 0.19817733764648438, 7.352325439453125, -0.5162887573242188, 5.336662292480469, 3.1414566040039062, 0.47861289978027344, 2.4144325256347656, 1.6774520874023438, 8.815628051757812, 16.597503662109375, 15.007537841796875, 1.329681396484375, 2.3973236083984375, -1.947866439819336, 2.56768798828125, 8.442085266113281, 7.041416168212891, 0.3307037353515625, 0.8365039825439453, 2.9324798583984375, -0.8366432189941406, 2.1820945739746094, 20.17583465576172, -2.6180953979492188, -0.4180564880371094, 0.8818893432617188, -0.8289546966552734, -2.496185302734375, 3.3059463500976562, 0.021240234375, 2.5163021087646484, -0.25099945068359375, 4.4290771484375, 8.683685302734375, 2.968414306640625, 0.01932525634765625, 2.210538864135742, 1.1526718139648438, 0.7343673706054688, 0.00341033935546875, 2.6497840881347656, -7.028190612792969, -2.1287307739257812, -1.4525279998779297, -0.7174777984619141, 9.55511474609375, 0.9224700927734375, 1.1229114532470703, -1.46209716796875, -0.854888916015625, 5.9429473876953125, 9.509010314941406, 3.1845703125, -0.32212257385253906, 1.4095497131347656, 5.74090576171875, 1.3994293212890625, 2.5089569091796875, 2.8488388061523438, 1.8886871337890625, 4.825958251953125, -3.0514373779296875, 5.3038787841796875, 2.9703445434570312, -0.4319171905517578, 3.240419387817383, 7.277185440063477, -0.9029273986816406, 1.9140815734863281, 8.334487915039062, 7.078407287597656, -4.4897003173828125, -0.9416999816894531, 5.563232421875, 3.7546539306640625, -2.0171518325805664, 1.0566940307617188, 4.918815612792969, -0.19849014282226562, -0.1255931854248047, -1.828460693359375, -0.37359619140625, -2.0539474487304688, 2.3883495330810547, 2.6427688598632812, 7.392189025878906, 3.1225433349609375, -3.6026687622070312, 10.984115600585938, -1.5004806518554688, 5.821949005126953, -0.5774955749511719, 7.85247802734375, 5.07611083984375, 4.270378112792969, 2.9251174926757812, 1.980804443359375, 10.78094482421875, 1.6480789184570312, 7.05865478515625, -1.6564750671386719, 0.5087070465087891, 0.1503448486328125, 4.461677551269531, 2.4863967895507812, -0.5424270629882812, 5.2767486572265625, -1.0781021118164062, 0.02779388427734375, -8.013992309570312, -0.7544765472412109, -1.8002395629882812, -7.801948547363281, -0.6201171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000087.npy"}
{"epoch": 0.2636363636363636, "step": 88, "batch_size": 128, "mean": 1.6691548824310303, "std": 4.637127876281738, "min": -13.153091430664062, "p10": -3.8554676055908197, "median": 2.0946550369262695, "p90": 6.353694725036621, "max": 19.6839599609375, "pos_frac": 0.6640625, "sample": [-1.7466583251953125, 2.2775421142578125, -2.6840972900390625, 0.8946456909179688, 5.250907897949219, 4.133148193359375, -0.22475242614746094, 6.666351318359375, 7.436798095703125, 3.5978317260742188, 2.823373794555664, -3.6222496032714844, 2.8511505126953125, 3.325563430786133, 1.0875396728515625, 6.43548583984375, 2.933135986328125, -0.20500946044921875, -3.3801422119140625, 0.20751571655273438, -0.2751960754394531, 1.632415771484375, 0.8475399017333984, -0.6216659545898438, 11.351409912109375, 4.391973495483398, 2.1269474029541016, 4.326189041137695, 0.22642898559570312, -0.8482284545898438, 3.889801025390625, 5.402626037597656, 4.21795654296875, 1.7002983093261719, 3.0973434448242188, 4.007904052734375, 1.4537487030029297, 2.656780242919922, -0.25215911865234375, 3.723234176635742, 3.4013519287109375, 1.027252197265625, -2.8694000244140625, -5.46533203125, 6.40153694152832, -11.08056640625, 0.92205810546875, -0.8053741455078125, 2.873443603515625, -13.153091430664062, 1.466094970703125, 5.747001647949219, -4.78094482421875, -2.8092498779296875, 2.5972366333007812, -2.8959007263183594, -0.6390838623046875, 6.410102844238281, 5.4492950439453125, 19.6839599609375, 0.8481178283691406, 6.33319091796875, -6.654205322265625, 4.914348602294922, 9.921661376953125, -4.4517059326171875, 5.866111755371094, 3.065521240234375, 1.3943061828613281, 3.341796875, -4.3996429443359375, 2.2583389282226562, -0.38031768798828125, 3.528106689453125, 5.9786376953125, -1.1227493286132812, 5.5188140869140625, 2.7703170776367188, 5.326375961303711, 9.390640258789062, 2.9680938720703125, 5.4461517333984375, 2.5028648376464844, 4.04595947265625, 1.815093994140625, 11.917129516601562, 4.575340270996094, -5.915313720703125, 1.1795578002929688, 10.633155822753906, -0.82049560546875, 4.7350921630859375, -0.9203224182128906, 3.24566650390625, 2.6123924255371094, 0.3300819396972656, 0.4129371643066406, 2.0623626708984375, -3.175445556640625, -4.424247741699219, -3.206165313720703, 4.9512176513671875, -3.5090408325195312, 2.523160934448242, 9.029808044433594, -0.11376571655273438, -2.75286865234375, 1.4742355346679688, 2.2558975219726562, 2.866546630859375, -2.190826416015625, -0.23768997192382812, -5.478141784667969, 4.3790435791015625, 3.4017982482910156, 5.307582855224609, 2.2371444702148438, 0.7469558715820312, -5.173492431640625, -7.061225891113281, -2.2059478759765625, 2.0396041870117188, -1.0634613037109375, 13.439727783203125, -0.5747604370117188, -9.150161743164062, 4.986053466796875, -2.53692626953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000088.npy"}
{"epoch": 0.26666666666666666, "step": 89, "batch_size": 128, "mean": 2.617180109024048, "std": 3.9809391498565674, "min": -4.4785919189453125, "p10": -2.2598453521728517, "median": 2.0970163345336914, "p90": 7.737217712402341, "max": 18.82513427734375, "pos_frac": 0.75, "sample": [-1.1539154052734375, -1.2064933776855469, 1.3728065490722656, 11.030105590820312, 2.7034072875976562, 0.07562828063964844, 2.9197921752929688, 10.1910400390625, 0.6219329833984375, 2.047454833984375, -1.4648170471191406, 1.8707599639892578, 1.3161468505859375, 0.735107421875, 0.7905235290527344, 1.1771163940429688, 3.7329559326171875, 2.636810302734375, -0.4924774169921875, 5.663198471069336, 5.122840881347656, 0.06226348876953125, -0.458587646484375, 2.3581066131591797, 3.2682418823242188, 2.068796157836914, 12.871063232421875, 3.096202850341797, -3.1694183349609375, 6.45074462890625, 3.0020523071289062, 8.738456726074219, 11.24542236328125, 2.4102249145507812, 8.816265106201172, 7.4753265380859375, 5.026762008666992, -3.0888633728027344, 0.8828887939453125, -0.5681076049804688, 3.35565185546875, 3.4827041625976562, 5.217628479003906, 3.6567230224609375, 0.37720680236816406, 1.0051441192626953, 5.2324676513671875, 2.8765830993652344, 3.3700942993164062, 5.4811859130859375, 5.775856018066406, 12.874359130859375, 4.587684631347656, -1.67132568359375, 3.8704376220703125, 1.4117431640625, 4.149639129638672, 12.705413818359375, -1.38153076171875, 2.23126220703125, 5.312980651855469, 2.964265823364258, -1.9313507080078125, 1.0834503173828125, 2.7027721405029297, 6.110450744628906, 3.4176673889160156, -4.4785919189453125, 1.6435890197753906, 0.11095237731933594, 3.904125213623047, 5.756053924560547, 3.968090057373047, 0.7173843383789062, 1.9179191589355469, 2.0610599517822266, 4.6074066162109375, -0.0030727386474609375, 1.6676406860351562, 5.749298095703125, -1.3678970336914062, 6.757999420166016, 2.1409988403320312, 7.214973449707031, 1.6630401611328125, 2.1252365112304688, 8.370918273925781, -3.8660354614257812, 1.6050186157226562, -0.77923583984375, 1.9615325927734375, 11.022369384765625, 1.0022602081298828, 4.769989013671875, 1.9087657928466797, 2.210651397705078, 2.3880996704101562, 4.916576385498047, -2.339893341064453, 18.82513427734375, 5.283317565917969, -3.158998489379883, -2.9096298217773438, 0.6250343322753906, -1.8389244079589844, -0.255340576171875, -2.3065834045410156, 10.149139404296875, -3.2641448974609375, 1.5138511657714844, -0.402496337890625, 0.20721435546875, -3.3904876708984375, -0.04071807861328125, 4.547172546386719, 3.3015213012695312, -3.265613555908203, 2.1892852783203125, 0.8609085083007812, -2.572742462158203, 3.1206817626953125, -0.036716461181640625, -2.6714210510253906, -2.2398147583007812, 1.18701171875, 8.348297119140625, 5.866416931152344, -0.4464855194091797], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000089.npy"}
{"epoch": 0.2696969696969697, "step": 90, "batch_size": 128, "mean": 1.219221830368042, "std": 3.8404386043548584, "min": -7.9571685791015625, "p10": -3.2013454437255855, "median": 1.2560968399047852, "p90": 5.111078262329101, "max": 13.424606323242188, "pos_frac": 0.6171875, "sample": [1.0401458740234375, -7.9571685791015625, 0.8134689331054688, -0.2978363037109375, 2.639698028564453, 0.8490867614746094, 1.163370132446289, 0.30744171142578125, -5.852378845214844, -1.0535945892333984, -1.7572021484375, -0.0269927978515625, -2.3738956451416016, 5.361698150634766, -6.9795989990234375, 5.36309814453125, 1.357391357421875, 1.8733444213867188, 0.085113525390625, -3.698383331298828, -4.5025787353515625, 13.424606323242188, 4.97479248046875, -6.033027648925781, 5.003669738769531, 12.796890258789062, 4.860809326171875, 1.3431415557861328, -2.873138427734375, -2.0374298095703125, 1.976419448852539, 4.211006164550781, 6.4832611083984375, 3.98321533203125, 2.986879348754883, 3.919393539428711, 2.9760589599609375, 0.745086669921875, 2.0287094116210938, -2.6170578002929688, -0.27979278564453125, -4.50201416015625, -3.0291213989257812, 3.9549026489257812, -0.7411441802978516, 1.5293006896972656, 2.3268089294433594, 7.442138671875, 3.59930419921875, 2.8216094970703125, 3.312318801879883, -0.6412467956542969, -1.5759658813476562, 7.4408416748046875, 8.51312255859375, 3.9568939208984375, -0.6766395568847656, 1.1690521240234375, 1.5981903076171875, 0.5793991088867188, -4.6417236328125, 0.0473480224609375, 2.6071395874023438, -0.8325939178466797, 1.80853271484375, 0.5739212036132812, 1.7976417541503906, 4.365959167480469, -1.52001953125, 3.3022079467773438, -5.999267578125, 4.285877227783203, -2.8899993896484375, 4.342010498046875, -3.103778839111328, -4.936958312988281, 4.157501220703125, 2.396726608276367, 1.668487548828125, 4.21826171875, 5.375389099121094, 4.270423889160156, 3.8150405883789062, 4.943504333496094, -2.938770294189453, 10.937522888183594, -0.8259868621826172, -1.4993419647216797, 6.293422698974609, -2.7352733612060547, 2.5254135131835938, -3.593372344970703, 3.4072952270507812, -0.8394012451171875, -5.74468994140625, -0.9405899047851562, -2.1005096435546875, -0.3830127716064453, -0.44432830810546875, 0.3894157409667969, 2.40618896484375, -2.177164077758789, 3.8097686767578125, 2.264556884765625, 1.7099075317382812, 1.641763687133789, -1.6395416259765625, 1.0485687255859375, 3.0323143005371094, 0.9597721099853516, -0.17524337768554688, 4.011222839355469, -0.4828948974609375, 3.719684600830078, 4.9122467041015625, -1.690399169921875, -2.486713409423828, 0.5008831024169922, 11.323410034179688, 1.7547988891601562, 7.2205810546875, -3.4290008544921875, 4.1764678955078125, -2.7338638305664062, -3.0333404541015625, 2.6107635498046875, 4.3004913330078125, -0.3597412109375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000090.npy"}
{"epoch": 0.2727272727272727, "step": 91, "batch_size": 128, "mean": 2.1246557235717773, "std": 4.003537654876709, "min": -10.567794799804688, "p10": -2.1730152130126954, "median": 1.902024269104004, "p90": 7.574102020263671, "max": 14.916656494140625, "pos_frac": 0.703125, "sample": [-3.128032684326172, 0.8452339172363281, -4.4861602783203125, -0.6152591705322266, 4.176872253417969, 0.1937236785888672, 3.6364593505859375, -1.973358154296875, 3.5081748962402344, -0.40592193603515625, 5.475372314453125, -10.567794799804688, 7.28143310546875, 6.743766784667969, 0.39606475830078125, -0.9420166015625, 14.916656494140625, -3.149263381958008, 3.8267059326171875, 0.9467124938964844, -2.203998565673828, 8.155548095703125, -0.6071243286132812, -1.3906784057617188, 0.39382171630859375, -0.46851348876953125, 0.4487457275390625, 2.2114791870117188, 5.035285949707031, -0.6421432495117188, 5.84991455078125, 2.646453857421875, 2.4427032470703125, -0.68963623046875, 5.6934967041015625, 0.23290634155273438, 0.8679313659667969, -3.542236328125, -1.1879310607910156, 0.4563140869140625, 7.8542633056640625, 10.011917114257812, -0.2176666259765625, 10.656074523925781, -6.0474853515625, -1.473785400390625, 3.9153213500976562, 1.5580215454101562, 1.9301624298095703, 1.4544811248779297, 9.201728820800781, 3.2859573364257812, 2.5703201293945312, -1.5145301818847656, 2.8376293182373047, 2.527721405029297, 4.8492584228515625, 1.8738861083984375, 3.1688079833984375, -1.0852851867675781, 3.8662948608398438, 0.36033058166503906, 2.0372238159179688, 2.25738525390625, 5.711578369140625, -2.429716110229492, 1.3485088348388672, -0.8212051391601562, 3.0172805786132812, 5.402553558349609, -0.5410346984863281, -7.427280426025391, 0.646270751953125, 0.5666847229003906, 0.9177703857421875, 2.737579345703125, 2.4876480102539062, 0.36389732360839844, 6.555145263671875, 3.9434814453125, 5.48089599609375, 1.5898628234863281, 10.987213134765625, -4.6406707763671875, -3.212066650390625, 3.7430343627929688, 6.059478759765625, -0.0043964385986328125, 0.74346923828125, 1.0054702758789062, 2.2362899780273438, 3.2906723022460938, 3.6406784057617188, 2.3350582122802734, 2.94329833984375, -0.4335823059082031, -2.9717864990234375, 6.441436767578125, 3.592233657836914, -0.001369476318359375, 9.1768798828125, 3.7557449340820312, -0.2568168640136719, 9.3056640625, 0.7591018676757812, 8.324981689453125, 4.256893157958984, 3.770204544067383, 9.55523681640625, 6.280248641967773, 10.191192626953125, 3.001676559448242, 2.0564422607421875, -2.108919143676758, 6.6311798095703125, 9.899139404296875, 3.679567337036133, 7.454032897949219, 0.6690521240234375, -1.5972518920898438, 1.2383346557617188, 2.175912857055664, 0.33379364013671875, 1.7237911224365234, -2.1597366333007812, -1.7980575561523438, -1.9929046630859375, -3.9335556030273438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000091.npy"}
{"epoch": 0.27575757575757576, "step": 92, "batch_size": 128, "mean": 1.4692165851593018, "std": 4.016690731048584, "min": -10.981437683105469, "p10": -2.964550018310547, "median": 0.9276647567749023, "p90": 7.012417221069335, "max": 14.784423828125, "pos_frac": 0.625, "sample": [-0.79400634765625, 0.1502227783203125, -4.371246337890625, -1.2350883483886719, 0.4211273193359375, 1.7258758544921875, -4.09063720703125, 5.4755706787109375, -0.3925514221191406, 0.708160400390625, 3.35296630859375, 2.9042282104492188, 7.96759033203125, -1.8042984008789062, 4.278358459472656, -1.7945919036865234, -1.3726119995117188, 6.5423126220703125, -0.9959945678710938, -1.5621109008789062, 4.962394714355469, 6.8837127685546875, -10.981437683105469, 3.544689178466797, -3.893798828125, -3.9638137817382812, -0.356689453125, 6.0525970458984375, 0.3261241912841797, 1.1558303833007812, 10.592056274414062, -0.3099517822265625, -1.0901336669921875, -5.586936950683594, -2.9760284423828125, 0.31565093994140625, 0.6133499145507812, -2.630218505859375, 1.1655044555664062, 1.7324695587158203, 4.746978759765625, -1.2227363586425781, 8.285064697265625, -0.22797393798828125, -2.959869384765625, -1.818817138671875, -1.2733001708984375, -7.172393798828125, -3.6748085021972656, -2.9571685791015625, 7.1967926025390625, -0.4871559143066406, 0.7292118072509766, 1.4557609558105469, 0.5873222351074219, 1.1518096923828125, 9.333221435546875, -1.13714599609375, -1.8449783325195312, -0.388336181640625, 3.5391998291015625, 8.035263061523438, 2.9926223754882812, 5.8085174560546875, 0.544281005859375, 1.2950630187988281, 3.9006309509277344, 2.743043899536133, 3.7815475463867188, 2.95562744140625, 1.0387020111083984, -3.5552978515625, 4.439266204833984, 4.225498199462891, 0.09430885314941406, 9.718994140625, -2.9754714965820312, 3.352214813232422, 1.2147026062011719, 6.933399200439453, 1.3040847778320312, -0.09345436096191406, -1.34136962890625, 2.181304931640625, 4.912067413330078, 2.9765090942382812, -1.7373046875, 4.625877380371094, 0.026727676391601562, 6.356914520263672, -0.11151123046875, 0.3717041015625, 1.0804710388183594, 1.4574356079101562, 14.784423828125, 6.15631103515625, 2.2227554321289062, -3.70965576171875, -0.11248016357421875, 2.859893798828125, 1.6418266296386719, 2.2369136810302734, 1.2129554748535156, -1.0603485107421875, 0.8601188659667969, 4.543113708496094, -0.6284904479980469, 13.340927124023438, 9.739234924316406, 1.845510482788086, 8.237991333007812, -1.2567977905273438, -2.7539749145507812, 2.3247909545898438, 8.567543029785156, -0.40842247009277344, 8.407089233398438, 0.9952106475830078, -2.6395263671875, 2.1034622192382812, -1.8516368865966797, 1.8847885131835938, 1.202392578125, 0.4109344482421875, -3.6131820678710938, 0.259368896484375, -0.827850341796875, 0.0028533935546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000092.npy"}
{"epoch": 0.2787878787878788, "step": 93, "batch_size": 128, "mean": 1.9722445011138916, "std": 4.83702278137207, "min": -10.644302368164062, "p10": -3.5564762115478517, "median": 1.9201936721801758, "p90": 8.1975341796875, "max": 19.19781494140625, "pos_frac": 0.65625, "sample": [2.5168190002441406, 5.3247528076171875, 1.0159683227539062, 1.8836898803710938, 8.917587280273438, -7.586402893066406, 0.16819000244140625, 3.6105270385742188, 8.971012115478516, 1.5432167053222656, 1.7503471374511719, 3.1558399200439453, -1.7525558471679688, 5.998661041259766, 1.2157554626464844, 4.394645690917969, 2.865377426147461, 9.188461303710938, -3.3773422241210938, 3.9203948974609375, 4.307525634765625, 3.1111679077148438, 2.5110855102539062, 1.9566974639892578, 6.8280487060546875, -0.45401954650878906, 6.248779296875, -0.45345306396484375, 9.52099609375, 0.5718975067138672, -2.7108535766601562, 2.4189987182617188, -3.5069580078125, -2.0024337768554688, 10.99380111694336, 6.144233703613281, -3.2192764282226562, 2.7894287109375, 2.7112884521484375, 3.1795997619628906, 1.7024688720703125, 2.1040802001953125, 0.643096923828125, 1.4490928649902344, 2.41693115234375, 9.020034790039062, -0.6455516815185547, -5.0400238037109375, -5.938606262207031, 3.8165130615234375, -2.3767337799072266, 8.8038330078125, 6.2425689697265625, -10.644302368164062, 2.856395721435547, -0.4950675964355469, -3.7107391357421875, -7.397621154785156, 2.3319149017333984, -3.60516357421875, -1.0045318603515625, 6.5422515869140625, 5.299459457397461, 4.468496322631836, 8.160926818847656, 3.9317779541015625, -2.4115447998046875, -0.21001434326171875, -0.16455841064453125, 7.0831298828125, 4.7950592041015625, -6.456150054931641, -0.03841400146484375, 3.9909744262695312, 5.249610900878906, -5.802093505859375, 1.16876220703125, 1.5936813354492188, 2.82706356048584, 8.282951354980469, 5.014892578125, -8.59417724609375, -0.09959602355957031, -1.9581108093261719, 5.6773529052734375, 7.346885681152344, 3.529590606689453, -3.5356101989746094, -1.8112030029296875, 2.9005889892578125, 10.76190185546875, 0.7650279998779297, 13.80908203125, 15.4619140625, 2.5709228515625, -3.1458892822265625, -0.1854705810546875, 2.5968551635742188, 1.0237960815429688, -2.8668365478515625, -1.396240234375, 3.9682445526123047, 19.19781494140625, 4.729379653930664, -7.132743835449219, -3.4338607788085938, 0.5962181091308594, -2.16162109375, 0.04193115234375, -0.5785560607910156, -0.4140777587890625, 5.096132278442383, 0.4063453674316406, -5.2902069091796875, -4.22492790222168, -0.8878860473632812, 3.61358642578125, -0.01129150390625, 4.386692047119141, 6.7904510498046875, 11.834548950195312, 4.747188568115234, -1.7955818176269531, 4.307090759277344, 0.8685150146484375, 3.255096435546875, 1.4042205810546875, 1.7574577331542969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000093.npy"}
{"epoch": 0.2818181818181818, "step": 94, "batch_size": 128, "mean": 1.7266556024551392, "std": 4.5816426277160645, "min": -13.5408935546875, "p10": -3.2697351455688475, "median": 1.6258296966552734, "p90": 7.058432006835937, "max": 17.345855712890625, "pos_frac": 0.65625, "sample": [2.6921463012695312, -13.5408935546875, 9.858978271484375, 10.872047424316406, 16.190719604492188, 7.485420227050781, 7.29132080078125, 0.9525432586669922, 0.40563392639160156, 3.9817047119140625, -0.6823883056640625, 3.411355972290039, -10.40582275390625, -5.438421249389648, 5.7344818115234375, 0.9525203704833984, -3.436023712158203, -4.08160400390625, 0.34014892578125, 2.41192626953125, 1.5957489013671875, 2.8183937072753906, -3.6676788330078125, -0.3985595703125, 4.9174652099609375, 5.392181396484375, 2.344940185546875, -5.8571929931640625, 5.940254211425781, 17.345855712890625, 1.8601932525634766, 8.349533081054688, 7.140228271484375, -0.5634117126464844, -1.13885498046875, 3.4395904541015625, -1.7717208862304688, 6.2924957275390625, 3.07049560546875, 2.570648193359375, 0.66619873046875, -0.06543350219726562, 2.9364471435546875, 7.17010498046875, -2.1845550537109375, 4.1279754638671875, -0.22069168090820312, 7.805320739746094, -3.385160446166992, -0.2871131896972656, 4.1616363525390625, 0.1806163787841797, -1.4600677490234375, 2.9364700317382812, 6.6303863525390625, 0.8603782653808594, -6.583953857421875, -3.3100738525390625, 1.4050064086914062, 4.527214050292969, -2.3641510009765625, -1.1959724426269531, -3.6973419189453125, -1.7178459167480469, 3.3052978515625, -1.030670166015625, 2.7146377563476562, -3.2524471282958984, 0.41846466064453125, 3.6831283569335938, 4.892425537109375, 0.9144382476806641, 1.6039276123046875, 5.5927734375, -1.542989730834961, -1.6463985443115234, 3.3939437866210938, 2.384521484375, -2.8540115356445312, -4.5155029296875, 6.6566162109375, 7.02337646484375, 11.457191467285156, 0.041412353515625, 2.0760650634765625, 3.0062255859375, -2.1365432739257812, -0.626068115234375, 2.285137176513672, 3.440021514892578, -2.47601318359375, -11.831085205078125, -3.0723876953125, 0.8573036193847656, -1.7380218505859375, 2.7717514038085938, 6.445526123046875, 2.8488311767578125, 0.3833160400390625, 13.129806518554688, 6.956554412841797, 2.8752212524414062, -1.827178955078125, -0.2059612274169922, 4.5628662109375, -0.3130035400390625, -0.6172981262207031, 3.808074951171875, 1.4381637573242188, 2.739459991455078, 4.466585159301758, -1.0213088989257812, 7.887866973876953, -2.1180648803710938, 2.4670562744140625, 1.6477317810058594, 2.5341415405273438, -1.178802490234375, 1.5833930969238281, 2.9492340087890625, 0.7474422454833984, -0.8325347900390625, 3.2751617431640625, 4.108518600463867, 3.958925247192383, 2.8685035705566406, 0.9042778015136719, 1.1310882568359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000094.npy"}
{"epoch": 0.28484848484848485, "step": 95, "batch_size": 128, "mean": 2.089350938796997, "std": 4.616724967956543, "min": -11.86871337890625, "p10": -3.838589859008789, "median": 2.1344966888427734, "p90": 7.653237915039062, "max": 15.27716064453125, "pos_frac": 0.7109375, "sample": [-1.9220657348632812, -0.15435791015625, 1.2544898986816406, -0.8905715942382812, -9.5960693359375, 5.0291595458984375, 5.199977874755859, 5.545886993408203, 3.9057083129882812, 4.084659576416016, -0.12414932250976562, 0.4451637268066406, 0.9624404907226562, 5.176534652709961, 1.3704948425292969, 8.740386962890625, 5.907447814941406, 6.3050537109375, -0.6451072692871094, 5.805950164794922, 0.7384719848632812, -0.8889694213867188, 2.376974105834961, 1.802337646484375, -2.349639892578125, -1.6023712158203125, -7.896369934082031, 6.352195739746094, 1.2067718505859375, 2.7325820922851562, 9.614700317382812, 3.9102325439453125, -0.4676799774169922, -2.991607666015625, -6.6626434326171875, 3.462299346923828, -0.045684814453125, -0.9520301818847656, -2.4666748046875, -1.7018280029296875, 2.012115478515625, 1.7249698638916016, 5.0001220703125, 0.8786201477050781, 0.4313678741455078, 2.0030479431152344, -6.1031494140625, -11.86871337890625, 0.7944011688232422, 0.437957763671875, 3.1122817993164062, -0.25757598876953125, -4.850944519042969, 3.2478485107421875, -5.46246337890625, -1.0084037780761719, -2.2925758361816406, -3.8809967041015625, 1.2785911560058594, -2.042755126953125, 2.0231056213378906, -3.820415496826172, 12.665023803710938, 4.968902587890625, 1.0053901672363281, 11.354446411132812, 8.931808471679688, -8.931427001953125, 4.742835998535156, 5.462532043457031, 0.8106231689453125, 2.88006591796875, -0.13083648681640625, 3.484954833984375, -1.027639389038086, 0.3092689514160156, 3.8161163330078125, -0.6093292236328125, -4.042266845703125, 2.2458877563476562, 5.9523162841796875, 15.27716064453125, 2.4981842041015625, 2.3024730682373047, 7.608551025390625, 1.179229736328125, 0.26754188537597656, 6.456596374511719, 11.767608642578125, 3.6404285430908203, 1.103719711303711, 3.648670196533203, 0.8627986907958984, 3.807647705078125, 4.6949005126953125, 9.71319580078125, -5.906343460083008, 11.052131652832031, 6.413471221923828, 4.204887390136719, 6.535762786865234, 0.8066482543945312, 2.622814178466797, 3.9919281005859375, -2.2137680053710938, 1.1657638549804688, -6.9297332763671875, 3.7916259765625, 2.938091278076172, 2.9582881927490234, 8.582801818847656, -0.4064979553222656, 3.74566650390625, 5.887786865234375, 3.576904296875, 12.929824829101562, 4.393627166748047, -4.802085876464844, 0.9864006042480469, 6.43536376953125, 7.75750732421875, 2.431060791015625, 3.5356788635253906, 7.846168518066406, 1.396026611328125, 2.4864501953125, 5.880645751953125, 2.696096420288086], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000095.npy"}
{"epoch": 0.2878787878787879, "step": 96, "batch_size": 128, "mean": 1.407242774963379, "std": 4.4752936363220215, "min": -17.05029296875, "p10": -3.3648969650268556, "median": 1.0349607467651367, "p90": 7.362012481689452, "max": 14.57196044921875, "pos_frac": 0.6171875, "sample": [9.663787841796875, -3.357088088989258, -3.042022705078125, 8.710250854492188, 3.1671714782714844, -0.616424560546875, 6.614227294921875, 9.163551330566406, -3.7789077758789062, 2.080047607421875, 14.57196044921875, 3.2782039642333984, 7.1948394775390625, 8.3675537109375, 2.8211593627929688, 0.079193115234375, -0.37099456787109375, 4.112220764160156, 7.6050567626953125, -9.20880126953125, -0.43572998046875, -6.871673583984375, 1.4415130615234375, 4.981269836425781, -0.4581470489501953, 1.3746414184570312, -0.8577976226806641, -0.28803443908691406, 9.2197265625, -7.678581237792969, -2.034637451171875, 13.221099853515625, 0.39232826232910156, 1.9959640502929688, -3.38311767578125, 6.696697235107422, 1.9154815673828125, -0.5273895263671875, 6.3101959228515625, 1.028421401977539, -4.2787322998046875, 2.1921539306640625, -0.5470733642578125, 3.270732879638672, -0.28002166748046875, -0.9579753875732422, -1.05712890625, -0.43079376220703125, 2.851024627685547, 7.0426025390625, 0.6099472045898438, 2.5833206176757812, 1.0918941497802734, 2.885364532470703, -2.1513595581054688, 5.6982574462890625, 5.307506561279297, -17.05029296875, 2.9964466094970703, -0.8995208740234375, 0.8815155029296875, 2.933990478515625, -7.019248962402344, -3.1970901489257812, -7.310577392578125, -0.5514144897460938, 0.5725555419921875, 2.0045013427734375, 3.9840316772460938, 0.304779052734375, -3.3081417083740234, 8.0372314453125, 7.649349212646484, -3.9561614990234375, -2.1669063568115234, 4.153083801269531, 3.8215789794921875, 10.003807067871094, 0.6276607513427734, 0.2756080627441406, -1.0928287506103516, 1.6540851593017578, -3.5915756225585938, 1.0415000915527344, -0.10114288330078125, -1.890188217163086, -2.369384765625, 7.8485565185546875, 3.7358245849609375, 0.891876220703125, -0.4136505126953125, 2.331897735595703, -0.09979248046875, 6.5386962890625, -0.07329368591308594, 1.5031585693359375, 2.1483306884765625, 0.9305953979492188, -2.9744606018066406, 2.04681396484375, 1.6791915893554688, 0.5015583038330078, 5.686973571777344, -4.28125, -1.9911956787109375, 0.6775455474853516, -1.4203338623046875, 3.5822525024414062, 1.5651397705078125, -0.27947235107421875, 6.206142425537109, 7.257850646972656, 1.3657665252685547, -0.206512451171875, -4.846340179443359, 5.390531539916992, 8.930740356445312, 2.633584976196289, -1.1010398864746094, 1.461843490600586, 0.2958946228027344, -0.9449043273925781, 5.874153137207031, 3.2588424682617188, 1.2623863220214844, -2.931253433227539, 2.5090370178222656, 0.21121597290039062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000096.npy"}
{"epoch": 0.2909090909090909, "step": 97, "batch_size": 128, "mean": 1.4855334758758545, "std": 4.20941686630249, "min": -7.666595458984375, "p10": -3.459672927856445, "median": 1.1126785278320312, "p90": 6.3168384552001955, "max": 16.11883544921875, "pos_frac": 0.6484375, "sample": [1.0358734130859375, -1.7946224212646484, 1.9754467010498047, 0.4004554748535156, 1.1553459167480469, -3.183513641357422, 6.3317413330078125, 2.3809127807617188, -1.26373291015625, 3.7183876037597656, 0.6041259765625, -0.794525146484375, 0.034862518310546875, 4.082592010498047, 5.429538726806641, -1.5763664245605469, -1.5367660522460938, 6.4680633544921875, -2.1742630004882812, -1.280975341796875, -3.3527450561523438, -4.0403900146484375, 4.059783935546875, 3.7233314514160156, 0.12964820861816406, 5.959228515625, 4.551231384277344, 0.2785530090332031, 7.543281555175781, 5.097690582275391, 0.8466758728027344, -1.49700927734375, -5.672210693359375, 1.6188697814941406, 5.1638336181640625, 0.95880126953125, 5.486179351806641, -1.3564453125, 6.310451507568359, -0.1577301025390625, 6.302650451660156, 5.9671173095703125, 2.7843151092529297, 1.9852752685546875, 0.11206817626953125, 0.6549358367919922, 3.3139419555664062, -2.692577362060547, 4.776275634765625, -3.06671142578125, 1.4166679382324219, 10.059837341308594, -1.6030158996582031, -7.1222686767578125, -0.03317832946777344, 0.5168209075927734, 10.174652099609375, -3.7091712951660156, 3.3888931274414062, 3.353849411010742, -2.9780044555664062, 5.450550079345703, -0.1578521728515625, 0.7811737060546875, 4.077781677246094, -4.346519470214844, 6.290840148925781, -1.7561149597167969, -1.1827239990234375, 1.4287910461425781, -5.538604736328125, 1.4754085540771484, 8.738121032714844, -6.1240692138671875, 0.4292488098144531, 0.080841064453125, -1.3243560791015625, 2.4134769439697266, 5.929481506347656, -2.2035980224609375, -3.2878456115722656, 0.22729110717773438, -4.390710830688477, -0.9780445098876953, 4.139223098754883, 12.368377685546875, 3.2430343627929688, 7.308221817016602, 16.11883544921875, 4.321441650390625, 3.6385936737060547, -3.2588348388671875, -1.37359619140625, 3.047626495361328, 6.927085876464844, 9.20159912109375, -1.7452621459960938, 8.107269287109375, 0.467681884765625, 3.4029769897460938, 1.2763671875, 4.4432220458984375, -7.010955810546875, -7.164588928222656, -0.8188095092773438, 4.196319580078125, -0.4332008361816406, 4.191314697265625, 1.9858856201171875, 5.09039306640625, 1.4874591827392578, 4.351497650146484, -5.658603668212891, -0.05197906494140625, 1.3039474487304688, 0.15218734741210938, 9.935287475585938, 5.660242080688477, 2.869365692138672, -7.666595458984375, 2.51629638671875, 1.0114326477050781, 1.0700111389160156, 3.8501052856445312, 1.29095458984375, -3.7903823852539062, -2.3093833923339844, -2.842317581176758], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000097.npy"}
{"epoch": 0.29393939393939394, "step": 98, "batch_size": 128, "mean": 1.7936640977859497, "std": 3.8789114952087402, "min": -9.333663940429688, "p10": -2.771893310546875, "median": 2.1820030212402344, "p90": 5.982435226440429, "max": 15.59344482421875, "pos_frac": 0.6953125, "sample": [3.0636978149414062, 3.985677719116211, -2.0473175048828125, 0.4147491455078125, 7.321577072143555, 4.2296600341796875, 2.3361854553222656, 2.15167236328125, -1.7882919311523438, 4.742679595947266, -0.09313201904296875, 4.7825469970703125, -4.7594146728515625, -2.6630783081054688, 2.9531707763671875, 3.6101837158203125, 4.222553253173828, 5.287322998046875, 5.154918670654297, -1.8350906372070312, -0.0894622802734375, 4.21807861328125, 5.3882293701171875, 6.9720458984375, -2.7167510986328125, 2.2175216674804688, 2.060638427734375, -3.39276123046875, 0.174713134765625, 4.064563751220703, 4.2812957763671875, 3.2418441772460938, 4.237548828125, 9.4031982421875, 3.5186386108398438, 2.4390182495117188, -2.0673789978027344, -3.0079421997070312, -9.333663940429688, -6.94879150390625, 4.56390380859375, -1.1467723846435547, 3.3408126831054688, 6.96014404296875, -0.7877044677734375, 5.741661071777344, 0.3689441680908203, 0.24997711181640625, -0.5473670959472656, -1.73797607421875, 1.0121231079101562, -0.14483642578125, 0.7596549987792969, 2.033597946166992, -6.106353759765625, 3.9513702392578125, 7.1157989501953125, -2.9005584716796875, -2.1539840698242188, 8.579536437988281, -0.25136566162109375, 5.111087799072266, 4.557708740234375, 1.5552139282226562, 4.168701171875, -7.743600845336914, 0.7266025543212891, -3.8875732421875, 0.4978485107421875, -1.1677474975585938, -4.791473388671875, 2.89263916015625, -6.108890533447266, 4.7831268310546875, -2.375537872314453, 6.229076385498047, -1.9370536804199219, 0.7503166198730469, 3.0176658630371094, -0.6248035430908203, 0.779083251953125, -0.1605052947998047, 2.487346649169922, 2.3909454345703125, -1.1107177734375, 4.184898376464844, -1.6083297729492188, 5.0748443603515625, 0.5999908447265625, 1.7293853759765625, 3.4627456665039062, 4.666940689086914, 0.043598175048828125, 3.8758468627929688, 1.2970695495605469, 5.538665771484375, 3.2130279541015625, -2.4544219970703125, -3.161773681640625, 7.093299865722656, 0.6077728271484375, 1.70208740234375, 4.113399505615234, 3.7780914306640625, 2.2135276794433594, 2.4163665771484375, 11.761184692382812, 3.4713973999023438, 6.304107666015625, 15.59344482421875, 0.2366199493408203, 2.4381675720214844, 3.1492767333984375, 5.876731872558594, 0.4576377868652344, -0.9707412719726562, 0.9512481689453125, -3.4571456909179688, 0.6654701232910156, 2.788299560546875, 5.392162322998047, 10.779365539550781, 2.2123336791992188, -1.6184463500976562, 2.3787612915039062, -0.5484733581542969, 0.36458587646484375, 8.305068969726562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000098.npy"}
{"epoch": 0.296969696969697, "step": 99, "batch_size": 128, "mean": 2.0569705963134766, "std": 4.282451629638672, "min": -15.168777465820312, "p10": -2.9575302124023435, "median": 2.253650188446045, "p90": 6.812407684326171, "max": 12.771339416503906, "pos_frac": 0.6953125, "sample": [-0.0576934814453125, 6.250526428222656, 3.936185836791992, 1.47369384765625, 10.026397705078125, 2.1120834350585938, -5.3564453125, -15.168777465820312, 4.310356140136719, 7.213748931884766, 8.862911224365234, 12.771339416503906, -0.3343963623046875, 2.4262466430664062, 3.4578323364257812, 3.1997528076171875, -10.153823852539062, 2.5006332397460938, -3.608440399169922, -6.594028472900391, 3.7022933959960938, -1.1932525634765625, 6.7757568359375, 0.492462158203125, 0.8063583374023438, 4.29279899597168, 2.5938587188720703, 6.33087158203125, 4.084892272949219, 3.150928497314453, 0.7966632843017578, -4.304660797119141, 0.37223052978515625, 3.3673553466796875, 6.078212738037109, 6.897926330566406, -3.8309059143066406, -1.43389892578125, 1.8147754669189453, 7.0055694580078125, 3.9158096313476562, -0.12398529052734375, 4.2706451416015625, -1.4458675384521484, 1.3261890411376953, 7.333610534667969, 2.0951271057128906, 9.712997436523438, -1.2930107116699219, 4.910133361816406, 4.918067932128906, -2.0767059326171875, 0.1453704833984375, 6.471954345703125, -2.1060943603515625, -1.9577255249023438, 10.984207153320312, 2.9022216796875, 2.1799192428588867, -1.6144447326660156, 0.2775421142578125, 9.624740600585938, 3.144947052001953, 1.0965461730957031, -3.8195724487304688, 3.311504364013672, -0.49344635009765625, -0.10329437255859375, -1.4327106475830078, 6.718955993652344, -0.3663158416748047, 3.912689208984375, -3.04339599609375, 5.597076416015625, 6.1890106201171875, 0.8714427947998047, 0.3234405517578125, 3.034698486328125, 6.1038360595703125, -6.0953521728515625, -2.9207305908203125, -5.9483795166015625, 0.8845100402832031, -1.0138359069824219, 1.8759517669677734, -0.2233409881591797, -0.48802947998046875, -1.2481880187988281, 4.8955583572387695, -1.381072998046875, -0.49596405029296875, 6.2664031982421875, 3.4919815063476562, 2.1514930725097656, 0.18300628662109375, 3.318572998046875, 1.285400390625, 2.5419387817382812, 3.4361648559570312, 5.986175537109375, 1.5690326690673828, -1.3881111145019531, 9.151206970214844, 3.2209129333496094, 7.870330810546875, -7.40570068359375, 1.4922409057617188, 0.6921367645263672, 9.657958984375, 4.309436798095703, 5.756843566894531, -1.9283065795898438, 3.64013671875, 5.108634948730469, 5.117832183837891, 5.824148178100586, 4.927284240722656, 6.6039581298828125, -1.9659080505371094, 3.528543472290039, 5.519561767578125, -1.531778335571289, -3.360048294067383, 2.0876197814941406, 2.327381134033203, 4.131660461425781, 3.7203369140625, 1.548177719116211], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000099.npy"}
{"epoch": 0.3, "step": 100, "batch_size": 128, "mean": 2.6003317832946777, "std": 4.610889911651611, "min": -10.623886108398438, "p10": -2.8448448181152344, "median": 2.751842498779297, "p90": 7.878493881225586, "max": 15.402877807617188, "pos_frac": 0.7265625, "sample": [7.401149749755859, 4.270809173583984, 5.8556671142578125, 1.0107040405273438, 2.8507823944091797, -2.0602035522460938, 13.0447998046875, 3.4842071533203125, 4.662994384765625, 1.4339790344238281, 1.29254150390625, 3.6576900482177734, -1.7826385498046875, -2.5506553649902344, 2.6012420654296875, 5.700736999511719, 4.9943084716796875, -1.3039779663085938, -0.2034139633178711, 0.88934326171875, 11.78863525390625, -5.663703918457031, -3.009613037109375, 1.0927047729492188, 2.327014923095703, 3.899261474609375, 0.366424560546875, -7.389656066894531, -6.8198394775390625, 6.408294677734375, -2.80377197265625, 1.1476554870605469, 6.154632568359375, 7.305023193359375, 12.233306884765625, -3.4651336669921875, 6.9995269775390625, 2.736724853515625, 3.8657989501953125, -2.69903564453125, 0.3988628387451172, 3.9891510009765625, 2.3631210327148438, -2.4560546875, 3.087635040283203, 5.129524230957031, 2.7669601440429688, -0.20457839965820312, -4.660026550292969, 7.040252685546875, 6.5167388916015625, 3.478607177734375, -0.13790130615234375, 5.841339111328125, 4.445957183837891, 1.4957923889160156, 3.6750736236572266, 2.05133056640625, 6.3955230712890625, 0.84979248046875, 5.6817779541015625, -5.935417175292969, 7.751365661621094, 1.1722831726074219, 1.4919281005859375, 6.354881286621094, 12.58221435546875, 9.443229675292969, 3.901519775390625, 1.104257583618164, 6.7232818603515625, 4.91790771484375, 0.24858856201171875, -0.1608428955078125, 8.060829162597656, -2.32659912109375, 2.137176513671875, -5.417449951171875, 4.465141296386719, 8.788368225097656, -1.7318038940429688, 15.402877807617188, 7.800350189208984, 2.552227020263672, -10.623886108398438, 3.417072296142578, 5.235382080078125, 5.304512023925781, 0.3013019561767578, 4.4127655029296875, -0.2984161376953125, 4.2218780517578125, 1.0247917175292969, -0.5333442687988281, 6.2678070068359375, 6.6896514892578125, 4.289756774902344, -4.878562927246094, 3.6764373779296875, 8.08258056640625, -1.9156150817871094, 8.653045654296875, 2.4980316162109375, -0.9920921325683594, 0.3868751525878906, 4.512168884277344, 2.982149124145508, -0.8875885009765625, 1.6017189025878906, -1.5723495483398438, 4.2584686279296875, 6.163543701171875, 10.22341537475586, 3.5262374877929688, 13.214324951171875, 0.86175537109375, 2.9434585571289062, -9.031661987304688, -1.1261749267578125, 4.939849853515625, -2.9406814575195312, 0.3675098419189453, 1.90142822265625, -2.1178321838378906, -1.0610694885253906, -3.284414291381836, 6.981513977050781, 8.367202758789062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000100.npy"}
{"epoch": 0.30303030303030304, "step": 101, "batch_size": 128, "mean": 2.2845005989074707, "std": 4.118752479553223, "min": -12.2154541015625, "p10": -1.96605110168457, "median": 1.7914257049560547, "p90": 7.967103004455566, "max": 13.377853393554688, "pos_frac": 0.6875, "sample": [-1.138275146484375, 7.5254058837890625, 3.4625320434570312, 3.6217422485351562, 0.09636116027832031, 1.7073326110839844, 7.7914581298828125, -0.44683074951171875, 8.828102111816406, -0.9438629150390625, -1.8389739990234375, 5.474218368530273, -2.678882598876953, 0.7234992980957031, 8.764427185058594, -3.26995849609375, 1.110870361328125, 1.9330635070800781, 3.1602134704589844, 9.819686889648438, 0.38443756103515625, -6.321258544921875, 11.345947265625, 1.6033935546875, 1.3300304412841797, 11.274826049804688, 0.6248531341552734, 0.57379150390625, 3.7942962646484375, 3.21728515625, 2.6999874114990234, 9.746681213378906, 0.5292110443115234, -0.48192596435546875, 8.312965393066406, 2.7419509887695312, 3.3118324279785156, 9.618385314941406, 1.6471900939941406, -12.2154541015625, -0.25773048400878906, 4.944053649902344, 0.24440765380859375, 3.6301040649414062, -1.0060901641845703, 5.448921203613281, 1.623870849609375, 5.07623291015625, 6.024131774902344, 6.439233779907227, -0.55084228515625, 6.176876068115234, -2.0805130004882812, 4.843955993652344, -0.2680397033691406, -2.969207763671875, -0.2599906921386719, 5.2073974609375, 2.982585906982422, 4.221588134765625, 7.943622589111328, -1.0910263061523438, 1.5638885498046875, -6.599853515625, -1.845895767211914, 4.60723876953125, 8.656383514404297, 13.377853393554688, 3.4262008666992188, -0.5470428466796875, 1.4019355773925781, 1.758331298828125, -1.7303390502929688, -1.3998336791992188, 7.8992462158203125, -1.9169960021972656, 1.483306884765625, 3.0529708862304688, 5.6627960205078125, 5.669517517089844, 0.8662223815917969, -0.6355743408203125, 4.3056793212890625, -5.248260498046875, 8.534194946289062, 2.3830032348632812, 0.086273193359375, -3.099864959716797, 7.613838195800781, -0.3931427001953125, 1.4328556060791016, -0.8861045837402344, -0.016698837280273438, -2.278322219848633, -3.573577880859375, 2.4141998291015625, 1.8707408905029297, 3.8708229064941406, 3.3671741485595703, 3.2063446044921875, 1.2559394836425781, 3.3516407012939453, 1.8245201110839844, 3.5083694458007812, -0.870513916015625, -1.395660400390625, 2.4591064453125, 9.602790832519531, -0.8787460327148438, 8.021890640258789, -0.8332901000976562, 0.5678367614746094, -3.0817947387695312, 1.705780029296875, 2.0131072998046875, 6.387214660644531, 2.6232528686523438, 2.9924697875976562, 7.029319763183594, 0.4652080535888672, 7.800506591796875, -0.05388450622558594, -1.7014293670654297, 4.193376541137695, -6.120059967041016, -0.5694427490234375, 2.8727493286132812, 7.142234802246094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000101.npy"}
{"epoch": 0.30606060606060603, "step": 102, "batch_size": 128, "mean": 2.3918027877807617, "std": 4.274868488311768, "min": -6.828193664550781, "p10": -2.571599769592285, "median": 1.8433952331542969, "p90": 8.508995819091796, "max": 15.244659423828125, "pos_frac": 0.71875, "sample": [0.7204437255859375, 0.4713592529296875, 1.2457427978515625, -2.352264404296875, 1.1248016357421875, 1.865234375, 2.5047836303710938, 8.02532958984375, 6.448112487792969, -2.2356796264648438, 0.5922317504882812, 9.60418701171875, -6.828193664550781, 0.7564964294433594, 5.964653015136719, 0.8602218627929688, 1.8215560913085938, -1.8077812194824219, 8.416351318359375, 0.01837158203125, 4.3105316162109375, 3.471149444580078, 9.976043701171875, 4.636388778686523, -2.9624252319335938, 6.653251647949219, 2.0493412017822266, -0.0715789794921875, -1.1198272705078125, 2.9997100830078125, -1.244232177734375, -3.573183059692383, 0.38971710205078125, 2.7027587890625, 4.2980499267578125, 1.615478515625, 3.7144088745117188, 13.7130126953125, -0.5323390960693359, 15.244659423828125, -3.9188690185546875, 5.0731658935546875, 1.0489845275878906, 11.405426025390625, 2.7247161865234375, 3.2232894897460938, 1.3366165161132812, 3.289276123046875, 1.7198562622070312, 10.558624267578125, 2.2989959716796875, 2.5474777221679688, 4.697395324707031, 2.0093994140625, 4.02850341796875, 3.9339065551757812, -0.21834564208984375, 0.9057426452636719, 7.578227996826172, 0.7762699127197266, 4.410129547119141, -0.35742950439453125, -0.21521759033203125, 1.9711360931396484, 12.445877075195312, 3.1434097290039062, 0.25666236877441406, 1.8752727508544922, 1.3362407684326172, 11.984062194824219, 3.2269439697265625, 5.191135406494141, -0.04265594482421875, 0.20709991455078125, -2.1750106811523438, 1.2300872802734375, 0.6461048126220703, -2.772918701171875, -4.7878875732421875, 2.6306915283203125, 3.4327869415283203, 11.334114074707031, 3.228006362915039, -3.240755081176758, 3.936931610107422, 5.404315948486328, 0.4314861297607422, 12.403839111328125, 6.15185546875, 3.2995071411132812, 6.032569885253906, 8.725166320800781, -4.171455383300781, -0.8562088012695312, 2.505340576171875, 12.566123962402344, -5.086334228515625, 9.01593017578125, 4.796905517578125, -5.053497314453125, -0.07441139221191406, 1.2863311767578125, -1.0958938598632812, -2.1956787109375, -1.1873226165771484, -1.090667724609375, 3.59173583984375, 1.6064033508300781, 3.7026519775390625, 3.845104217529297, 7.154022216796875, -2.76788330078125, 3.051258087158203, 1.3325958251953125, 1.19158935546875, 3.0753746032714844, -3.9651145935058594, 1.9677047729492188, 6.445381164550781, -0.6264591217041016, -1.1395339965820312, 0.8287239074707031, -0.0652923583984375, 5.669532775878906, -5.479953765869141, -2.487478256225586, 0.6667098999023438, -0.654571533203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000102.npy"}
{"epoch": 0.3090909090909091, "step": 103, "batch_size": 128, "mean": 1.6018012762069702, "std": 5.0018229484558105, "min": -14.01800537109375, "p10": -4.480928802490234, "median": 1.2967376708984375, "p90": 8.054492950439453, "max": 14.853607177734375, "pos_frac": 0.6328125, "sample": [9.41693115234375, -0.7362518310546875, -0.4850597381591797, -1.405466079711914, 0.7230148315429688, 7.459541320800781, 1.7391929626464844, 4.869781494140625, -2.2158889770507812, 0.890899658203125, 4.0408935546875, 2.5480422973632812, 0.4219226837158203, 7.582756042480469, 0.8116188049316406, 1.4757080078125, 4.36651611328125, 1.8740615844726562, 1.5088958740234375, 4.766754150390625, 4.3238983154296875, 0.110260009765625, -3.6210365295410156, -2.0868263244628906, 7.477458953857422, -3.2461395263671875, -2.5958404541015625, 6.1164703369140625, 0.22758102416992188, 3.9024581909179688, 0.7564849853515625, 7.3775787353515625, 0.25616455078125, 10.792510986328125, 3.336669921875, 1.1671600341796875, 14.853607177734375, 2.7002925872802734, 3.278301239013672, -1.543060302734375, 1.2070388793945312, -0.7185745239257812, -5.773124694824219, -1.0124168395996094, -1.0963363647460938, 5.3487548828125, 0.7504425048828125, -4.046594619750977, -14.01800537109375, 4.937965393066406, 1.9002532958984375, 12.445526123046875, 14.325386047363281, 7.7418212890625, -2.3668060302734375, -4.737602233886719, 0.5847129821777344, -4.339941024780273, 2.8770980834960938, -4.886081695556641, 2.4581527709960938, -4.802549362182617, 6.2522735595703125, -0.2967071533203125, -0.9983463287353516, -10.204452514648438, 3.0067367553710938, 0.6884307861328125, -1.6111469268798828, 1.1361961364746094, 5.643547058105469, 3.4060935974121094, -4.279136657714844, 8.166458129882812, 5.91192626953125, 2.991792678833008, 9.793571472167969, 0.17704010009765625, 2.117584228515625, -8.969009399414062, -1.3502578735351562, 2.3954010009765625, 6.588401794433594, -4.7751007080078125, -5.249107360839844, -2.4367904663085938, -3.0343589782714844, 4.26751708984375, 1.930328369140625, -0.32269287109375, 9.06060791015625, 2.304107666015625, 6.012886047363281, -1.0499496459960938, 2.6314468383789062, 10.159927368164062, -0.8635787963867188, 2.9718704223632812, 2.739910125732422, 9.857040405273438, 2.2770156860351562, -0.3362407684326172, -1.1536788940429688, 13.98797607421875, 2.2243194580078125, 9.954658508300781, 3.0084972381591797, -5.306379318237305, 1.64825439453125, -2.6315841674804688, 3.690582275390625, 8.006507873535156, 1.5833930969238281, 1.3864364624023438, -4.9959869384765625, -5.408958435058594, -4.2824859619140625, -2.3011646270751953, 5.879829406738281, 11.047607421875, -1.2907047271728516, 0.30908966064453125, 0.8169326782226562, 7.73895263671875, -3.3828887939453125, -2.42626953125, -5.429664611816406, -4.3709259033203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000103.npy"}
{"epoch": 0.31212121212121213, "step": 104, "batch_size": 128, "mean": 1.5353858470916748, "std": 4.222602844238281, "min": -8.312705993652344, "p10": -3.509760284423828, "median": 1.3724279403686523, "p90": 6.664980316162109, "max": 17.6094970703125, "pos_frac": 0.6171875, "sample": [11.190093994140625, -1.0849742889404297, -2.0517005920410156, 3.3601150512695312, 2.011676788330078, 1.7413101196289062, -3.718902587890625, -2.7087440490722656, 0.02846527099609375, -0.5533447265625, 1.8221378326416016, 6.3437347412109375, -1.9538764953613281, 3.6775436401367188, 4.0515289306640625, -5.876739501953125, 17.6094970703125, -0.19384765625, 3.6660003662109375, -1.588836669921875, -1.3187694549560547, 5.969097137451172, 1.75042724609375, 0.6422462463378906, 2.095785140991211, 6.8533477783203125, -5.024383544921875, 1.9867744445800781, 2.166015625, 3.3780364990234375, 2.8841400146484375, 12.580543518066406, -2.0525588989257812, 11.192100524902344, 0.6592082977294922, 2.005840301513672, 1.974924087524414, 2.9957618713378906, -0.3762035369873047, -1.6870346069335938, -0.7989940643310547, -3.6874923706054688, -0.4840240478515625, 11.745994567871094, -1.5523910522460938, 2.752941131591797, 5.019554138183594, -0.4216651916503906, 5.7186737060546875, 6.584251403808594, 1.6447601318359375, 2.8563385009765625, -1.2155990600585938, 7.851226806640625, -0.13440704345703125, 2.9149856567382812, 6.8914337158203125, -0.3726043701171875, 8.353973388671875, 2.103483200073242, -5.3602142333984375, 7.6136474609375, -2.32440185546875, 9.459144592285156, 1.6896133422851562, -0.21152305603027344, 3.7757930755615234, -3.665149688720703, 2.7622833251953125, 0.9714317321777344, 0.20163726806640625, -1.7395782470703125, 0.15000152587890625, -2.57855224609375, 0.1400165557861328, -8.312705993652344, -0.4731292724609375, -5.516120910644531, 0.12188720703125, -0.4050140380859375, 5.8287506103515625, -1.297393798828125, -6.7239227294921875, 5.57696533203125, 8.533649444580078, 1.2855377197265625, 1.0659255981445312, 0.23770523071289062, 1.4593181610107422, 1.4878063201904297, -4.276908874511719, 2.3877487182617188, -2.1059341430664062, 2.8069190979003906, -3.443164825439453, 0.09911346435546875, 2.068084716796875, -2.8427085876464844, -1.9411392211914062, 5.934478759765625, 0.6412429809570312, 2.911224365234375, 2.9363632202148438, 2.943563461303711, 1.5446796417236328, 4.897392272949219, 1.1192092895507812, 0.7899913787841797, 2.077993392944336, 9.987190246582031, -0.0003910064697265625, -0.48551177978515625, 6.167732238769531, 3.7094650268554688, 5.885490417480469, -4.339145660400391, 1.9023265838623047, 4.287078857421875, 6.081390380859375, -3.279970169067383, -0.8420562744140625, -0.15003204345703125, -4.176731109619141, -2.9633216857910156, -5.2476043701171875, -1.4458732604980469, 5.3316650390625, 3.5892486572265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000104.npy"}
{"epoch": 0.3151515151515151, "step": 105, "batch_size": 128, "mean": 2.651883602142334, "std": 5.30912971496582, "min": -8.257484436035156, "p10": -4.149735641479492, "median": 2.0988101959228516, "p90": 8.994700622558593, "max": 18.901229858398438, "pos_frac": 0.7109375, "sample": [1.9563674926757812, 3.3807506561279297, 2.109027862548828, 6.838619232177734, -7.911373138427734, 11.533172607421875, 4.353252410888672, 0.02419281005859375, -5.826934814453125, 1.9808349609375, 1.3237190246582031, 16.121780395507812, 0.3704071044921875, -1.6869964599609375, 0.2632293701171875, 5.6440887451171875, 4.1335906982421875, 7.141143798828125, 6.0290679931640625, 0.6662940979003906, 9.005050659179688, -2.2718505859375, -8.257484436035156, 7.530303955078125, 6.162071228027344, 2.7454910278320312, 2.0024852752685547, -3.5030994415283203, -1.4248733520507812, -4.3885345458984375, -8.234954833984375, 0.14879798889160156, 1.457366943359375, 18.901229858398438, 0.5328292846679688, 7.0196380615234375, -5.4715576171875, 2.088592529296875, -1.0997772216796875, -6.032798767089844, 2.1567001342773438, -2.883514404296875, 3.041839599609375, 12.125076293945312, 11.870819091796875, 9.24700927734375, 3.123065948486328, 4.5335235595703125, -1.6572952270507812, 0.2880077362060547, -2.2225112915039062, -2.0926589965820312, 9.209762573242188, 1.8300113677978516, 2.9916305541992188, 5.779327392578125, 8.426315307617188, -5.343994140625, -1.884368896484375, 1.6630668640136719, 8.326099395751953, 8.270092010498047, 2.585531234741211, 0.7463302612304688, 0.9431991577148438, -0.33451080322265625, -4.12579345703125, 3.3663864135742188, -0.4639778137207031, 1.6261825561523438, 11.368461608886719, 12.392822265625, -4.205600738525391, -4.981283187866211, 2.2978973388671875, -4.379741668701172, 4.3886566162109375, 5.527046203613281, 1.8368148803710938, -0.063232421875, 3.3348388671875, 6.10284423828125, 1.5525131225585938, -2.9771881103515625, -1.2561721801757812, 0.7348899841308594, -3.067230224609375, 5.212556838989258, 11.721954345703125, 3.7636260986328125, 2.8242111206054688, 0.8952388763427734, 2.8628921508789062, -6.142753601074219, 0.6357269287109375, 7.19390869140625, 8.949562072753906, 8.990264892578125, 3.8425827026367188, 18.813735961914062, 5.8672332763671875, 16.724929809570312, 8.819656372070312, -0.38686370849609375, 5.347442626953125, 2.3179550170898438, -1.2373237609863281, 1.9406890869140625, 4.448814392089844, 0.7164154052734375, 7.018257141113281, 0.12553977966308594, -0.7497730255126953, 7.807952880859375, 0.51788330078125, -0.8265647888183594, 3.2602577209472656, -0.11350059509277344, 3.259237289428711, -0.27309226989746094, 8.811386108398438, 4.553859710693359, 2.9855175018310547, -4.0637664794921875, 2.8373851776123047, -4.820034027099609, 7.6856231689453125, 2.201627731323242], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000105.npy"}
{"epoch": 0.3181818181818182, "step": 106, "batch_size": 128, "mean": 2.7989416122436523, "std": 4.860432147979736, "min": -9.673515319824219, "p10": -2.6088417053222654, "median": 2.6305007934570312, "p90": 8.8774658203125, "max": 16.693634033203125, "pos_frac": 0.7109375, "sample": [-0.36394500732421875, -5.308919906616211, 8.580326080322266, 0.29670143127441406, 2.274799346923828, -0.3822498321533203, -6.143341064453125, 12.110427856445312, 1.3526611328125, -5.404319763183594, 3.9858741760253906, 5.401908874511719, 1.5695819854736328, 3.0350494384765625, 6.561367034912109, -8.589920043945312, 2.826313018798828, -1.8690643310546875, 7.4703521728515625, 3.5640716552734375, 3.9088668823242188, 10.526016235351562, -0.3078041076660156, 5.722114562988281, 3.7348785400390625, 8.964324951171875, -9.673515319824219, 1.2083244323730469, 8.81536865234375, 5.190376281738281, 7.9688873291015625, 5.351110458374023, 10.409698486328125, 0.5912361145019531, 5.4906005859375, -1.2698535919189453, 7.1330413818359375, 12.667533874511719, 5.231220245361328, 4.520833969116211, -0.805694580078125, 0.29488372802734375, -1.1683845520019531, 1.4229736328125, -0.51422119140625, 3.0761661529541016, 2.2262611389160156, -5.9885101318359375, 2.3894004821777344, 16.693634033203125, 8.840240478515625, 1.952157974243164, 0.0520782470703125, 2.4346885681152344, 4.903728485107422, 9.08917236328125, 3.5829696655273438, 4.286434173583984, -3.598604202270508, 7.353240966796875, -2.5421295166015625, -2.1757240295410156, 0.52105712890625, -2.049774169921875, 6.0194244384765625, 6.232982635498047, 5.739646911621094, 6.583911895751953, 1.6420822143554688, 3.9508934020996094, 7.305004119873047, 2.883382797241211, 6.408977508544922, 1.479827880859375, -4.579986572265625, 6.288200378417969, 9.204498291015625, 1.8904342651367188, 0.8674716949462891, -1.2800865173339844, 10.971649169921875, 1.2777252197265625, 6.56744384765625, -2.0971832275390625, -2.4257240295410156, 3.2225799560546875, 0.4714527130126953, 3.1517257690429688, -2.7645034790039062, -5.4910736083984375, -0.33068275451660156, -0.933135986328125, 1.0972976684570312, -0.6887245178222656, 10.565139770507812, 8.61148452758789, -3.00445556640625, 4.523979187011719, -0.3687553405761719, -7.8267974853515625, 4.944114685058594, 15.220367431640625, 6.781593322753906, -1.4293212890625, 1.9677352905273438, -1.7467041015625, 11.218017578125, 1.8679389953613281, 5.127983093261719, 10.69171142578125, 0.2253570556640625, 5.94879150390625, 3.2086029052734375, -1.7224159240722656, 3.0727882385253906, -1.1984405517578125, -2.2861404418945312, 4.684059143066406, 0.9117774963378906, 8.192268371582031, 8.055549621582031, 7.42706298828125, 4.0791473388671875, 1.7599868774414062, -4.610382080078125, 1.977264404296875, -2.3203182220458984, 3.6230392456054688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000106.npy"}
{"epoch": 0.3212121212121212, "step": 107, "batch_size": 128, "mean": 1.6656012535095215, "std": 5.862030506134033, "min": -21.475921630859375, "p10": -4.672665977478026, "median": 1.7699089050292969, "p90": 7.708113098144532, "max": 19.900039672851562, "pos_frac": 0.703125, "sample": [2.7836570739746094, -3.7929916381835938, 5.376762390136719, 7.7008209228515625, 0.6746139526367188, 0.8029365539550781, -2.5414962768554688, -2.7098636627197266, 4.463163375854492, 9.937286376953125, -2.3677730560302734, 4.286529541015625, 3.3444271087646484, -3.5201416015625, 12.46466064453125, 10.598068237304688, 1.4584884643554688, 4.536306381225586, 4.3881683349609375, -8.959259033203125, 2.6214160919189453, 0.3167533874511719, 4.309343338012695, 4.183097839355469, 12.007492065429688, -2.3945693969726562, -7.4568023681640625, 0.6928482055664062, -6.8022613525390625, 7.73541259765625, 0.05941009521484375, 7.1978607177734375, -10.66363525390625, 7.725128173828125, 10.3720703125, 1.8204364776611328, 2.978729248046875, -0.18390655517578125, -7.242912292480469, -11.963882446289062, 2.757232666015625, 3.9048519134521484, -0.7876262664794922, 2.34710693359375, 3.4288387298583984, 2.075214385986328, -8.670877456665039, -7.382720947265625, 2.1290969848632812, 7.274284362792969, 6.660242080688477, 1.5403938293457031, 3.0557174682617188, 1.0614471435546875, 0.16551589965820312, -0.47518157958984375, 4.044769287109375, 15.210296630859375, 1.278167724609375, -1.2750091552734375, -5.8825225830078125, 7.000957489013672, 9.534011840820312, 1.000946044921875, 6.721765518188477, 3.5439453125, -1.8081207275390625, 0.8084316253662109, -0.9723358154296875, 4.591075897216797, 3.8465423583984375, 1.5360260009765625, 2.5062685012817383, 1.6910858154296875, 4.7341461181640625, 1.3548660278320312, -3.9446067810058594, 5.6258087158203125, 2.2085189819335938, 0.0931549072265625, -1.7460956573486328, -3.4762954711914062, 4.126258850097656, 5.393169403076172, -21.475921630859375, 1.8106613159179688, 6.502233505249023, 17.403945922851562, -5.719511032104492, -2.375913619995117, -3.0526275634765625, 5.0955963134765625, 0.3712730407714844, -1.2389640808105469, 2.925386428833008, -4.224018096923828, 1.5262680053710938, -3.230072021484375, 4.971714019775391, 0.9719028472900391, 3.2563533782958984, -14.401519775390625, 1.729156494140625, 10.535995483398438, 0.47559356689453125, 5.336826324462891, 1.4430389404296875, 0.6707954406738281, 7.328411102294922, 4.141487121582031, 2.8474502563476562, 1.4789581298828125, -3.7399063110351562, -0.9949798583984375, 3.4129257202148438, 1.4408454895019531, 5.541469573974609, 19.900039672851562, 2.6689071655273438, 3.3592567443847656, 15.649169921875, 2.152904510498047, -7.9571685791015625, -3.3000640869140625, 0.13953399658203125, 3.044696807861328, -3.2674942016601562, -1.0228214263916016], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000107.npy"}
{"epoch": 0.3242424242424242, "step": 108, "batch_size": 128, "mean": 2.9374356269836426, "std": 5.17529296875, "min": -7.734405517578125, "p10": -3.1632972717285153, "median": 2.614211082458496, "p90": 10.461327743530271, "max": 18.2530517578125, "pos_frac": 0.6875, "sample": [0.35062599182128906, 1.0369415283203125, -1.8188934326171875, 0.8262176513671875, 1.4619064331054688, -3.2491683959960938, -1.7922134399414062, 0.4066314697265625, 2.3422927856445312, 4.6807098388671875, -1.2953567504882812, 1.7537384033203125, 6.936241149902344, 2.441253662109375, -0.6912193298339844, -2.8767318725585938, -0.5511817932128906, 0.67218017578125, -4.2396697998046875, -6.83660888671875, 3.8966636657714844, 2.8863296508789062, -0.35324859619140625, 12.10009765625, 3.6168813705444336, 11.9227294921875, 7.312704086303711, 4.695976257324219, -1.1522140502929688, 1.2708473205566406, -0.3432788848876953, 1.5856475830078125, 6.640590667724609, 6.7941131591796875, 1.5063323974609375, -3.3509140014648438, -1.504852294921875, 3.5326690673828125, 8.380882263183594, 10.1839599609375, 5.949546813964844, 1.4157352447509766, -5.11761474609375, 3.5468597412109375, 4.391853332519531, -2.0489730834960938, -1.3748245239257812, 6.23553466796875, 2.7892684936523438, -0.12447357177734375, 11.10906982421875, 0.6150283813476562, 14.507644653320312, 10.229255676269531, 8.20489501953125, 8.219940185546875, 16.63275146484375, 18.2530517578125, 2.6894760131835938, -0.9396419525146484, 5.281757354736328, -2.2918567657470703, 3.3884811401367188, 0.9985103607177734, 3.9576416015625, 11.283897399902344, 5.743743896484375, 5.8470611572265625, 9.68887710571289, 6.205322265625, 3.605337142944336, 2.2448959350585938, 0.6472320556640625, 5.608757019042969, -6.233650207519531, 5.472373962402344, 7.27166748046875, 11.41864013671875, 3.1011505126953125, 7.217414855957031, -1.1809463500976562, 3.6249847412109375, 15.218170166015625, 8.5543212890625, -7.6983795166015625, -0.19164657592773438, 2.5393829345703125, -3.909168243408203, -4.7775115966796875, 3.9136199951171875, 10.2774658203125, -1.5805168151855469, 3.3447189331054688, -0.09494209289550781, 11.46978759765625, -0.08835983276367188, 5.5410003662109375, 2.6890392303466797, 2.7549591064453125, 5.9816436767578125, 0.6326980590820312, -6.7022857666015625, 3.1383438110351562, -3.6777267456054688, 0.720611572265625, 0.25635337829589844, 7.2315216064453125, -0.8981876373291016, 1.8267345428466797, 3.498504638671875, 11.337417602539062, -7.734405517578125, -0.9029541015625, 3.1714248657226562, 10.890338897705078, -6.616081237792969, -0.6139602661132812, -0.8547515869140625, 2.9300003051757812, 2.0601940155029297, 0.9601325988769531, 4.6878662109375, -3.126495361328125, 12.892822265625, -1.8869438171386719, 5.6376190185546875, -0.5339736938476562, 6.4580841064453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000108.npy"}
{"epoch": 0.32727272727272727, "step": 109, "batch_size": 128, "mean": 2.508416175842285, "std": 5.605556964874268, "min": -13.008499145507812, "p10": -3.7595497131347657, "median": 2.529677391052246, "p90": 9.402215576171875, "max": 22.351669311523438, "pos_frac": 0.6640625, "sample": [4.57977294921875, 6.54345703125, -4.887779235839844, -1.8689346313476562, 0.2777061462402344, -3.7778892517089844, 2.569429397583008, 22.351669311523438, 11.121986389160156, 0.27931976318359375, 3.2435150146484375, -1.2585678100585938, 1.3021469116210938, 6.593170166015625, 3.7350006103515625, 5.599205017089844, 8.625030517578125, -5.573329925537109, -1.4253921508789062, -3.751689910888672, -0.2545890808105469, 3.2302093505859375, 6.756107330322266, -2.976593017578125, 3.4080734252929688, 14.092559814453125, 1.8430137634277344, 4.360939025878906, 3.1309680938720703, 1.8841304779052734, 9.517059326171875, 10.559661865234375, -1.793609619140625, 9.35784912109375, 10.476638793945312, 8.201042175292969, 1.1699371337890625, 3.3643341064453125, 8.343391418457031, 5.032661437988281, 3.086212158203125, 5.31403923034668, -2.9134864807128906, 7.95751953125, -2.0143585205078125, 6.842094421386719, 4.763713836669922, 4.7844696044921875, -8.619827270507812, 4.837270736694336, -1.4751434326171875, -1.3468017578125, 2.4156417846679688, -1.0821762084960938, -10.126968383789062, 5.186859130859375, 14.099807739257812, 6.71185302734375, 0.46901702880859375, 1.8596210479736328, 2.6697616577148438, 10.046401977539062, -3.2108230590820312, -0.7346420288085938, -5.0587921142578125, 1.5538158416748047, -1.39306640625, 1.8843517303466797, 9.5057373046875, 7.9694671630859375, 7.7791900634765625, -6.391448974609375, 4.8920745849609375, 7.3689117431640625, -10.519073486328125, -0.9992027282714844, 6.6094207763671875, 5.7102203369140625, -0.17061614990234375, 1.2085990905761719, -6.991790771484375, -5.954479217529297, -1.7588844299316406, 6.679164886474609, -3.2284469604492188, -2.8429431915283203, 1.3754310607910156, 0.91729736328125, -0.9782180786132812, -2.0486907958984375, 2.561674118041992, -5.785499572753906, 7.882484436035156, 0.48649024963378906, 2.3173828125, 2.4976806640625, 3.2540435791015625, 6.1065216064453125, 1.5018539428710938, 4.168035507202148, 4.981559753417969, -3.2108840942382812, -0.608642578125, -4.996406555175781, 5.920253753662109, 6.596405029296875, 2.203948974609375, 16.7835693359375, 2.8150863647460938, 10.070770263671875, 0.2271099090576172, -3.664703369140625, 5.990314483642578, 13.687789916992188, -0.07917404174804688, -3.6647071838378906, 1.6821403503417969, -2.3486289978027344, 4.355829238891602, -2.256591796875, 3.3838882446289062, 5.282417297363281, -13.008499145507812, -1.4180374145507812, 2.8693923950195312, 6.0361480712890625, 11.112899780273438, 8.723663330078125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000109.npy"}
{"epoch": 0.3303030303030303, "step": 110, "batch_size": 128, "mean": 1.88572359085083, "std": 4.643518924713135, "min": -12.300018310546875, "p10": -3.5023132324218733, "median": 1.6625022888183594, "p90": 7.596527481079099, "max": 18.3541259765625, "pos_frac": 0.703125, "sample": [0.8619384765625, 6.633411407470703, 2.0470123291015625, 1.4936046600341797, -0.0983734130859375, -1.1506500244140625, 0.8349685668945312, 0.9465160369873047, 1.5690193176269531, 1.0365581512451172, -0.5761947631835938, 0.08952522277832031, 1.5680351257324219, -12.300018310546875, -1.4996109008789062, 1.2487831115722656, 2.800090789794922, 6.894077301025391, 2.349353790283203, 8.38897705078125, 1.6469802856445312, 1.740743637084961, -5.860694885253906, -0.34154510498046875, 5.003696441650391, 6.8036041259765625, 4.1561126708984375, 0.6386489868164062, 1.0309715270996094, -1.4178924560546875, -5.5976715087890625, -3.0811500549316406, 1.2570343017578125, -5.08172607421875, 1.044790267944336, 11.667083740234375, 18.3541259765625, -1.5667476654052734, 2.26629638671875, 2.2783737182617188, 2.1198577880859375, -1.704742431640625, -0.4250640869140625, -0.9645061492919922, 10.8837890625, 2.780731201171875, 3.0101280212402344, 1.4439773559570312, 0.6872920989990234, 4.386741638183594, -6.04559326171875, -7.311119079589844, 6.197792053222656, -2.5523834228515625, 3.757671356201172, -8.553466796875, 6.36883544921875, 1.6226921081542969, 2.97747802734375, -1.9345626831054688, 7.02275276184082, 3.52447509765625, 9.679401397705078, -2.444305419921875, -1.5573043823242188, -0.12592315673828125, -0.717315673828125, 2.6855010986328125, 1.1057777404785156, 4.88580322265625, 8.51654052734375, 3.65350341796875, 8.33526611328125, -5.913299560546875, -1.8435897827148438, 3.7852706909179688, 3.951751708984375, 0.15597915649414062, 1.5692901611328125, -2.2275733947753906, 12.017196655273438, 6.898889541625977, 4.816741943359375, -0.07471084594726562, 10.382598876953125, 5.881303787231445, 6.823431015014648, -7.166545867919922, 0.7143878936767578, 1.8973655700683594, 1.1809940338134766, -4.485027313232422, 4.594827651977539, 1.7383251190185547, 3.8231201171875, 4.0590667724609375, 1.4207172393798828, 2.1369857788085938, 2.2361907958984375, 11.075531005859375, 2.168130874633789, -5.832786560058594, -0.2018718719482422, 5.140810012817383, -1.0123748779296875, -5.400417327880859, 1.6780242919921875, 2.1166229248046875, 12.407440185546875, 3.6884841918945312, 0.7741241455078125, 1.9828338623046875, 2.5085411071777344, 2.4519100189208984, -2.4301605224609375, 2.917905807495117, 11.119300842285156, -3.0562744140625, 2.2583465576171875, 0.31373023986816406, -2.1955642700195312, 7.375728607177734, 4.42938232421875, 0.8080234527587891, 8.111724853515625, -5.736501693725586, 4.123863220214844, 2.0266551971435547], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000110.npy"}
{"epoch": 0.3333333333333333, "step": 111, "batch_size": 128, "mean": 3.179274559020996, "std": 5.803811550140381, "min": -13.144783020019531, "p10": -3.5350883483886717, "median": 3.027266502380371, "p90": 10.041049957275389, "max": 21.13458251953125, "pos_frac": 0.7109375, "sample": [1.1026248931884766, 1.5943069458007812, 5.504180908203125, 7.361061096191406, -0.3747730255126953, 7.500099182128906, -0.3125801086425781, 7.2257080078125, 0.598480224609375, -1.3003578186035156, 2.4570884704589844, 3.5734634399414062, 8.256189346313477, 7.280494689941406, 3.912841796875, 4.259346008300781, -4.084096908569336, 2.42926025390625, 7.009124755859375, 3.0480308532714844, 5.58905029296875, -13.144783020019531, 3.006502151489258, 10.900070190429688, -2.0996856689453125, -3.5914688110351562, 5.7262725830078125, -9.45999526977539, 3.5675430297851562, 16.87420654296875, 8.581954956054688, -4.563884735107422, -1.0347366333007812, -2.4272384643554688, 0.9126052856445312, -4.6439056396484375, 5.998931884765625, 6.939888000488281, -6.4459075927734375, 0.7333698272705078, 21.13458251953125, -0.786224365234375, 6.1399383544921875, 11.1243896484375, -2.7241592407226562, 1.4531097412109375, 2.752288818359375, 7.216636657714844, -0.18622207641601562, 1.3458480834960938, 3.9942665100097656, 1.1696796417236328, 1.7521743774414062, -1.0724639892578125, -0.3687744140625, 8.290252685546875, 2.7971572875976562, 6.017829895019531, 20.48419189453125, 5.0435943603515625, -1.8870964050292969, 4.30718994140625, 5.194622039794922, -3.7020339965820312, -0.9991912841796875, -6.182281494140625, 4.1664886474609375, 1.52191162109375, 2.5397796630859375, 3.894193649291992, 2.2425994873046875, -0.5417327880859375, 6.8818206787109375, 4.304691314697266, -0.3642139434814453, 7.496952056884766, 2.2715911865234375, -0.4164314270019531, 11.241500854492188, 5.1063690185546875, 13.015869140625, 3.465791702270508, 9.265060424804688, 1.3786087036132812, 3.057771682739258, -1.9007034301757812, -5.685380935668945, -2.2315673828125, 6.7803192138671875, 3.960865020751953, 9.156097412109375, 3.1551551818847656, 4.09967041015625, 9.858390808105469, -3.51092529296875, 4.573402404785156, 5.16693115234375, 11.14352035522461, 7.802825927734375, 4.809120178222656, -10.0570068359375, 2.3448410034179688, -0.35335350036621094, -2.9402847290039062, 0.165374755859375, 7.725475311279297, 10.467254638671875, 17.206146240234375, -2.0222702026367188, 0.6400203704833984, 1.3921337127685547, 0.6989593505859375, -6.136848449707031, 7.451469421386719, -3.3742752075195312, 3.3471298217773438, 4.34808349609375, 12.848846435546875, 0.69561767578125, -8.207656860351562, 5.415294647216797, 5.5223236083984375, 2.9077911376953125, 18.991714477539062, 6.180763244628906, 0.6685447692871094, 12.847610473632812, -0.301483154296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000111.npy"}
{"epoch": 0.33636363636363636, "step": 112, "batch_size": 128, "mean": 2.629899263381958, "std": 4.975649833679199, "min": -7.886383056640625, "p10": -3.4298332214355467, "median": 2.241628646850586, "p90": 8.834660720825195, "max": 19.744873046875, "pos_frac": 0.6953125, "sample": [2.2009830474853516, 7.632001876831055, 2.9426956176757812, 3.8225021362304688, -0.6779632568359375, 5.171255111694336, -1.6411895751953125, 4.1473541259765625, -3.079458236694336, -1.3682708740234375, 10.389892578125, -4.087165832519531, 6.093025207519531, 5.455596923828125, -2.1113014221191406, 0.6264877319335938, 0.693145751953125, 3.26434326171875, 14.63775634765625, 6.837009429931641, 7.172756195068359, 11.320236206054688, -0.5578460693359375, -3.7162628173828125, 2.5326385498046875, 8.964176177978516, 13.533409118652344, -0.41207122802734375, 7.5011138916015625, 8.663131713867188, 4.3895111083984375, 0.6244659423828125, -4.516548156738281, 4.256336212158203, 1.697591781616211, 13.965652465820312, 3.144683837890625, 3.1862030029296875, 0.4968414306640625, 2.7048187255859375, 4.953521728515625, -1.8896865844726562, 2.103118896484375, 17.414627075195312, -2.3692378997802734, -4.704765319824219, 3.0209407806396484, 2.649442672729492, -3.9519195556640625, 1.53277587890625, -2.3127212524414062, 1.6670150756835938, 8.7816162109375, 4.209659576416016, 19.744873046875, 8.22607421875, 8.958431243896484, 6.574283599853516, 0.9351730346679688, -3.3859634399414062, -0.47618865966796875, 0.2327728271484375, 1.9099617004394531, 0.16222000122070312, 0.5881690979003906, 12.866653442382812, 3.071195602416992, 3.2783966064453125, 0.835693359375, 5.438926696777344, 4.367897033691406, -2.7635116577148438, -0.2735595703125, 3.2996673583984375, 6.00421142578125, 2.54052734375, -3.532196044921875, 4.2176361083984375, 4.798736572265625, -1.1048355102539062, 0.11695480346679688, -0.619659423828125, -5.42547607421875, -0.0219879150390625, -1.0050678253173828, -7.0643157958984375, 2.853107452392578, 10.181465148925781, 2.6647567749023438, 1.1685943603515625, -2.344320297241211, 2.2822742462158203, 4.767059326171875, 5.406963348388672, 11.640308380126953, 0.8988418579101562, -4.168235778808594, 6.6009063720703125, 5.1570281982421875, 4.341196060180664, 3.9928627014160156, 6.89874267578125, 0.46535491943359375, -2.4873428344726562, -1.6200790405273438, -0.2850189208984375, 1.8615646362304688, -2.0418243408203125, -7.886383056640625, -5.25286865234375, 3.7883758544921875, -4.519323348999023, 7.770378112792969, 0.23257064819335938, 6.136066436767578, 1.0012264251708984, -5.073089599609375, 1.5935287475585938, 6.309490203857422, 3.3464508056640625, 1.4893150329589844, -2.0593109130859375, -0.04037284851074219, 4.400543212890625, -0.66949462890625, 13.61602783203125, 2.9448699951171875, 1.7652759552001953], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000112.npy"}
{"epoch": 0.3393939393939394, "step": 113, "batch_size": 128, "mean": 3.0087146759033203, "std": 5.570771217346191, "min": -14.751983642578125, "p10": -2.8301063537597653, "median": 2.523378372192383, "p90": 9.876105499267577, "max": 19.117401123046875, "pos_frac": 0.6875, "sample": [0.8345279693603516, 1.677947998046875, 0.24171829223632812, 1.556060791015625, 4.999237060546875, 14.10919189453125, -0.19424819946289062, -2.5930118560791016, 2.606201171875, 0.8625411987304688, 5.113531112670898, 7.7008819580078125, 6.330606460571289, 2.318758010864258, 19.117401123046875, 11.192359924316406, 2.4787750244140625, -0.5595741271972656, 8.828231811523438, 9.787750244140625, 2.7993412017822266, -2.292724609375, 17.745330810546875, -2.1996593475341797, 7.738609313964844, 3.3863563537597656, 10.082267761230469, -0.12883758544921875, 3.1959152221679688, 3.2391510009765625, 11.181297302246094, 0.21143341064453125, -0.47276878356933594, 4.117839813232422, 2.097576141357422, 12.152481079101562, 2.567981719970703, 11.286895751953125, 0.21958160400390625, 6.3993377685546875, 16.189346313476562, 1.4511566162109375, 9.694183349609375, 1.9239139556884766, 8.234756469726562, -1.8539848327636719, -0.18959426879882812, -9.950820922851562, 5.953094482421875, 5.2345428466796875, 7.836875915527344, -2.9213714599609375, 6.8993988037109375, -3.7760009765625, -14.751983642578125, 3.2339248657226562, 8.527603149414062, 2.143402099609375, 4.48731803894043, 0.6851520538330078, 3.40496826171875, -5.67962646484375, 2.1815338134765625, -3.5494766235351562, 6.2614288330078125, 2.6298828125, 2.3127098083496094, 5.233711242675781, 4.150993347167969, 4.796661376953125, 7.04559326171875, 3.530071258544922, 1.555826187133789, -1.3268203735351562, 8.105173110961914, 5.176082611083984, 7.819618225097656, -1.526885986328125, -0.16440582275390625, -0.8129806518554688, -1.7223854064941406, -1.5274848937988281, 9.757740020751953, 8.525108337402344, 3.8217849731445312, -7.94989013671875, -2.0191287994384766, -0.025129318237304688, -6.260334014892578, -2.3571624755859375, 0.5143890380859375, 0.34557151794433594, 1.2136077880859375, 4.395166397094727, -4.5464935302734375, 15.131782531738281, 1.0011882781982422, -2.7909927368164062, 2.929595947265625, -1.5105705261230469, 1.6100845336914062, 3.898122787475586, -6.090087890625, -2.337432861328125, 5.831451416015625, 1.0567989349365234, 1.9819374084472656, 4.121917724609375, 11.377693176269531, 4.486053466796875, -3.3627891540527344, 16.908676147460938, -0.6096038818359375, 4.6446685791015625, -1.4323806762695312, -3.35333251953125, 5.0873565673828125, 11.179489135742188, -6.640113830566406, 3.6832275390625, 9.512969970703125, 6.907341003417969, -1.4923553466796875, -0.4723682403564453, 8.262382507324219, -0.3946704864501953, -1.1325836181640625, 5.0254058837890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000113.npy"}
{"epoch": 0.3424242424242424, "step": 114, "batch_size": 128, "mean": 2.692378520965576, "std": 5.2499871253967285, "min": -10.748748779296875, "p10": -3.659261322021484, "median": 2.591653823852539, "p90": 8.940426063537597, "max": 20.99969482421875, "pos_frac": 0.7109375, "sample": [10.048255920410156, 2.950794219970703, -3.0629348754882812, 7.97027587890625, -1.1836090087890625, -2.034027099609375, 5.785514831542969, 8.074623107910156, -0.3030242919921875, -5.325706481933594, 3.6074562072753906, -1.8199615478515625, 3.631134033203125, 4.341377258300781, 2.9590911865234375, 2.830547332763672, 9.794342041015625, 2.910186767578125, 3.728851318359375, 5.788972854614258, 14.271064758300781, 4.165985107421875, -0.49388885498046875, 6.634529113769531, -0.35091400146484375, 0.6921043395996094, -5.494403839111328, 3.8604507446289062, -4.76483154296875, 11.431060791015625, 1.4344844818115234, -0.268035888671875, -3.93023681640625, -1.7304573059082031, 6.970306396484375, -2.8301849365234375, -1.9570541381835938, 6.037073135375977, 1.744903564453125, -4.158710479736328, 14.052757263183594, 2.881561279296875, -2.4152908325195312, 4.385528564453125, 3.135234832763672, -0.11574935913085938, 13.279571533203125, 10.376232147216797, 6.512931823730469, 11.683441162109375, -0.27942657470703125, 4.5376739501953125, 0.6787261962890625, -1.710968017578125, 2.27880859375, 2.0052146911621094, 4.164989471435547, -1.6716804504394531, 2.855123519897461, -0.5314178466796875, 3.275634765625, -9.272560119628906, 5.285730361938477, 7.12164306640625, 1.6753692626953125, 8.907903671264648, 2.3300399780273438, 8.751890182495117, 2.0090560913085938, 4.60491943359375, 1.6776924133300781, 7.4679412841796875, 6.132709503173828, 1.51336669921875, 0.45697021484375, 0.9783916473388672, 3.3233699798583984, 8.776657104492188, -5.513298034667969, 0.7289810180664062, 5.8924560546875, 2.0964622497558594, 3.6176319122314453, -1.2317657470703125, 4.691162109375, 0.36213111877441406, -10.748748779296875, -6.399702072143555, -3.5431289672851562, 1.9950790405273438, 2.5864486694335938, 0.295135498046875, 9.016311645507812, 6.2010955810546875, -1.9786453247070312, 3.1005096435546875, 3.22784423828125, -5.524162292480469, 3.0977439880371094, 0.8582077026367188, 6.877044677734375, -5.889829635620117, -0.4228057861328125, 2.5968589782714844, -8.574737548828125, 1.6893386840820312, 3.0981063842773438, 13.961490631103516, -1.3327388763427734, 4.221668243408203, -0.4463386535644531, -6.3984527587890625, 7.383056640625, 4.145210266113281, 9.062255859375, -3.3889846801757812, 8.736541748046875, 1.0051918029785156, 17.423233032226562, 6.491050720214844, 20.99969482421875, 6.072879791259766, 5.051025390625, 1.6300239562988281, 0.639251708984375, 0.3782615661621094, 1.7279205322265625, 1.9850921630859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000114.npy"}
{"epoch": 0.34545454545454546, "step": 115, "batch_size": 128, "mean": 2.1154322624206543, "std": 5.4158830642700195, "min": -23.376739501953125, "p10": -3.3197490692138674, "median": 2.2917299270629883, "p90": 7.716845893859862, "max": 18.68524169921875, "pos_frac": 0.703125, "sample": [2.889434814453125, 4.107570648193359, 1.2260055541992188, 1.9696235656738281, -2.1814804077148438, 5.588615417480469, 7.241828918457031, 1.3134002685546875, 3.583221435546875, -3.32415771484375, 4.306224822998047, 5.5, 7.179645538330078, 0.24474716186523438, 2.4292240142822266, 8.549476623535156, -7.915840148925781, 4.2297821044921875, 3.765960693359375, 0.47309112548828125, 8.869880676269531, -3.167236328125, -0.32975006103515625, 7.0254364013671875, 0.6890106201171875, 3.3612537384033203, 1.0310287475585938, -3.0263824462890625, 7.020336151123047, -1.6304550170898438, 4.087425231933594, 5.058441162109375, 2.0884342193603516, 2.4794654846191406, -1.3488693237304688, 8.0496826171875, 5.2054901123046875, -3.0947036743164062, 7.07080078125, 6.101774215698242, -8.492691040039062, 5.1330718994140625, -10.003372192382812, -3.317859649658203, -0.5381488800048828, -3.1415061950683594, 8.442344665527344, 16.4293212890625, 10.368934631347656, 2.511432647705078, -1.5365486145019531, -2.2408523559570312, 5.137317657470703, 1.59423828125, 7.574201583862305, 2.6286563873291016, -2.6489028930664062, 0.9497661590576172, 10.255197525024414, 13.1676025390625, 6.343467712402344, 1.3760185241699219, -0.021038055419921875, -3.9430694580078125, -3.419689178466797, 6.1300811767578125, 4.938207626342773, 0.1839752197265625, 4.649444580078125, -23.376739501953125, 3.1106510162353516, 1.2785110473632812, 4.700092315673828, 4.4457550048828125, -7.969169616699219, -0.8458023071289062, -7.847377777099609, 1.2402610778808594, 0.888458251953125, 1.2514762878417969, -2.3987998962402344, 2.855602264404297, -3.0948715209960938, -5.7609405517578125, 2.420360565185547, 3.4147300720214844, 18.68524169921875, 1.0340957641601562, -3.1388015747070312, -2.982166290283203, 11.362106323242188, -8.0831298828125, 5.247215270996094, 6.034215927124023, 3.395536422729492, 1.0945510864257812, -2.7800521850585938, 2.7702255249023438, 5.152748107910156, 5.563896179199219, 5.239051818847656, 5.3040618896484375, 1.9998397827148438, 1.2337989807128906, 6.250724792480469, -1.3011322021484375, 5.041839599609375, -4.4452362060546875, 4.287956237792969, 9.476570129394531, 1.0158863067626953, 1.0957260131835938, 4.141155242919922, 3.4562149047851562, -0.24100875854492188, 0.8640975952148438, 15.511894226074219, 6.7041778564453125, -5.693305969238281, 4.666656494140625, 2.896730422973633, 2.0783214569091797, 2.1630992889404297, -0.5598068237304688, 0.1440105438232422, 8.604324340820312, -0.50189208984375, -1.5533218383789062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000115.npy"}
{"epoch": 0.3484848484848485, "step": 116, "batch_size": 128, "mean": 2.3912806510925293, "std": 5.715372562408447, "min": -8.609390258789062, "p10": -4.1301734924316404, "median": 1.7125091552734375, "p90": 9.130580902099608, "max": 21.8211669921875, "pos_frac": 0.671875, "sample": [21.8211669921875, 2.4469070434570312, 7.79815673828125, -2.594818115234375, 2.8334579467773438, 4.7067413330078125, 1.3829803466796875, 9.6749267578125, 0.8519439697265625, 0.3275566101074219, 14.77978515625, -1.6653671264648438, 4.2209014892578125, -3.62628173828125, 4.9844207763671875, 7.593040466308594, 1.4487228393554688, 5.1428985595703125, 6.169092178344727, -6.461029052734375, 0.6363697052001953, -0.40199851989746094, 1.168853759765625, -5.849678039550781, 0.8398208618164062, -4.371978759765625, 3.540546417236328, 7.682220458984375, 3.2671585083007812, 5.208885192871094, 5.241359710693359, 9.371360778808594, 2.6343536376953125, 0.1758251190185547, 2.3532848358154297, 2.6193389892578125, 5.527923583984375, 6.399772644042969, -8.609390258789062, -3.577880859375, 3.511760711669922, 12.899887084960938, 0.47603607177734375, -4.751630783081055, 8.206729888916016, 6.42851448059082, 5.7555694580078125, -1.7169647216796875, 11.060195922851562, -0.329986572265625, 0.9144420623779297, -0.38747406005859375, 11.335357666015625, -0.14576148986816406, -0.719207763671875, -3.422780990600586, 1.805694580078125, 2.7058849334716797, -4.9019317626953125, 14.037551879882812, -7.60345458984375, -1.0156269073486328, 7.794036865234375, 7.649259567260742, -0.9929351806640625, 5.288890838623047, 3.246856689453125, 14.160713195800781, 1.850687026977539, -1.324188232421875, 0.37808990478515625, 8.166240692138672, -1.3017444610595703, -1.7131214141845703, 2.9617462158203125, 3.678081512451172, -4.6077728271484375, 5.9695281982421875, -3.1045074462890625, 1.0563812255859375, -2.639677047729492, -0.30841636657714844, 1.61932373046875, -4.0172119140625, 0.5734138488769531, -3.5297164916992188, 10.209548950195312, 0.9711227416992188, -3.1373329162597656, 1.0597858428955078, 6.0332489013671875, 2.3330078125, 0.7394561767578125, 3.847747802734375, 2.4757614135742188, -4.026542663574219, -6.483699798583984, -6.621055603027344, -5.6296234130859375, 5.970304489135742, -1.5156097412109375, 3.1096420288085938, -1.93682861328125, 4.434535980224609, 4.2302398681640625, 4.097713470458984, -8.075719833374023, 7.476476669311523, -1.0531005859375, 9.027389526367188, 1.2880859375, 3.38262939453125, -2.1735877990722656, 13.783203125, -3.264404296875, 21.390960693359375, 2.9406890869140625, 19.204498291015625, -3.3136043548583984, 0.649169921875, 0.46137046813964844, 6.3789520263671875, 0.6777267456054688, 2.8886337280273438, 0.520965576171875, -7.867095947265625, 7.803619384765625, 3.107524871826172], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000116.npy"}
{"epoch": 0.3515151515151515, "step": 117, "batch_size": 128, "mean": 2.295393943786621, "std": 5.261518478393555, "min": -10.272083282470703, "p10": -4.648223114013672, "median": 1.7917518615722656, "p90": 8.521080780029296, "max": 19.895553588867188, "pos_frac": 0.671875, "sample": [11.879257202148438, 2.657867431640625, 7.197486877441406, -0.5401992797851562, 5.005859375, 7.239385604858398, 4.470550537109375, 5.050533294677734, 2.8772411346435547, -1.3187179565429688, 3.5984420776367188, 0.7948894500732422, 11.92633056640625, 6.142478942871094, 4.287624359130859, 0.653656005859375, 9.750411987304688, 5.455654144287109, -1.692596435546875, -4.3458099365234375, 8.913223266601562, 4.14158821105957, 4.649234771728516, -0.045581817626953125, 5.292610168457031, 6.139707565307617, 1.808746337890625, -1.9276618957519531, 1.404348373413086, 4.62286376953125, 8.353019714355469, 10.878143310546875, -1.6361312866210938, -3.301910400390625, 0.5878143310546875, 11.965118408203125, 0.66571044921875, 0.3142433166503906, 0.1674957275390625, 4.618255615234375, -4.587127685546875, 4.034248352050781, -2.9298553466796875, 1.9889106750488281, -8.021438598632812, -7.443702697753906, 7.837158203125, -1.2095279693603516, 7.763084411621094, -5.292915344238281, 19.895553588867188, -0.46971893310546875, 1.1261367797851562, -1.9799728393554688, -1.9379730224609375, -5.4114990234375, 1.671792984008789, -6.027069091796875, 10.265396118164062, -3.596649169921875, -1.1550750732421875, -0.9824066162109375, -1.5858306884765625, 7.971748352050781, -5.1937713623046875, 6.27178955078125, 4.236175537109375, -7.70245361328125, 2.6234512329101562, -1.376007080078125, 1.9002418518066406, -4.790779113769531, 3.9476547241210938, -4.814849853515625, 10.802902221679688, -2.3010406494140625, -0.16170120239257812, 1.7624969482421875, 6.086709976196289, 13.667755126953125, 4.1727447509765625, 6.454418182373047, 1.0449275970458984, -4.8803253173828125, 5.07464599609375, -1.2000579833984375, 2.9084739685058594, 0.00742340087890625, 1.3991851806640625, 0.9942855834960938, -8.425949096679688, 5.95103645324707, -10.272083282470703, 11.825325012207031, -2.4287281036376953, -2.7312793731689453, 7.1151885986328125, -3.4501800537109375, 6.871173858642578, 1.8276863098144531, 6.9887847900390625, 10.114675521850586, 6.1221923828125, 2.5617218017578125, -1.4458560943603516, -3.9232711791992188, 5.21875, 1.3422698974609375, -7.90234375, 2.232769012451172, 7.1915130615234375, 8.217620849609375, 0.5389175415039062, 9.917098999023438, 1.7747573852539062, 6.2819671630859375, 6.219438552856445, 0.7438812255859375, 1.5983085632324219, 8.336494445800781, 0.17862701416015625, 4.911609649658203, -2.425750732421875, 4.041507720947266, -0.343994140625, 8.225894927978516, 0.3304100036621094, 0.9214935302734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000117.npy"}
{"epoch": 0.35454545454545455, "step": 118, "batch_size": 128, "mean": 2.4072954654693604, "std": 5.830267429351807, "min": -19.40179443359375, "p10": -3.0255971908569337, "median": 1.869110107421875, "p90": 9.503053665161133, "max": 20.915374755859375, "pos_frac": 0.6875, "sample": [0.5137138366699219, 9.337196350097656, 4.101959228515625, 5.257242202758789, 12.911941528320312, -5.137443542480469, -2.3837661743164062, 0.36346435546875, 2.3503875732421875, 0.4315376281738281, -1.1257266998291016, 1.1776542663574219, -1.191436767578125, 4.445934295654297, -12.486968994140625, 3.8655014038085938, 7.940889358520508, -4.03436279296875, 5.275749206542969, 2.8920211791992188, 10.106903076171875, 1.87945556640625, 11.984466552734375, -0.2340850830078125, 9.525070190429688, 2.492490768432617, 1.3644790649414062, -1.905609130859375, 6.12359619140625, 1.139129638671875, 5.013816833496094, -1.2533607482910156, 1.573944091796875, -2.72821044921875, 1.541107177734375, 3.4481277465820312, 1.9330558776855469, 1.1749305725097656, -1.5008926391601562, 4.329549789428711, 7.419166564941406, 5.496437072753906, -0.20684432983398438, -5.4768829345703125, 9.49361801147461, 0.3538684844970703, 5.029876708984375, -0.5456485748291016, 1.332509994506836, 5.806528091430664, 0.2282867431640625, 3.899860382080078, 6.5437164306640625, 0.8961830139160156, -0.5938491821289062, 1.5346031188964844, 10.0325927734375, 2.0217208862304688, 9.816543579101562, 1.8587646484375, 10.419319152832031, 0.7971839904785156, -2.1614761352539062, -0.3990745544433594, 2.4880142211914062, -1.8709335327148438, -0.035648345947265625, 4.106132507324219, 6.932231903076172, 5.5034942626953125, 7.574893951416016, 14.417617797851562, 1.7254829406738281, 5.5123443603515625, 12.205978393554688, 9.156850814819336, -2.6222915649414062, -1.359771728515625, 1.4762420654296875, -3.6403465270996094, -14.7926025390625, 6.843658447265625, 8.69720458984375, 3.6222991943359375, -4.779144287109375, 20.915374755859375, -0.9483909606933594, 0.5848026275634766, 8.341217041015625, 0.4483222961425781, -0.4853363037109375, -13.506912231445312, -19.40179443359375, 4.361705780029297, 11.780624389648438, 7.739067077636719, 0.598846435546875, 5.932271957397461, 4.105583190917969, 7.287872314453125, 0.05941581726074219, -2.5559921264648438, -2.320354461669922, -1.3154449462890625, 4.060089111328125, 8.050132751464844, 3.4962215423583984, 4.35748291015625, 9.270931243896484, 2.6221694946289062, 5.446311950683594, -5.28533935546875, -3.0101871490478516, -1.2179031372070312, 9.528160095214844, 8.768779754638672, -3.061553955078125, 1.6051597595214844, 7.759559631347656, -12.194023132324219, 13.254852294921875, -1.5803070068359375, -0.68798828125, -1.8546028137207031, 0.472198486328125, -4.817604064941406, 2.0743331909179688, 4.1838836669921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000118.npy"}
{"epoch": 0.3575757575757576, "step": 119, "batch_size": 128, "mean": 2.4413509368896484, "std": 5.183896064758301, "min": -11.661613464355469, "p10": -3.6299102783203123, "median": 2.4382753372192383, "p90": 8.633446502685548, "max": 18.352737426757812, "pos_frac": 0.7109375, "sample": [6.138980865478516, 4.441986083984375, 5.06646728515625, 8.73552131652832, 4.618568420410156, 0.1703033447265625, 3.4537620544433594, 4.454013824462891, -2.4804744720458984, 3.0170516967773438, -2.9079132080078125, 1.87713623046875, -11.661613464355469, 3.278400421142578, 8.292633056640625, -1.8144683837890625, 4.351963043212891, -5.20452880859375, 0.2674713134765625, -3.3739280700683594, 4.717185974121094, -6.236473083496094, 16.120895385742188, -2.927583694458008, 0.0218505859375, 4.0324554443359375, 1.2722358703613281, -0.8667831420898438, 2.6836013793945312, 18.352737426757812, -3.527801513671875, 1.0957984924316406, 2.0697975158691406, 4.6451263427734375, 3.8837738037109375, 2.7371597290039062, -8.614997863769531, -1.2945785522460938, -0.944580078125, 8.651321411132812, 13.794845581054688, -4.624773025512695, 6.22650146484375, 4.848745346069336, -10.285964965820312, -1.3146896362304688, 3.9550437927246094, -1.66912841796875, 5.211894989013672, 0.7506790161132812, 3.5484848022460938, -2.6850967407226562, 5.9488983154296875, -0.5361785888671875, 7.368904113769531, 5.1947021484375, 0.397796630859375, 7.145454406738281, 2.689922332763672, 4.8566131591796875, 0.1733245849609375, -1.8945236206054688, -6.192535400390625, 7.845127105712891, 0.333740234375, 5.7818756103515625, 6.4312591552734375, -5.328399658203125, 5.004371643066406, 14.311954498291016, 0.611236572265625, 0.7211227416992188, 1.0873088836669922, -3.8681640625, 3.6764755249023438, 7.25456428527832, 1.9031143188476562, 9.910736083984375, 1.2887763977050781, 2.389629364013672, 5.923467636108398, 4.321128845214844, 0.2359638214111328, 2.3471240997314453, 8.048980712890625, 1.0586204528808594, 4.248603820800781, 9.012954711914062, 1.8387298583984375, -0.839874267578125, 3.785980224609375, 5.698276519775391, 7.860954284667969, -9.513801574707031, -1.3260650634765625, 1.9139518737792969, 8.176620483398438, 3.2233619689941406, -4.324668884277344, -1.171295166015625, 5.113311767578125, 1.38433837890625, 4.815610885620117, 5.526897430419922, 1.356222152709961, 4.433378219604492, 6.452323913574219, -1.5220870971679688, -1.5217628479003906, -1.1405792236328125, 8.713722229003906, 4.907070159912109, 1.4676055908203125, -2.141864776611328, -3.4209442138671875, -3.030853271484375, 8.925025939941406, 2.0771865844726562, 10.759048461914062, 10.18487548828125, -4.9864654541015625, 8.625785827636719, 6.878948211669922, 0.4150238037109375, 13.928802490234375, -0.8649749755859375, -6.704780578613281, 2.4869213104248047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000119.npy"}
{"epoch": 0.3606060606060606, "step": 120, "batch_size": 128, "mean": 3.637329578399658, "std": 6.079004764556885, "min": -15.160736083984375, "p10": -3.1869792938232413, "median": 3.9618301391601562, "p90": 11.027996826171876, "max": 19.19879150390625, "pos_frac": 0.6953125, "sample": [4.538364410400391, 3.98638916015625, 4.68896484375, -0.8494834899902344, 11.831581115722656, 0.3733673095703125, -1.6001510620117188, 0.4204254150390625, -1.7284164428710938, -6.779674530029297, 8.06866455078125, 15.15264892578125, -1.8386421203613281, 6.7145538330078125, 6.825862884521484, 1.608612060546875, -2.9595565795898438, -6.184473037719727, 1.3700408935546875, 10.822860717773438, -2.3354549407958984, 4.907012939453125, -2.551239013671875, 9.0533447265625, 9.443923950195312, -1.295684814453125, -1.1199893951416016, 0.7144012451171875, -3.717632293701172, 3.9372711181640625, 6.562591552734375, 6.046226501464844, -6.358551025390625, 5.500823974609375, -12.586349487304688, -0.02838897705078125, -0.33890533447265625, 11.114151000976562, 3.7474517822265625, 6.0045623779296875, 14.792030334472656, 1.3173599243164062, 6.238008499145508, 6.794849395751953, 4.099803924560547, 4.094120025634766, 6.815666198730469, -1.8020362854003906, -0.24143600463867188, 5.730865478515625, 4.323631286621094, 12.834991455078125, -0.5977973937988281, 5.109371185302734, 9.016342163085938, 19.19879150390625, 4.181556701660156, 0.4059581756591797, 5.1465606689453125, 17.595779418945312, -0.3191986083984375, 2.3608245849609375, 1.2193832397460938, -1.2101898193359375, 7.989448547363281, -5.047386169433594, 15.005950927734375, -4.059242248535156, 6.5039520263671875, 9.008193969726562, -4.7694091796875, 8.040847778320312, 6.698417663574219, -3.9206085205078125, 7.296909332275391, 8.111373901367188, -1.4423980712890625, 8.541000366210938, 0.7768363952636719, -15.160736083984375, 4.0522918701171875, -9.560791015625, -1.7157211303710938, -1.438201904296875, 1.7044944763183594, 14.879913330078125, 10.235389709472656, 0.8386020660400391, -0.7018985748291016, 1.673980712890625, 3.0190162658691406, 4.475080490112305, 0.6046676635742188, 5.3970794677734375, -2.5121536254882812, 1.1173896789550781, -3.8124542236328125, 1.6535758972167969, 1.55029296875, -0.5056953430175781, 5.647773742675781, 9.925518035888672, 13.859901428222656, 3.293548583984375, 9.607383728027344, 17.5623779296875, 3.6723175048828125, 8.083602905273438, 12.3319091796875, 4.1541748046875, 9.335838317871094, 11.011489868164062, 10.4453125, 4.2543182373046875, -0.71337890625, 11.066513061523438, 10.953804016113281, -6.626579284667969, 1.0598487854003906, -1.5861053466796875, 8.856346130371094, 10.165435791015625, -0.43115997314453125, 4.084247589111328, 8.263748168945312, -0.573394775390625, 1.9505081176757812, 3.128143310546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000120.npy"}
{"epoch": 0.36363636363636365, "step": 121, "batch_size": 128, "mean": 2.7659506797790527, "std": 5.317994117736816, "min": -10.101318359375, "p10": -3.2824283599853508, "median": 2.1609344482421875, "p90": 10.288151168823239, "max": 23.421218872070312, "pos_frac": 0.703125, "sample": [-3.10797119140625, 3.0551834106445312, 4.761358261108398, -3.689495086669922, 1.4470062255859375, 4.9826812744140625, -0.5806388854980469, 3.45123291015625, 1.0967559814453125, 3.6614418029785156, -0.0162811279296875, 3.7722930908203125, 4.017276763916016, 1.5112152099609375, -5.7564697265625, 0.116455078125, 12.850830078125, -0.12436294555664062, -2.236175537109375, -6.4712677001953125, 1.113189697265625, 12.196441650390625, 3.4097061157226562, 1.1228256225585938, 0.8851470947265625, -0.7309799194335938, 14.130340576171875, 0.74615478515625, -0.6586151123046875, 7.88861083984375, 4.21422004699707, -4.2015228271484375, 4.35841178894043, 11.577957153320312, 1.4195709228515625, 5.5612335205078125, -3.085268020629883, -1.0430145263671875, 5.40692138671875, 1.6284713745117188, 5.5724334716796875, -1.6557292938232422, 7.34234619140625, -0.41779136657714844, -2.1851119995117188, -6.140144348144531, 5.5382537841796875, 9.54498291015625, -3.7557449340820312, -0.9121856689453125, -0.1895275115966797, 11.186614990234375, 2.692781448364258, 0.9835090637207031, -10.101318359375, 12.115737915039062, -0.9876251220703125, 6.061595916748047, 4.579978942871094, 0.9648876190185547, -1.2985343933105469, 3.5847015380859375, 0.18129348754882812, 0.1443195343017578, -2.2838306427001953, 5.412178039550781, 9.518951416015625, 4.602439880371094, 0.742095947265625, -2.2177352905273438, 9.903095245361328, 4.593441009521484, 11.391374588012695, -4.2921905517578125, 0.21312713623046875, 2.560028076171875, 2.3074493408203125, -0.49956512451171875, 9.059362411499023, 2.3587570190429688, 18.329795837402344, 0.4685802459716797, 7.299654006958008, 2.0144195556640625, 0.110443115234375, -2.4845123291015625, 23.421218872070312, -0.2550773620605469, 6.430732727050781, 3.744546890258789, 11.727958679199219, 4.947301864624023, 12.657600402832031, 1.6194801330566406, -0.4660472869873047, 7.008720397949219, 5.233295440673828, 5.2557220458984375, 1.7024917602539062, 4.128816604614258, 0.08389854431152344, 14.116836547851562, 3.87982177734375, -5.36151123046875, 2.4125595092773438, 1.7287750244140625, -3.828399658203125, 4.327236175537109, 4.220272064208984, 7.0428466796875, 4.487064361572266, 0.8106346130371094, -4.665672302246094, 11.852783203125, -1.9236984252929688, 0.831787109375, 6.302131652832031, 0.23511123657226562, 4.078943252563477, -1.4133148193359375, 3.342071533203125, 2.6243152618408203, 5.735237121582031, -9.51092529296875, 6.903289794921875, -0.4145622253417969, -5.121002197265625, 3.4704341888427734], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000121.npy"}
{"epoch": 0.36666666666666664, "step": 122, "batch_size": 128, "mean": 2.6359596252441406, "std": 4.9251298904418945, "min": -12.166534423828125, "p10": -3.149394989013672, "median": 2.615309715270996, "p90": 8.722308349609374, "max": 15.482192993164062, "pos_frac": 0.7421875, "sample": [3.26708984375, 5.922794342041016, -6.698631286621094, -7.360221862792969, 4.699378967285156, 4.127655029296875, -6.3413543701171875, 3.1297531127929688, 0.7363548278808594, -3.111175537109375, 6.944881439208984, 5.177154541015625, -1.8422775268554688, 11.42718505859375, 4.3408203125, -1.5304450988769531, -2.4082794189453125, -7.16363525390625, 2.982217788696289, 3.646787643432617, 5.09991455078125, 5.379009246826172, -12.166534423828125, 3.205230712890625, 8.476024627685547, 8.827774047851562, 1.7521820068359375, 4.372528076171875, 1.8858871459960938, 6.587944030761719, 0.17137527465820312, 5.48577880859375, 2.938751220703125, -3.1216201782226562, 6.6016845703125, 1.49176025390625, -3.214202880859375, 2.2081146240234375, 0.4290771484375, 4.6304931640625, -5.760307312011719, 5.587551116943359, -0.5995330810546875, 7.368722915649414, 5.453315734863281, 6.312675476074219, 0.7964019775390625, -6.172283172607422, 11.052169799804688, -0.48023223876953125, 5.886852264404297, 8.677108764648438, 0.209991455078125, -2.3735923767089844, -3.7730064392089844, 3.127267837524414, -3.549083709716797, 4.264434814453125, 0.21590614318847656, -1.8217315673828125, -7.6607818603515625, 5.58843994140625, 2.3390121459960938, 6.638816833496094, 4.824699401855469, 10.968505859375, 6.0918121337890625, -2.2630462646484375, 2.579345703125, 1.8936996459960938, -2.239076614379883, 4.2138824462890625, 4.805385589599609, 12.282173156738281, -1.4683837890625, 5.893548965454102, 12.502944946289062, 2.7042617797851562, -2.9718074798583984, 5.106914520263672, 0.9654083251953125, 0.5108489990234375, 0.7739486694335938, 0.6369247436523438, -0.7225322723388672, 0.5625839233398438, 13.900115966796875, 15.482192993164062, 2.291473388671875, 2.3627853393554688, 2.6208553314208984, -2.1968154907226562, -0.481201171875, 5.709598541259766, 9.65203857421875, 3.71319580078125, 3.158304214477539, 0.9627799987792969, 0.7815055847167969, 7.651828765869141, -0.813232421875, 5.2545166015625, 0.8322982788085938, -5.972442626953125, 1.3892364501953125, 9.80120849609375, 4.991090774536133, 6.621284484863281, 10.961349487304688, 4.32484245300293, 13.950614929199219, -4.703544616699219, -1.4786243438720703, 12.512908935546875, 2.6097640991210938, -1.37725830078125, 2.2845916748046875, 2.431446075439453, 1.8716812133789062, 7.037193298339844, -2.44598388671875, 1.42706298828125, 2.8152389526367188, 2.664348602294922, 1.8932876586914062, 2.221170425415039, 3.1126022338867188, 7.61016845703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000122.npy"}
{"epoch": 0.3696969696969697, "step": 123, "batch_size": 128, "mean": 3.657209634780884, "std": 5.899392127990723, "min": -12.452865600585938, "p10": -3.191875457763671, "median": 3.7956180572509766, "p90": 10.771951293945312, "max": 23.555877685546875, "pos_frac": 0.75, "sample": [0.88311767578125, 8.114471435546875, 5.127769470214844, 13.819778442382812, 2.4636592864990234, -0.9307041168212891, 1.4920883178710938, 4.5760650634765625, -0.6076431274414062, 8.328897476196289, -11.694259643554688, 15.368331909179688, 15.257942199707031, 6.119209289550781, 9.36529541015625, 4.585941314697266, 2.086864471435547, 3.3323516845703125, -5.287315368652344, 1.1679611206054688, 1.793914794921875, -1.363973617553711, 4.832050323486328, 17.11737823486328, -5.820068359375, -1.4555206298828125, 0.4152374267578125, 2.7747879028320312, 5.63189697265625, 4.996702194213867, 7.146568298339844, 2.341339111328125, 3.9972000122070312, 0.47467803955078125, -2.1939697265625, 5.59906005859375, 4.291265487670898, 3.6443557739257812, 6.559711456298828, 3.9857120513916016, 5.896617889404297, 11.190780639648438, 7.98638916015625, -0.2765636444091797, 5.8424835205078125, 1.076131820678711, 11.397956848144531, -2.9683151245117188, 2.7986984252929688, 2.090179443359375, 7.545478820800781, 4.1493377685546875, -4.0879669189453125, 1.2378501892089844, 12.994241714477539, 9.097949981689453, -6.301948547363281, 5.654653549194336, 1.27032470703125, -0.3019695281982422, 3.1119842529296875, 8.024002075195312, 2.2898330688476562, 5.641815185546875, 2.7659034729003906, 7.155941009521484, 9.404224395751953, 8.942890167236328, 4.552581787109375, 3.946880340576172, -4.4379119873046875, 5.1258087158203125, 4.340995788574219, -6.245601654052734, -3.9090576171875, -1.204833984375, 10.681671142578125, 8.209070205688477, -2.1279296875, 7.067104339599609, 4.203529357910156, 0.3948326110839844, -0.2998046875, 3.282989501953125, -6.437671661376953, 11.474773406982422, 5.6244049072265625, -2.2651824951171875, -0.0207977294921875, -12.452865600585938, 0.1092987060546875, 2.6556873321533203, 2.4632301330566406, 16.219886779785156, 7.376747131347656, 5.484256744384766, 0.6104316711425781, -1.7780647277832031, 7.5801239013671875, 8.411865234375, 2.1770248413085938, -3.7135162353515625, -2.179229736328125, 3.5700111389160156, 22.491775512695312, 5.08203125, 6.0430450439453125, 23.555877685546875, 0.27849578857421875, 8.172119140625, 1.3605461120605469, 6.078189849853516, 7.729209899902344, -4.451057434082031, 2.1002883911132812, 6.7887725830078125, -1.9947376251220703, -9.080856323242188, 9.792152404785156, -0.34771728515625, 4.247108459472656, 12.441970825195312, 1.662322998046875, 10.98260498046875, 4.470357894897461, -2.2598724365234375, -1.7556190490722656, 4.278038024902344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000123.npy"}
{"epoch": 0.37272727272727274, "step": 124, "batch_size": 128, "mean": 1.502960205078125, "std": 5.727512836456299, "min": -16.563201904296875, "p10": -5.2342288970947255, "median": 1.1261920928955078, "p90": 8.621133804321289, "max": 16.64422607421875, "pos_frac": 0.609375, "sample": [-5.833625793457031, 4.3141326904296875, 0.0483856201171875, 3.4020538330078125, 5.338260650634766, -2.4230270385742188, -2.576080322265625, 4.9732666015625, 3.4058990478515625, 9.171379089355469, -1.4308452606201172, -6.992584228515625, 11.288116455078125, 9.081428527832031, -9.414501190185547, -1.409637451171875, -2.1089324951171875, 2.0758438110351562, 5.4494476318359375, -2.1196956634521484, 4.211757659912109, -15.561286926269531, -5.974723815917969, 0.04053497314453125, 5.089649200439453, 0.0414581298828125, -0.0276947021484375, 6.511615753173828, -0.0027313232421875, 4.522060394287109, -0.7785110473632812, 1.7555732727050781, 9.180191040039062, 1.4751434326171875, 12.0477294921875, 3.8211822509765625, 1.3617706298828125, 1.431844711303711, 1.577545166015625, 13.895965576171875, 2.1215896606445312, -3.4508209228515625, -0.33112525939941406, -2.141254425048828, 0.48517608642578125, 0.5199203491210938, -1.0103988647460938, -2.332630157470703, -0.6437931060791016, 0.192138671875, -8.459465026855469, 6.533958435058594, -0.7764892578125, -0.8651466369628906, -4.014289855957031, -2.03570556640625, 0.9453315734863281, 1.5586071014404297, -3.232666015625, -8.63922119140625, -1.770233154296875, 8.059730529785156, 5.591773986816406, 2.761007308959961, 4.334678649902344, 6.10626220703125, -9.5426025390625, -16.563201904296875, 8.422332763671875, -1.24786376953125, 7.1760101318359375, 4.54296875, 16.64422607421875, -6.071258544921875, -4.977344512939453, -0.7894363403320312, -1.462615966796875, -0.08980178833007812, -1.347412109375, 2.32568359375, -0.8186893463134766, -2.6493911743164062, 8.04079818725586, 5.487035751342773, 7.030242919921875, 2.0272750854492188, 2.359254837036133, -0.19101715087890625, 3.091585159301758, -4.136772155761719, 13.70751953125, 1.4341392517089844, 8.554183959960938, 2.2735748291015625, -3.0027084350585938, 0.5974235534667969, 0.5850811004638672, -1.1864871978759766, -0.46787452697753906, 9.076663970947266, 7.62542724609375, -0.8307952880859375, 8.616104125976562, 5.535152435302734, 3.7187957763671875, 1.09271240234375, 0.6322956085205078, 0.20515060424804688, 3.400249481201172, 8.632869720458984, -10.499588012695312, 10.512565612792969, 0.5055160522460938, 6.85919189453125, 0.74176025390625, 12.298576354980469, 12.19830322265625, 5.207130432128906, 2.8923683166503906, 1.1596717834472656, -10.016754150390625, 7.272151947021484, 7.616180419921875, 1.3987808227539062, -0.6739997863769531, 1.9158973693847656, -10.835952758789062, -1.9976806640625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000124.npy"}
{"epoch": 0.37575757575757573, "step": 125, "batch_size": 128, "mean": 1.987923264503479, "std": 6.5134968757629395, "min": -19.487884521484375, "p10": -5.254790878295898, "median": 2.274923324584961, "p90": 8.264184570312498, "max": 25.123138427734375, "pos_frac": 0.65625, "sample": [1.6846542358398438, -2.577657699584961, 9.010017395019531, 0.3279876708984375, -0.9256973266601562, -8.551116943359375, -4.524742126464844, 0.6284332275390625, 5.019462585449219, -12.868438720703125, -1.30096435546875, -3.9609298706054688, 2.934762954711914, -0.18819427490234375, -1.3991546630859375, 3.1220779418945312, 0.20744705200195312, -1.9954032897949219, 5.876873016357422, -7.287139892578125, 4.057975769042969, -2.62103271484375, 2.040546417236328, -13.835205078125, 3.3395423889160156, 6.9957733154296875, 2.4603919982910156, 4.281272888183594, 4.85260009765625, 1.6140022277832031, 6.455230712890625, 7.042083740234375, 11.05242919921875, 0.1281585693359375, 3.1284637451171875, 0.9616661071777344, 2.3435821533203125, -0.3644866943359375, -10.542221069335938, -11.748214721679688, -0.36882781982421875, 4.691307067871094, 0.8550319671630859, -2.2399978637695312, 7.0126495361328125, 3.021270751953125, -8.4403076171875, 5.512994766235352, 5.073841094970703, 9.63212776184082, 3.0867385864257812, 7.902305603027344, 4.5942840576171875, 3.06689453125, -5.6805572509765625, 8.66042709350586, -2.9376068115234375, 3.5392112731933594, 6.876285552978516, -15.819549560546875, 3.206745147705078, -3.0635643005371094, -0.1693878173828125, 6.0319366455078125, 5.460578918457031, -0.9404144287109375, -1.828908920288086, 4.3472137451171875, 8.9927978515625, -2.0839309692382812, 5.909828186035156, 10.726608276367188, -0.93463134765625, -19.487884521484375, 6.136089324951172, 4.19390869140625, 2.2062644958496094, -7.408212661743164, 6.478366851806641, 0.2028045654296875, 6.086477279663086, -0.3098716735839844, 5.648763656616211, 1.2816810607910156, 1.8538055419921875, 5.752418518066406, 18.227867126464844, -2.2174224853515625, 8.125358581542969, 3.429410934448242, -0.479095458984375, -4.7893524169921875, 4.586753845214844, 1.5345897674560547, -5.4681549072265625, 4.128456115722656, 5.320465087890625, 0.103607177734375, 8.588111877441406, -1.0824508666992188, 7.7895050048828125, 0.8404388427734375, 6.833885192871094, -0.29205322265625, -1.8007678985595703, 4.068023681640625, 11.85940170288086, 6.728704452514648, -1.67388916015625, 14.256416320800781, -5.4252166748046875, 0.20072174072265625, -5.181751251220703, -4.427101135253906, 5.3098907470703125, -3.6208724975585938, 14.536376953125, 1.0353584289550781, 24.198455810546875, 4.097919464111328, 25.123138427734375, 1.8145294189453125, 4.936470031738281, 0.2199878692626953, -2.2515716552734375, 4.338111877441406, 3.375844955444336, 6.353237152099609], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000125.npy"}
{"epoch": 0.3787878787878788, "step": 126, "batch_size": 128, "mean": 2.434814453125, "std": 6.032659530639648, "min": -16.8807373046875, "p10": -3.5042045593261717, "median": 1.7925634384155273, "p90": 10.035765647888182, "max": 23.312896728515625, "pos_frac": 0.6796875, "sample": [1.3840503692626953, 9.821235656738281, 13.599464416503906, -0.3756828308105469, 1.578399658203125, 0.5478172302246094, -3.66552734375, 7.429901123046875, 2.0876598358154297, 9.26190185546875, 0.9842643737792969, -12.66925048828125, 2.4344444274902344, 11.13882064819336, 5.480899810791016, -2.8614501953125, -0.06458282470703125, 3.024538040161133, 5.285087585449219, 4.2556915283203125, 6.531368255615234, 2.4935150146484375, -2.2635955810546875, 0.04900932312011719, 9.58258056640625, 12.698379516601562, 11.274765014648438, -1.3895263671875, 6.1780853271484375, 0.3414764404296875, 0.4687957763671875, -2.583881378173828, -0.5548858642578125, 17.581405639648438, -4.54827880859375, -8.325752258300781, 4.979705810546875, -2.1623382568359375, 0.9839382171630859, -3.4350662231445312, 1.7205772399902344, -3.059864044189453, 7.6566619873046875, 1.844635009765625, -0.29754638671875, -1.8586692810058594, 6.592700958251953, 1.7495193481445312, 2.634174346923828, 1.7338714599609375, 7.4467010498046875, 10.305177688598633, 3.5123062133789062, -2.9244766235351562, 5.8578338623046875, 3.7737998962402344, 3.624826431274414, -10.17156982421875, 0.7689971923828125, -1.6004562377929688, -2.1753807067871094, 5.705078125, 6.5675506591796875, 1.8356075286865234, -2.6751785278320312, -9.916015625, -5.085319519042969, -16.8807373046875, 10.969749450683594, -0.8995285034179688, 6.1380157470703125, 23.312896728515625, 11.59515380859375, 0.7044715881347656, -3.7409591674804688, 0.5650615692138672, -2.556276321411133, 2.3971214294433594, 12.398147583007812, 6.997379302978516, 5.12244987487793, 1.9106597900390625, -1.6706390380859375, -0.6108894348144531, 11.364593505859375, 1.4988479614257812, 4.1543426513671875, 0.860992431640625, -13.797222137451172, -0.09252166748046875, -0.175079345703125, 1.3347320556640625, 0.5032386779785156, 2.6414318084716797, 4.728736877441406, -0.052764892578125, 1.4137592315673828, 9.920303344726562, 4.654872894287109, 2.831838607788086, -1.3319473266601562, 4.7532958984375, 9.490520477294922, 2.9630813598632812, 0.6116943359375, 1.3209228515625, 0.3865814208984375, -6.939838409423828, 4.852917671203613, 3.4697952270507812, 4.577836990356445, 8.641036987304688, 4.76654052734375, 3.4975051879882812, -1.2157554626464844, -1.0887298583984375, -4.21563720703125, 21.208404541015625, -3.878498077392578, -0.9477920532226562, 3.4382781982421875, 6.8513946533203125, 2.4217147827148438, 4.779926300048828, 10.92999267578125, 6.1122283935546875, -2.5697593688964844, 1.0854301452636719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000126.npy"}
{"epoch": 0.38181818181818183, "step": 127, "batch_size": 128, "mean": 2.2296650409698486, "std": 5.035611629486084, "min": -8.431510925292969, "p10": -3.8804576873779295, "median": 1.8882827758789062, "p90": 8.981806373596191, "max": 14.333999633789062, "pos_frac": 0.671875, "sample": [2.457357406616211, -2.0251922607421875, 11.01266860961914, 4.1443939208984375, 0.17154693603515625, 1.1418304443359375, 4.539009094238281, 8.515607833862305, -1.216226577758789, 5.78126335144043, -5.4917449951171875, 0.37087249755859375, 4.7920684814453125, 2.9017066955566406, -2.4205703735351562, 10.831268310546875, 4.204357147216797, 2.1825904846191406, -1.5145263671875, 10.120635986328125, -7.967315673828125, -3.8591041564941406, 8.692146301269531, 2.0557174682617188, 8.991941452026367, -2.116029739379883, 7.80859375, 0.78582763671875, 12.606201171875, 5.4795989990234375, 3.9514198303222656, -4.405006408691406, 4.944362640380859, -1.4342613220214844, 3.772674560546875, -2.3794097900390625, 4.574573516845703, 1.5561065673828125, 1.1344032287597656, -0.8056869506835938, 2.1239013671875, -3.9302825927734375, 8.977462768554688, -3.4338912963867188, -2.7151336669921875, 2.2143096923828125, 1.465667724609375, -0.8148841857910156, 5.111030578613281, -3.577587127685547, 3.9095458984375, -4.091327667236328, 6.321186065673828, -2.1779327392578125, -1.0914592742919922, 3.533935546875, -2.6961097717285156, -4.203100204467773, 13.644645690917969, -3.4055099487304688, -2.8157577514648438, 0.6579170227050781, 1.1849021911621094, 0.817718505859375, -3.2766342163085938, 1.8011016845703125, 10.353546142578125, -1.9795684814453125, 3.2308616638183594, 8.016799926757812, 6.720157623291016, 6.7801666259765625, 14.333999633789062, 8.054061889648438, -8.431510925292969, 0.6924514770507812, 0.6362228393554688, 13.899749755859375, -2.67474365234375, 7.91583251953125, -3.620361328125, -0.6051406860351562, -5.282236099243164, 6.592323303222656, -3.5596923828125, 2.3827743530273438, 0.017795562744140625, 5.41668701171875, 3.9513702392578125, -1.4748573303222656, 5.9063720703125, -5.251838684082031, -2.7389984130859375, 8.307022094726562, 11.0672607421875, 7.784183502197266, 0.4718284606933594, 0.15414810180664062, 0.29943084716796875, 7.0038299560546875, 1.5836563110351562, 4.823284149169922, 3.5379905700683594, 7.169532775878906, -5.90814208984375, -3.0069313049316406, 3.0871734619140625, 5.245113372802734, 12.292381286621094, -4.315570831298828, 9.457904815673828, -5.9746856689453125, 4.561622619628906, 0.1708545684814453, 3.2509021759033203, 0.239593505859375, -3.5521316528320312, 8.769264221191406, -1.9388351440429688, 9.762001037597656, 0.21135711669921875, 1.559427261352539, 3.5553054809570312, 6.723579406738281, 2.284189224243164, 1.9754638671875, 2.664398193359375, -4.620880126953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000127.npy"}
{"epoch": 0.38484848484848483, "step": 128, "batch_size": 128, "mean": 2.972583055496216, "std": 5.865764617919922, "min": -13.89168930053711, "p10": -4.2595054626464846, "median": 2.381715774536133, "p90": 11.003567886352537, "max": 19.531814575195312, "pos_frac": 0.671875, "sample": [7.409646987915039, -6.465076446533203, 13.059524536132812, 15.9705810546875, 7.592529296875, 11.97784423828125, -1.7660255432128906, -1.1400642395019531, 8.975143432617188, 0.34616851806640625, 2.9881324768066406, -6.863407135009766, 2.528820037841797, -5.369911193847656, 6.322052001953125, 2.7061386108398438, -1.5092086791992188, 3.1130599975585938, 1.97760009765625, 5.226484298706055, 6.219352722167969, 0.10228729248046875, 12.008636474609375, 1.5300750732421875, -6.101478576660156, 3.9848403930664062, 0.5634956359863281, 9.548637390136719, 1.4855422973632812, 12.943273544311523, 3.828235626220703, 2.773355484008789, -2.4596710205078125, -4.5828704833984375, -1.2216720581054688, -1.0820274353027344, -0.1373291015625, 1.9774131774902344, -2.7632503509521484, 3.6295166015625, 5.201873779296875, 0.9616241455078125, -1.7621307373046875, -3.6463546752929688, 14.47064208984375, 3.17437744140625, 10.595954895019531, 9.698822021484375, 1.0028572082519531, 4.687652587890625, 5.28973388671875, -3.6727752685546875, 1.051788330078125, -1.8612117767333984, -0.5274429321289062, 16.118812561035156, 4.451560974121094, 10.680671691894531, 7.11395263671875, -6.698692321777344, -1.6842193603515625, -0.8165054321289062, 7.142852783203125, -4.8323211669921875, -4.250160217285156, -2.36676025390625, 6.576467514038086, 6.551174163818359, 9.220413208007812, 6.5917510986328125, -2.8938446044921875, 5.970771789550781, -5.151760101318359, -0.5780487060546875, -13.89168930053711, 3.790924072265625, 1.4548912048339844, 8.7880859375, -1.9574127197265625, 9.253059387207031, 1.1992874145507812, 8.158830642700195, 3.0887813568115234, 3.4034652709960938, 2.3336868286132812, 7.765277862548828, 3.4070587158203125, 11.75699234008789, 2.5317153930664062, 13.80645751953125, 8.107162475585938, 5.5465545654296875, -1.3341827392578125, -2.4105377197265625, 9.75531005859375, -4.609722137451172, 1.350198745727539, -1.2644367218017578, -1.0846748352050781, 3.1162776947021484, 0.8515548706054688, 9.68072509765625, -0.955322265625, 19.531814575195312, -6.8143157958984375, -1.9657745361328125, 2.365680694580078, -1.158843994140625, 4.141559600830078, 2.2362747192382812, -4.28131103515625, 3.918243408203125, 10.097129821777344, 1.4171943664550781, 14.09185791015625, 0.1180572509765625, 7.500844955444336, 5.010524749755859, 12.135345458984375, 7.9400634765625, 1.762908935546875, -3.7919235229492188, 0.15173721313476562, 13.700790405273438, -1.1810665130615234, 2.3977508544921875, -6.5333251953125, 0.9231796264648438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000128.npy"}
{"epoch": 0.3878787878787879, "step": 129, "batch_size": 128, "mean": 3.5720789432525635, "std": 5.7973456382751465, "min": -9.774581909179688, "p10": -2.3026388168334955, "median": 2.8769683837890625, "p90": 10.717543029785157, "max": 22.620880126953125, "pos_frac": 0.765625, "sample": [-0.1007986068725586, 5.583793640136719, -3.42279052734375, 22.620880126953125, 15.425369262695312, 6.953754425048828, 0.49817657470703125, 8.985027313232422, 2.1309814453125, 2.9603195190429688, -9.774581909179688, 0.731475830078125, -0.23282814025878906, 1.5423469543457031, -7.747467041015625, 2.4587764739990234, 6.2498016357421875, 0.9169464111328125, 4.86700439453125, 1.6026763916015625, -0.055267333984375, 8.500213623046875, 14.62429428100586, 1.3928909301757812, 9.688682556152344, -1.1732406616210938, 4.981815338134766, 0.9087677001953125, 6.912635803222656, 0.7801971435546875, 4.120880126953125, 16.268218994140625, 0.9234046936035156, 6.164861679077148, 6.8492584228515625, 5.138603210449219, 6.084568023681641, -4.032688140869141, 4.128631591796875, 10.087417602539062, 1.2884063720703125, 2.0667572021484375, 2.44256591796875, 7.3972015380859375, 10.564102172851562, -1.735015869140625, 3.318115234375, 12.249580383300781, -4.758308410644531, 18.45965576171875, 2.518535614013672, 1.9469375610351562, -2.7646942138671875, -1.6996955871582031, 3.4887657165527344, 2.19647216796875, -0.6481552124023438, -6.964622497558594, 12.306793212890625, 2.5464324951171875, 2.749317169189453, 0.41217041015625, 7.54901123046875, 3.1514739990234375, -0.745849609375, 6.259635925292969, -2.1093711853027344, -2.1470203399658203, 6.7325439453125, 8.368585586547852, 11.297607421875, -2.117584228515625, -1.3158645629882812, -2.091188430786133, -0.8916587829589844, -2.6657485961914062, 6.197029113769531, -7.4487762451171875, -9.716102600097656, 8.943321228027344, 3.2510414123535156, 5.25593376159668, 1.2673120498657227, -5.8742218017578125, 0.7448787689208984, 2.3124847412109375, 10.881256103515625, 18.134689331054688, 5.644386291503906, 6.613908767700195, 0.2553558349609375, 4.427894592285156, 2.7936172485351562, -1.6791496276855469, -1.2922706604003906, 3.571878433227539, 7.0410614013671875, 0.8419284820556641, 3.1168670654296875, 0.5747756958007812, -7.504150390625, 7.576545715332031, 4.709617614746094, 10.695327758789062, 3.9740753173828125, 3.5818862915039062, 10.605644226074219, 3.4991798400878906, 2.1897048950195312, 3.0676956176757812, 8.177017211914062, 0.7144870758056641, 1.1848793029785156, 0.04766082763671875, 9.368751525878906, 9.098281860351562, 8.415252685546875, 5.487213134765625, 16.37677001953125, 2.196321487426758, 10.769378662109375, -8.703109741210938, 2.3977317810058594, -1.1315898895263672, 6.775173187255859, 1.4710121154785156, 12.153535842895508, 2.973794937133789], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000129.npy"}
{"epoch": 0.39090909090909093, "step": 130, "batch_size": 128, "mean": 4.548861503601074, "std": 6.370872974395752, "min": -11.97210693359375, "p10": -4.007698059082031, "median": 5.009308815002441, "p90": 12.961298370361327, "max": 20.992996215820312, "pos_frac": 0.734375, "sample": [-3.9453125, -11.97210693359375, -6.2938232421875, 5.929277420043945, 7.9055023193359375, 11.591426849365234, 4.347251892089844, 20.992996215820312, -6.881864547729492, 1.6227149963378906, 17.427703857421875, 12.865921020507812, -1.049652099609375, 7.099342346191406, 11.132516860961914, 7.498558044433594, -6.045343399047852, 11.608749389648438, 0.913330078125, 7.6160125732421875, 4.37579345703125, 7.5503997802734375, 6.8793792724609375, 15.104995727539062, -2.0841293334960938, 1.0019378662109375, 11.269500732421875, 13.204261779785156, 12.1112060546875, 3.50860595703125, 8.340789794921875, 8.834129333496094, 12.265892028808594, -7.4915771484375, 12.008270263671875, -1.8814620971679688, 5.907505035400391, 16.467529296875, 7.437519073486328, -2.029876708984375, 5.250053405761719, 3.302032470703125, -0.8218154907226562, 3.0208969116210938, 5.702671051025391, 2.422740936279297, 5.219646453857422, 13.183845520019531, 13.202880859375, 10.355033874511719, -1.4515724182128906, 15.719955444335938, 3.652984619140625, 2.5426788330078125, -4.29155158996582, -2.3851394653320312, 5.685028076171875, 4.521697998046875, 3.5639381408691406, 8.459053039550781, 11.794219970703125, 9.955360412597656, 5.281406402587891, 5.294748306274414, 8.050512313842773, -6.9483489990234375, 6.0023193359375, -2.0185394287109375, -0.6401100158691406, 15.575149536132812, 3.4846343994140625, 6.5271148681640625, 8.090507507324219, 3.513002395629883, 8.316732406616211, 15.70648193359375, 14.472091674804688, -1.04693603515625, 9.86092758178711, 9.075508117675781, 0.6123199462890625, 3.7923965454101562, -0.27828025817871094, 8.99346923828125, 0.0781707763671875, -4.384735107421875, -7.690793991088867, 3.3756942749023438, 7.603425979614258, 5.6982421875, -4.659809112548828, 5.420581817626953, 2.55419921875, 3.5816192626953125, 5.01548957824707, 8.546417236328125, 11.378684997558594, -3.0035552978515625, 13.83111572265625, 4.976579666137695, 5.9085235595703125, 1.5376739501953125, 6.9180450439453125, -0.09632110595703125, -3.3390884399414062, -0.6460170745849609, 6.388008117675781, 8.334819793701172, -3.9546661376953125, 3.5763092041015625, 6.077630996704102, 8.51190185546875, 1.3707733154296875, 0.956939697265625, -2.2908477783203125, -6.221157073974609, 5.0031280517578125, 0.11232185363769531, 12.79519271850586, -1.3776168823242188, 4.9578857421875, 4.882568359375, 14.387283325195312, -7.440608978271484, 6.253986358642578, -3.311492919921875, -4.131439208984375, -0.6964263916015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000130.npy"}
{"epoch": 0.3939393939393939, "step": 131, "batch_size": 128, "mean": 2.327828884124756, "std": 5.726266860961914, "min": -11.770500183105469, "p10": -4.958610534667969, "median": 2.3593406677246094, "p90": 9.071018981933593, "max": 20.937103271484375, "pos_frac": 0.671875, "sample": [1.3047447204589844, 0.01690673828125, -0.2364215850830078, -5.55340576171875, -4.8440093994140625, 3.555257797241211, -1.990133285522461, 6.972583770751953, -1.4629364013671875, 6.6150360107421875, 0.9009571075439453, 8.181983947753906, 6.29486083984375, 4.866920471191406, -6.032646179199219, -9.8299560546875, 9.141098022460938, 10.64013671875, 1.647195816040039, 1.451559066772461, 11.352642059326172, -8.47509765625, -11.770500183105469, -0.10189247131347656, -4.460582733154297, 1.4141769409179688, 2.2745361328125, 0.16449737548828125, 13.264755249023438, 8.745107650756836, 4.358551025390625, -7.18719482421875, 6.668540954589844, -1.2884330749511719, -8.724895477294922, -8.909759521484375, -5.22601318359375, 10.119941711425781, 5.190677642822266, -4.4545745849609375, 12.321533203125, 3.3294219970703125, 14.032196044921875, 2.4085693359375, -6.9282989501953125, 2.9261531829833984, 7.913656234741211, 0.6714935302734375, 4.627899169921875, -2.7095489501953125, 2.6998329162597656, 0.449310302734375, -0.1351490020751953, 1.3340682983398438, -1.4257736206054688, -10.6746826171875, 3.193450927734375, 2.699798583984375, -1.0585708618164062, -2.8777313232421875, 8.070579528808594, 5.143772125244141, 10.253036499023438, 0.8459606170654297, -2.9080352783203125, 7.647468566894531, 5.5706939697265625, 0.1798229217529297, 7.5558319091796875, 4.33049201965332, -0.7079925537109375, 1.5228042602539062, -1.3456478118896484, 3.1967086791992188, 9.040985107421875, 0.6355743408203125, -1.0580520629882812, -2.01739501953125, -1.9600067138671875, -2.280426025390625, 8.0755615234375, 7.324607849121094, -4.735504150390625, 6.313499450683594, 3.136289596557617, 3.086029052734375, 15.40765380859375, 4.305938720703125, -3.9012680053710938, -0.6347312927246094, 2.5115585327148438, 1.7077407836914062, 6.856529235839844, 2.3101119995117188, 5.701726913452148, -0.30109405517578125, 4.290874481201172, 5.624664306640625, 0.7814559936523438, 9.039112091064453, 0.9499588012695312, 0.7050132751464844, 8.779800415039062, -4.360250473022461, 4.9458770751953125, 11.884138107299805, 5.42413330078125, 7.674934387207031, 4.796875, 1.6232070922851562, 3.2635040283203125, 2.7475662231445312, -0.1933116912841797, 1.732025146484375, -4.192089080810547, 6.43182373046875, 20.937103271484375, 4.4431915283203125, -0.6554031372070312, 2.534679412841797, -5.476470947265625, 11.121246337890625, 5.441436767578125, 11.341400146484375, 3.342632293701172, 6.81207275390625, -4.7390289306640625, -5.362762451171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000131.npy"}
{"epoch": 0.396969696969697, "step": 132, "batch_size": 128, "mean": 3.4777467250823975, "std": 5.724380016326904, "min": -8.102493286132812, "p10": -3.0475465774536126, "median": 3.0459728240966797, "p90": 11.588770294189453, "max": 21.975357055664062, "pos_frac": 0.6953125, "sample": [0.7852554321289062, 4.345462799072266, -1.7831878662109375, 0.6981887817382812, -2.1998634338378906, 1.7029914855957031, 2.4101638793945312, 2.7751693725585938, 3.6154823303222656, -2.6078643798828125, 0.18505859375, -6.340911865234375, 11.558349609375, 5.051898956298828, 4.304618835449219, 0.6806640625, -1.5772552490234375, 1.7124481201171875, -3.916423797607422, -2.0045547485351562, -2.18511962890625, 10.67916488647461, 5.048435211181641, 7.348060607910156, 5.879081726074219, 4.076202392578125, -0.1394500732421875, 15.192115783691406, 4.712890625, -1.0499649047851562, 14.1929931640625, 2.2909679412841797, 5.412509918212891, 11.659751892089844, 14.14697265625, 4.290689468383789, -4.0730133056640625, 7.632537841796875, -2.5013656616210938, -6.742549896240234, -2.4175148010253906, 1.8811454772949219, 14.609146118164062, 2.593395233154297, 11.746467590332031, 5.7232666015625, 11.074945449829102, 5.750938415527344, 9.148300170898438, -1.9115791320800781, 7.78228759765625, 8.974838256835938, -0.35523223876953125, 4.241676330566406, 10.89495849609375, 9.978769302368164, 8.681070327758789, 5.438209533691406, 3.5674476623535156, -3.935457229614258, 4.9658355712890625, -3.916025161743164, 3.2428951263427734, 3.104419708251953, 12.2520751953125, -8.102493286132812, 3.7884597778320312, -0.4200096130371094, 1.4728374481201172, -0.5188522338867188, 6.149139404296875, 0.3607635498046875, 6.253597259521484, 8.57281494140625, 1.931182861328125, 9.775245666503906, 3.114208221435547, 4.467660903930664, -5.942474365234375, 9.097320556640625, 14.748725891113281, 1.7450103759765625, 0.8571243286132812, -0.3673553466796875, 12.647201538085938, -7.041473388671875, 1.585113525390625, 6.247222900390625, -2.4179916381835938, 0.10564422607421875, -3.6256866455078125, 4.392345428466797, 13.436042785644531, 6.763542175292969, 2.9875259399414062, -0.9919033050537109, 3.4958553314208984, 1.9665298461914062, -2.799772262573242, -0.41657257080078125, -4.788108825683594, 9.07821273803711, -0.9385528564453125, -0.3595733642578125, 1.045318603515625, 13.275409698486328, -2.387859344482422, 6.937164306640625, -1.6238861083984375, 1.381082534790039, 15.417404174804688, -2.6599197387695312, 0.12854766845703125, -6.794319152832031, -0.234039306640625, 7.6754150390625, 9.050682067871094, 2.69097900390625, -5.5006866455078125, 4.8139190673828125, 0.6487655639648438, 5.7799072265625, 9.429931640625, -1.7718505859375, 7.3035125732421875, 21.975357055664062, 8.652137756347656, 5.227210998535156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000132.npy"}
{"epoch": 0.4, "step": 133, "batch_size": 128, "mean": 3.9036622047424316, "std": 6.177172660827637, "min": -13.658447265625, "p10": -3.1803401947021483, "median": 3.975632667541504, "p90": 10.7994384765625, "max": 25.9222412109375, "pos_frac": 0.7578125, "sample": [-2.2662925720214844, 0.9067363739013672, 0.18304061889648438, 1.840179443359375, 4.769554138183594, -3.0957489013671875, 1.5475444793701172, 11.99053955078125, 4.181467056274414, 5.555807113647461, 5.16645622253418, -8.648092269897461, 1.4800529479980469, 15.514923095703125, 5.859489440917969, 2.8647117614746094, -3.5541152954101562, 9.502347946166992, 4.522697448730469, 15.1251220703125, 1.926309585571289, -9.743751525878906, -2.3273277282714844, -1.84857177734375, 3.3630752563476562, -7.976469039916992, 8.890472412109375, -1.1578140258789062, -0.7217674255371094, 14.356468200683594, 15.393867492675781, 9.470563888549805, -7.888824462890625, 11.059932708740234, 0.8914604187011719, 7.867485046386719, 7.652046203613281, 8.20059585571289, -1.151458740234375, 1.4596939086914062, -0.6893978118896484, 2.492046356201172, 10.964836120605469, 4.917194366455078, 4.647035598754883, -5.074834823608398, 13.385490417480469, 9.756454467773438, 5.058349609375, 4.905817031860352, 0.7039813995361328, 11.066764831542969, 10.728553771972656, 7.6730499267578125, 9.710922241210938, 8.700935363769531, 9.351722717285156, -7.632545471191406, 3.0581912994384766, -1.3285369873046875, 9.708892822265625, 0.043853759765625, 10.048248291015625, -1.9497222900390625, 1.0710906982421875, 10.529003143310547, 7.912147521972656, 3.7697982788085938, 3.1453018188476562, -0.6768550872802734, 0.9787750244140625, 1.3084793090820312, -2.7283554077148438, 25.9222412109375, 2.5073471069335938, 3.092620849609375, 11.249107360839844, -0.49896240234375, 3.3733787536621094, 7.809774398803711, 6.334930419921875, -3.5068588256835938, -0.13400840759277344, -5.024635314941406, 6.157783508300781, 20.354568481445312, 8.578632354736328, -2.065174102783203, 4.907440185546875, 1.3049240112304688, -8.145660400390625, 2.499906539916992, 1.743133544921875, 7.174713134765625, 8.015830993652344, 4.873435974121094, 4.626110076904297, -3.3777198791503906, 4.505645751953125, 7.7321014404296875, 0.6706771850585938, 3.1660823822021484, 0.10229682922363281, 10.23919677734375, 7.98585319519043, 5.5962066650390625, -7.491832733154297, 4.90679931640625, 7.691495895385742, 9.244544982910156, 2.9584197998046875, 0.26314544677734375, 4.5605010986328125, -0.3012847900390625, 9.1981201171875, 0.2951011657714844, -13.658447265625, 6.898612976074219, 7.663730621337891, 5.3416290283203125, 15.779632568359375, -2.825237274169922, 4.7457122802734375, 3.6301498413085938, 8.333677291870117, 3.1677093505859375, 6.199260711669922, -1.4226837158203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000133.npy"}
{"epoch": 0.403030303030303, "step": 134, "batch_size": 128, "mean": 2.7626774311065674, "std": 5.768915176391602, "min": -11.414676666259766, "p10": -4.032136344909668, "median": 2.1806020736694336, "p90": 9.919091796874998, "max": 17.477577209472656, "pos_frac": 0.671875, "sample": [0.1873016357421875, 2.704425811767578, 0.10056686401367188, -8.745044708251953, 7.7975311279296875, 0.2654266357421875, 6.1879425048828125, 9.723617553710938, 6.31658935546875, -3.592845916748047, 0.662445068359375, -5.0775909423828125, -2.612285614013672, -4.876529693603516, -2.2216567993164062, 5.164493560791016, 7.598529815673828, 4.746604919433594, -4.060386657714844, 4.245521545410156, 16.7442626953125, -8.273406982421875, -0.317138671875, 15.423568725585938, -10.122928619384766, 9.499961853027344, 3.5562973022460938, -6.6770172119140625, -1.6932754516601562, 0.012739181518554688, 6.201133728027344, 0.2727928161621094, 6.811676025390625, 0.7062454223632812, -0.05260467529296875, 7.0428466796875, 6.173391342163086, 17.477577209472656, 2.143789291381836, -3.064605712890625, 13.770416259765625, 10.781373977661133, -1.7808265686035156, -4.020029067993164, 6.1538543701171875, 1.5155601501464844, 3.8690109252929688, 1.5931129455566406, 8.682357788085938, -0.44347190856933594, 13.167388916015625, 2.4925079345703125, 8.385108947753906, 3.189472198486328, 7.4759521484375, 6.619834899902344, 4.33903694152832, 2.1082801818847656, 4.437385559082031, -6.7698516845703125, 2.949880599975586, 6.1470184326171875, 6.410652160644531, 1.307088851928711, -0.313690185546875, 10.375198364257812, 2.367321014404297, -3.015899658203125, -1.12579345703125, 6.4592437744140625, -4.712837219238281, -3.0040512084960938, 3.5515823364257812, 1.5440521240234375, -0.024248123168945312, 8.348182678222656, 6.262519836425781, 11.082275390625, -0.8800716400146484, 3.9955291748046875, -0.38021087646484375, 0.4799842834472656, -3.38262939453125, 8.447158813476562, -2.778900146484375, -11.414676666259766, 0.764801025390625, 3.688322067260742, 3.7675399780273438, -3.8671798706054688, -8.403274536132812, -2.1231613159179688, 6.6307373046875, 1.6276988983154297, 8.940994262695312, 8.590511322021484, -1.8209991455078125, 9.364036560058594, 12.301101684570312, 2.82354736328125, -0.4428539276123047, -1.2817306518554688, 2.2174148559570312, 1.7740345001220703, 1.6814117431640625, -4.679962158203125, -1.1035232543945312, 0.88140869140625, 4.403968811035156, -3.7184181213378906, 9.18060302734375, -2.3405418395996094, 10.890342712402344, 0.2157917022705078, 8.262931823730469, 16.9312744140625, 7.914405822753906, 8.614410400390625, 10.398109436035156, -2.7693328857421875, -4.7281951904296875, 4.559173583984375, 4.9520263671875, 1.6726398468017578, 0.1947650909423828, 11.593734741210938, 5.780818939208984, -0.3577919006347656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000134.npy"}
{"epoch": 0.40606060606060607, "step": 135, "batch_size": 128, "mean": 3.3117198944091797, "std": 6.116799354553223, "min": -11.437744140625, "p10": -3.8353591918945313, "median": 3.195218086242676, "p90": 10.221344947814941, "max": 24.6243896484375, "pos_frac": 0.765625, "sample": [0.28692626953125, -9.121959686279297, 5.858245849609375, 9.507080078125, 8.465118408203125, -1.9135284423828125, -7.37078857421875, 15.626340866088867, 2.40374755859375, 4.2289276123046875, 11.522293090820312, -9.815906524658203, -1.1942062377929688, 5.810272216796875, 8.539932250976562, 2.431854248046875, 0.11539649963378906, 3.681365966796875, -5.68768310546875, -0.06192588806152344, 3.6628952026367188, 2.6020355224609375, -0.1578826904296875, 9.290740966796875, -3.6032867431640625, 3.2303085327148438, 7.943023681640625, -1.7659626007080078, 20.07763671875, -0.4616851806640625, 0.736572265625, -11.018623352050781, 2.7476272583007812, -2.2605056762695312, -8.267616271972656, 4.891361236572266, 3.907245635986328, 1.650970458984375, 9.068187713623047, 5.967216491699219, -4.00714111328125, 12.037334442138672, 2.277202606201172, 24.6243896484375, 2.354022979736328, 10.495229721069336, 2.7846412658691406, 2.835489273071289, 10.169729232788086, 4.055488586425781, 1.1260757446289062, 5.962272644042969, 9.973121643066406, -2.2559146881103516, 4.997697830200195, 2.9220504760742188, 2.389862060546875, 7.753654479980469, 4.030372619628906, 4.649955749511719, 5.255901336669922, 7.6236572265625, 4.892698287963867, 0.7952384948730469, 7.55976676940918, 5.867189407348633, -3.284639358520508, 18.11029052734375, 1.8573360443115234, 5.4637451171875, 6.474023818969727, 0.21637725830078125, 8.348312377929688, 4.729427337646484, -0.34915924072265625, 4.297382354736328, -0.33416748046875, 1.5379638671875, 5.07769775390625, 3.8787612915039062, 2.9065628051757812, 0.9505386352539062, -2.7948455810546875, 5.263278961181641, 5.257499694824219, -3.8243408203125, 2.4272918701171875, -11.155731201171875, 4.864036560058594, 1.1763420104980469, 6.487518310546875, 0.5261096954345703, -10.438751220703125, 9.245307922363281, 0.39160919189453125, 0.184722900390625, 13.209259033203125, -3.8610687255859375, 13.730728149414062, 4.899810791015625, 3.6724319458007812, 5.732566833496094, 3.4183692932128906, -3.6170272827148438, 10.167831420898438, 5.210193634033203, 3.160127639770508, -11.437744140625, 10.341781616210938, 1.34930419921875, 4.5686798095703125, 1.139678955078125, 13.645248413085938, 11.619804382324219, 9.201248168945312, 0.7610225677490234, 6.09370231628418, 7.862190246582031, -4.080101013183594, -1.7763290405273438, -2.5358734130859375, -5.716194152832031, 11.603378295898438, 0.4692344665527344, 1.80975341796875, 0.5513648986816406, 2.48822021484375, 10.006301879882812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000135.npy"}
{"epoch": 0.4090909090909091, "step": 136, "batch_size": 128, "mean": 3.3401005268096924, "std": 6.356526851654053, "min": -8.32171630859375, "p10": -4.274361038208007, "median": 2.7646217346191406, "p90": 10.89661407470703, "max": 31.54364013671875, "pos_frac": 0.6640625, "sample": [10.077995300292969, 6.276824951171875, 1.0837860107421875, -5.187889099121094, 9.491020202636719, 8.79294204711914, 3.222249984741211, 2.785930633544922, 3.8919906616210938, -0.8691253662109375, 2.6138229370117188, 16.615982055664062, -4.176597595214844, 4.970096588134766, 7.421154022216797, -5.128166198730469, -4.9248199462890625, -1.5118522644042969, 0.99151611328125, 7.1043243408203125, 0.5630455017089844, -1.9374771118164062, 9.772834777832031, 11.45033073425293, -7.026454925537109, -6.2713623046875, 3.7683792114257812, 1.1589508056640625, 6.141704559326172, 8.812810897827148, 3.4568557739257812, -3.193084716796875, 9.570369720458984, -1.591400146484375, 16.004608154296875, -2.1128177642822266, 19.467514038085938, -1.481515884399414, 11.430587768554688, 10.1246337890625, -1.234283447265625, 1.268890380859375, -2.606964111328125, 3.3846168518066406, -1.61651611328125, -1.4171028137207031, 0.2582416534423828, 3.1877174377441406, -0.0988311767578125, -6.2575531005859375, 31.54364013671875, -0.6144485473632812, -2.4688568115234375, 0.36688232421875, -2.49468994140625, -4.693000793457031, -8.32171630859375, 9.781150817871094, 1.36590576171875, 0.9286689758300781, -4.068326950073242, 6.395965576171875, 16.592973709106445, 3.82965087890625, 1.5743541717529297, -6.991546630859375, -5.821174621582031, 6.189569473266602, 3.4830894470214844, 11.352699279785156, 6.73876953125, 15.823463439941406, 1.3109130859375, -1.0836334228515625, 7.404441833496094, 9.711071014404297, 6.762331008911133, 3.0027637481689453, 7.109907150268555, 2.1462554931640625, 5.399932861328125, 0.009881973266601562, 5.64569091796875, 1.4219703674316406, -1.4774303436279297, 2.3918380737304688, 4.227329254150391, -4.502475738525391, 7.968875885009766, 7.4953460693359375, 5.6710968017578125, 7.564914703369141, 0.8697891235351562, -3.252349853515625, 1.0579833984375, 6.92718505859375, 15.6124267578125, 7.9191436767578125, 6.458404541015625, 6.164543151855469, -1.7057876586914062, -1.4750518798828125, 2.7433128356933594, 1.5192852020263672, 10.701148986816406, -0.7098617553710938, 1.4608612060546875, -6.892669677734375, 4.812080383300781, 11.78091049194336, 3.9548492431640625, 15.850418090820312, 6.6507110595703125, 12.605995178222656, 5.217475891113281, -0.37946319580078125, 3.21966552734375, -0.23480796813964844, -6.7682037353515625, -2.082672119140625, -0.686004638671875, 7.3868865966796875, -1.7280197143554688, 2.976408004760742, -1.9829082489013672, -0.589752197265625, 10.09170150756836, 4.847284317016602], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000136.npy"}
{"epoch": 0.4121212121212121, "step": 137, "batch_size": 128, "mean": 3.5412509441375732, "std": 6.311209201812744, "min": -14.166397094726562, "p10": -4.492698860168457, "median": 3.3249998092651367, "p90": 11.0029296875, "max": 23.5577392578125, "pos_frac": 0.7109375, "sample": [0.13615036010742188, 4.843475341796875, 6.2534942626953125, -4.040317535400391, 11.284881591796875, 1.3293533325195312, -0.36553955078125, 10.964088439941406, 7.036285400390625, 0.9415054321289062, 9.809797286987305, -5.0539398193359375, 10.75311279296875, 3.858417510986328, 2.9090003967285156, -1.751495361328125, 11.019432067871094, -9.102912902832031, 2.04119873046875, 6.406494140625, 8.284645080566406, 6.226215362548828, -0.066375732421875, 4.0184173583984375, 8.409820556640625, 2.5738906860351562, -0.21701622009277344, 1.8496322631835938, 2.1918888092041016, -1.6725082397460938, 6.245464324951172, 10.231201171875, -5.0428924560546875, 1.1790237426757812, 3.5670928955078125, 0.75244140625, 14.285789489746094, -1.2870044708251953, 8.733993530273438, 8.744426727294922, -11.9202880859375, 2.57025146484375, 0.42606353759765625, 1.8075752258300781, -2.5535106658935547, 2.9508132934570312, -0.1069793701171875, 23.5577392578125, -8.245582580566406, 5.533905029296875, 6.387901306152344, -8.715065002441406, 1.9062995910644531, 21.369903564453125, 17.26241111755371, 6.285531997680664, 2.4923667907714844, 8.438335418701172, 2.5025463104248047, 6.836334228515625, 8.127449035644531, 13.665021896362305, -4.233039855957031, 6.838153839111328, 8.309806823730469, -2.3684463500976562, -0.9305706024169922, -4.43768310546875, -1.933074951171875, -6.5072479248046875, 14.494056701660156, 7.692695617675781, 13.754066467285156, -4.980968475341797, 3.284820556640625, 2.586465835571289, 12.097869873046875, 6.469768524169922, 5.894317626953125, 8.802989959716797, 5.1970062255859375, 2.18499755859375, -0.5770950317382812, 7.681262969970703, -4.742828369140625, -2.482532501220703, 8.282325744628906, 4.261314392089844, 2.8411712646484375, -0.9123001098632812, 12.707405090332031, 8.60152816772461, 2.5447998046875, 3.706939697265625, 7.512908935546875, -1.9412040710449219, 4.712226867675781, -4.621068954467773, 5.77020263671875, 5.947746276855469, -7.1827850341796875, -8.311065673828125, 4.768735885620117, 10.995857238769531, 3.4520416259765625, 2.3235950469970703, 5.09912109375, 14.941947937011719, 3.3651790618896484, -0.141754150390625, -1.1610107421875, 2.539226531982422, -2.9375343322753906, 4.9699554443359375, 0.5270195007324219, 5.1134033203125, 2.7418270111083984, 7.395059585571289, 0.431304931640625, -14.166397094726562, -2.0244064331054688, 6.704465866088867, 14.152740478515625, 8.85202407836914, -1.5247268676757812, -0.90826416015625, 5.7678680419921875, 7.128265380859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000137.npy"}
{"epoch": 0.41515151515151516, "step": 138, "batch_size": 128, "mean": 3.9939985275268555, "std": 5.452207565307617, "min": -8.319572448730469, "p10": -2.7328729629516597, "median": 3.8720102310180664, "p90": 11.633972930908202, "max": 16.679384231567383, "pos_frac": 0.765625, "sample": [2.5535736083984375, 5.020832061767578, 1.7390594482421875, 5.668066024780273, 8.842323303222656, 1.8300228118896484, 4.976249694824219, -2.6404190063476562, 6.6143646240234375, 5.607723236083984, 15.041088104248047, -5.796531677246094, -6.5882415771484375, 16.400901794433594, -1.486175537109375, 5.5831146240234375, -1.329986572265625, 6.4422454833984375, 14.933219909667969, 10.099754333496094, 1.000070571899414, 3.620767593383789, 3.8664398193359375, -7.653602600097656, -1.935028076171875, 5.330120086669922, 3.012054443359375, -4.4194793701171875, -2.2623291015625, 10.368415832519531, 8.923486709594727, 7.378395080566406, 12.68216323852539, 5.7611083984375, 0.9985198974609375, 6.0139923095703125, -0.23681640625, 13.269947052001953, 0.4670867919921875, 0.6177959442138672, 5.0146484375, 1.5028533935546875, -0.6507759094238281, -2.5745468139648438, 2.3731613159179688, 1.1986618041992188, 9.946929931640625, 15.194374084472656, 1.9197463989257812, 2.7879486083984375, 3.9838180541992188, 0.23268699645996094, 2.179868698120117, 10.4273681640625, 5.680793762207031, 6.189506530761719, -1.0911788940429688, 8.99654769897461, -0.04909324645996094, 10.630752563476562, 7.9651336669921875, 4.434600830078125, -0.7425498962402344, 8.936637878417969, 3.8775806427001953, 0.17618179321289062, 1.088827133178711, 7.3802337646484375, 0.5973243713378906, 4.670753479003906, 16.679384231567383, -4.936275482177734, 4.579414367675781, 1.6572761535644531, 8.85272216796875, 1.1262454986572266, 1.2092399597167969, 14.1298828125, 6.236183166503906, 11.488443374633789, -3.7809677124023438, -2.948598861694336, 12.179656982421875, -5.53570556640625, 11.688377380371094, 0.9885177612304688, 11.697845458984375, 5.635105133056641, 11.927993774414062, 9.147354125976562, -2.61419677734375, 3.708404541015625, 1.0429840087890625, 3.1767425537109375, 3.534637451171875, 7.377845764160156, 5.96934700012207, 3.744762420654297, 4.8598480224609375, -1.3104896545410156, 1.8410072326660156, 1.3080520629882812, 7.343109130859375, 6.7840576171875, -3.6758651733398438, -3.46185302734375, 7.9956817626953125, 4.8996429443359375, 9.959266662597656, 11.61065673828125, 1.6025161743164062, -0.76873779296875, -8.319572448730469, 3.9903411865234375, 8.841957092285156, -0.147979736328125, -0.32093048095703125, -3.2473983764648438, 4.21528434753418, 9.854934692382812, 11.75631332397461, 1.411651611328125, -6.481193542480469, 9.323081970214844, 0.9702777862548828, 7.359458923339844, 4.611410140991211, -2.128448486328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000138.npy"}
{"epoch": 0.41818181818181815, "step": 139, "batch_size": 128, "mean": 2.436525821685791, "std": 6.173001289367676, "min": -12.018348693847656, "p10": -5.674020004272461, "median": 2.55950927734375, "p90": 10.516835021972657, "max": 17.768203735351562, "pos_frac": 0.6796875, "sample": [10.62490463256836, -9.091110229492188, 6.672334671020508, 1.0013923645019531, 3.7930755615234375, -1.4043693542480469, 4.9443359375, -2.5285110473632812, 2.8019046783447266, -2.0380706787109375, -1.7147102355957031, 1.9742240905761719, 3.443147659301758, 8.968757629394531, 0.5525550842285156, 6.089385986328125, -6.252937316894531, 14.096336364746094, 1.7799015045166016, 2.469268798828125, -4.148872375488281, -8.966041564941406, -4.968070983886719, 0.6041069030761719, -9.673675537109375, 17.768203735351562, 5.9315948486328125, -3.3353195190429688, 15.452362060546875, 10.482292175292969, -0.19715499877929688, 0.5225429534912109, 6.831640243530273, 7.6157989501953125, -9.380790710449219, -2.7916412353515625, 10.224288940429688, 5.694175720214844, -4.508388519287109, 3.7076797485351562, -0.8790626525878906, 15.846832275390625, 0.17737579345703125, -7.6451873779296875, -6.9725341796875, 2.79388427734375, -1.4580326080322266, 4.636665344238281, -0.4578094482421875, -2.6954116821289062, 9.467704772949219, 3.2677383422851562, 6.036720275878906, 3.8900699615478516, 3.3342742919921875, 0.10317230224609375, 7.495216369628906, -5.635532379150391, 1.91119384765625, 15.186351776123047, -7.259727478027344, 0.38289642333984375, -0.447265625, 4.582586288452148, -2.3869247436523438, -3.0362396240234375, 4.642784118652344, 1.25982666015625, 4.6127777099609375, -1.1254348754882812, -12.018348693847656, -0.11655044555664062, -0.097869873046875, 0.05442619323730469, 1.75933837890625, 10.508224487304688, 3.601827621459961, 10.238273620605469, 3.702831268310547, 3.1252365112304688, -4.6369171142578125, 0.44500732421875, 5.64166259765625, -0.1636505126953125, 4.035467147827148, 2.649749755859375, 6.853948593139648, 3.771636962890625, -11.236503601074219, 1.9116668701171875, 2.7819442749023438, 6.567840576171875, -10.2967529296875, 0.3988800048828125, -2.1848602294921875, 7.8140716552734375, 13.972305297851562, 13.943016052246094, 9.154075622558594, -5.763824462890625, 10.837173461914062, 12.808143615722656, 7.579429626464844, -5.041988372802734, -4.4243927001953125, 10.53692626953125, -5.810089111328125, 2.663421630859375, 0.005157470703125, 6.3251190185546875, 3.071746826171875, 6.346794128417969, 10.623832702636719, 16.473464965820312, -0.1401214599609375, 8.083511352539062, 3.0456619262695312, 6.170448303222656, 3.0469741821289062, 0.6672096252441406, 0.541259765625, 0.0733184814453125, 7.514768600463867, -0.1289520263671875, 5.837718963623047, 0.5286521911621094, 1.8883552551269531, 3.6601524353027344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000139.npy"}
{"epoch": 0.4212121212121212, "step": 140, "batch_size": 128, "mean": 4.769693374633789, "std": 5.55173921585083, "min": -8.508262634277344, "p10": -2.012326049804687, "median": 4.618799209594727, "p90": 11.565956306457519, "max": 22.934738159179688, "pos_frac": 0.8359375, "sample": [-3.137350082397461, 6.578983306884766, 11.416179656982422, 3.8157119750976562, -8.173112869262695, 8.072052001953125, -0.3107147216796875, -8.508262634277344, 10.529922485351562, 7.4799041748046875, -1.7954368591308594, -4.04888916015625, 0.7170448303222656, 5.108856201171875, 5.044830322265625, 3.3920631408691406, 12.108932495117188, 5.502861022949219, -1.8109512329101562, 8.432628631591797, 9.03570556640625, 6.2218017578125, 3.4521713256835938, 11.120046615600586, 7.5614013671875, 5.811248779296875, 6.198692321777344, -4.8918609619140625, 4.280366897583008, 10.367965698242188, 2.052875518798828, 3.817096710205078, -6.500091552734375, 4.600307464599609, 5.3699798583984375, 8.42877197265625, 0.08097457885742188, 11.819969177246094, 7.525032043457031, 15.878761291503906, 5.044219970703125, -3.3692245483398438, 5.380514144897461, 1.6560478210449219, 1.5474853515625, 9.237098693847656, 0.06254959106445312, 10.900079727172852, 8.714950561523438, 6.111383438110352, 8.96258544921875, 4.320594787597656, -3.2381973266601562, 3.366138458251953, -0.2064971923828125, 4.637290954589844, 11.656875610351562, 4.566947937011719, 6.432422637939453, 6.431737899780273, 8.090354919433594, 7.076744079589844, 15.86297607421875, 3.855945587158203, 2.9228591918945312, 16.08936309814453, 2.1908721923828125, 12.82379150390625, 0.6127777099609375, 11.52699089050293, 0.6178131103515625, -3.2776832580566406, 3.3286666870117188, 5.3293914794921875, 6.562591552734375, -1.5980758666992188, 2.926837921142578, 2.831514358520508, -0.24116897583007812, -1.3903427124023438, 3.6998748779296875, 9.47357177734375, 1.8799896240234375, 4.0130615234375, 15.815895080566406, -5.107536315917969, 13.118972778320312, 5.610343933105469, 4.104866027832031, 4.555536270141602, 13.025594711303711, 7.546573638916016, 1.4455490112304688, 10.025382995605469, 0.6121864318847656, 4.7239227294921875, 1.5153045654296875, 9.350502014160156, 0.6069793701171875, 6.435529708862305, 6.036598205566406, 7.54693603515625, 6.30938720703125, 7.445518493652344, 18.114425659179688, 3.17608642578125, 2.9719085693359375, 8.235206604003906, 14.546066284179688, 22.934738159179688, 4.446161270141602, -6.8278961181640625, -7.935634613037109, -2.4822006225585938, 0.3680839538574219, 0.6082897186279297, 1.1117401123046875, 7.098793029785156, 0.9535446166992188, -0.4561939239501953, 2.6496505737304688, 2.6470565795898438, 2.708372116088867, 5.3966522216796875, 6.24493408203125, 9.448844909667969, 3.4510345458984375, 10.316848754882812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000140.npy"}
{"epoch": 0.42424242424242425, "step": 141, "batch_size": 128, "mean": 3.6668903827667236, "std": 6.006840705871582, "min": -9.357721328735352, "p10": -3.712393951416015, "median": 3.427553176879883, "p90": 11.541100311279298, "max": 19.411178588867188, "pos_frac": 0.7109375, "sample": [-3.5197677612304688, 11.17312240600586, 1.6211414337158203, 7.727691650390625, 10.809942245483398, 11.53240966796875, 5.165046691894531, 1.7609176635742188, 7.21240234375, 6.734901428222656, -1.1168956756591797, 4.529075622558594, 11.9769287109375, 2.336141586303711, 2.7720489501953125, 4.822456359863281, -2.094707489013672, 3.856290817260742, 13.382034301757812, -8.531055450439453, 0.5514984130859375, 15.447792053222656, 0.09343147277832031, -1.0283546447753906, 9.912979125976562, -3.5692672729492188, 0.62469482421875, 6.970245361328125, 7.9051361083984375, -7.729001998901367, 0.6795463562011719, 11.19731330871582, 5.2039642333984375, 10.65673828125, 3.39971923828125, 7.974309921264648, 7.11322021484375, 6.3239593505859375, -6.250457763671875, 2.873159408569336, 5.526020050048828, -1.691925048828125, 3.2187366485595703, 1.0794525146484375, 2.9364013671875, -8.648740768432617, 1.5150604248046875, 0.3318004608154297, 10.821548461914062, 9.055099487304688, 12.902904510498047, 7.543840408325195, 8.300588607788086, 1.7308578491210938, 6.357959747314453, 4.899433135986328, 8.652210235595703, -5.0850067138671875, 5.639381408691406, 4.924442291259766, -0.8102149963378906, -4.5216217041015625, 0.6802749633789062, -9.357721328735352, 3.1320877075195312, -1.47491455078125, -2.171506881713867, -0.9970016479492188, 3.0639114379882812, 7.383001327514648, 1.9817123413085938, 3.4553871154785156, 12.786481857299805, 4.802268981933594, 0.0557861328125, 6.286825180053711, -0.510589599609375, 3.102508544921875, -1.4810218811035156, 12.336807250976562, -1.365509033203125, 6.381704330444336, -2.0155067443847656, -5.469932556152344, 10.107608795166016, 1.696084976196289, -9.051567077636719, 4.060478210449219, -0.18185043334960938, 6.4649658203125, -8.766616821289062, 5.999000549316406, -4.046356201171875, 2.174245834350586, -6.5956573486328125, 19.411178588867188, -3.2872772216796875, 13.231338500976562, 8.691627502441406, 13.246957778930664, 2.826322555541992, 3.0825653076171875, 14.184066772460938, -2.2574615478515625, -1.921234130859375, -1.642578125, 5.54595947265625, 3.572141647338867, 13.751996994018555, 8.43023681640625, -2.2837448120117188, -2.056180953979492, 4.219245910644531, 9.817214965820312, -5.094085693359375, 10.00954818725586, 11.384624481201172, 13.629730224609375, 1.1055564880371094, -0.8090057373046875, 11.561378479003906, 4.626060485839844, -1.433340072631836, -1.38031005859375, 4.665929794311523, 7.04681396484375, 11.344802856445312, 8.497512817382812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000141.npy"}
{"epoch": 0.42727272727272725, "step": 142, "batch_size": 128, "mean": 4.483747482299805, "std": 6.586967468261719, "min": -12.265655517578125, "p10": -3.4072601318359372, "median": 4.091621398925781, "p90": 12.171324920654296, "max": 30.626617431640625, "pos_frac": 0.7578125, "sample": [12.967727661132812, -4.040000915527344, -4.3447723388671875, 1.8596229553222656, 6.405448913574219, 2.026996612548828, 4.052085876464844, 8.068946838378906, 6.588104248046875, 11.438949584960938, -12.265655517578125, 12.098968505859375, 1.4883995056152344, 1.3217391967773438, 8.299201965332031, 0.06884765625, -2.4417572021484375, 3.7087974548339844, -0.56134033203125, 9.378631591796875, -3.8822860717773438, -1.382537841796875, 9.814140319824219, 5.304893493652344, -0.8785037994384766, -4.4429931640625, 4.938869476318359, 0.3657798767089844, 24.235595703125, 0.5762405395507812, 30.626617431640625, 9.45782470703125, 15.152091979980469, -2.853870391845703, -5.0828704833984375, -1.1285247802734375, 7.943824768066406, 4.776218414306641, 7.630516052246094, 5.7825775146484375, 11.780960083007812, 10.383535385131836, 5.848533630371094, 2.1795272827148438, 16.95294189453125, 4.831489562988281, 7.097785949707031, 4.994041442871094, 11.56365966796875, -3.7571773529052734, 7.485908508300781, 11.228446960449219, -0.2096271514892578, -1.8702621459960938, 5.806922912597656, -9.706314086914062, -1.8231887817382812, 5.658344268798828, 1.0051097869873047, 1.4312286376953125, -5.8122100830078125, 9.09964370727539, -0.86370849609375, 1.1080913543701172, 3.6375579833984375, 3.8288650512695312, 1.9990425109863281, 0.640960693359375, 17.390182495117188, 5.4420166015625, 1.155731201171875, -5.187652587890625, 4.131156921386719, 7.023626327514648, -0.6700439453125, 11.353057861328125, 8.741477966308594, -0.674591064453125, 3.070281982421875, -5.582790374755859, 8.244697570800781, -3.59112548828125, 5.2482757568359375, 2.7188892364501953, 9.891670227050781, 15.325931549072266, 13.854278564453125, -0.5575027465820312, 12.340156555175781, 18.80683135986328, 2.055398941040039, 5.830863952636719, 9.662460327148438, -3.328460693359375, 6.160331726074219, 5.12384033203125, 14.219070434570312, 3.5796546936035156, 1.3916339874267578, 8.821390151977539, 0.4672355651855469, 0.798828125, 7.513278961181641, -2.147296905517578, 1.1967239379882812, 5.9008941650390625, 2.39404296875, 4.6792449951171875, 4.3522491455078125, -1.2041435241699219, 6.481605529785156, 5.256099700927734, 11.475997924804688, 16.286849975585938, 2.771728515625, 5.939361572265625, 0.4292716979980469, 0.2590656280517578, 2.156280517578125, 10.570144653320312, 17.37212371826172, 4.5337066650390625, 3.4345149993896484, -0.4685707092285156, 3.8960037231445312, -7.057334899902344, 8.854011535644531, -1.8056659698486328], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000142.npy"}
{"epoch": 0.4303030303030303, "step": 143, "batch_size": 128, "mean": 4.264645576477051, "std": 6.9132256507873535, "min": -13.98150634765625, "p10": -3.9653053283691406, "median": 4.048069000244141, "p90": 13.320609283447263, "max": 23.8106689453125, "pos_frac": 0.7578125, "sample": [2.6915283203125, 2.1151771545410156, -9.95068359375, 4.513507843017578, 2.7865524291992188, -1.481201171875, -7.547050476074219, 4.132793426513672, 12.807594299316406, 20.280052185058594, -4.752628326416016, 4.418426513671875, -8.031063079833984, 2.8574066162109375, -2.9259033203125, -1.159566879272461, -8.235767364501953, 11.736106872558594, 9.258224487304688, 2.9677963256835938, 7.2016448974609375, 6.3851318359375, 7.357648849487305, 7.439842224121094, -3.9505767822265625, 0.7834434509277344, 0.10555267333984375, 6.689949035644531, 2.597797393798828, 4.974464416503906, 0.730255126953125, 2.895862579345703, -1.1028099060058594, -10.58537483215332, 9.524917602539062, 7.326057434082031, 9.207906723022461, 5.581024169921875, 6.62066650390625, -2.8941192626953125, 2.32958984375, -3.5737838745117188, 1.2748260498046875, 2.5253753662109375, 12.1568603515625, 2.7620773315429688, 0.9126625061035156, 7.0067138671875, 6.429126739501953, 1.91571044921875, -2.9139633178710938, 14.139453887939453, 11.534988403320312, 3.6689605712890625, 3.510448455810547, 18.443740844726562, 13.938423156738281, 8.61771011352539, -4.245597839355469, -0.1335296630859375, -1.581878662109375, 1.3707275390625, -2.7828521728515625, 8.827735900878906, 7.556510925292969, 0.7096672058105469, 0.9266338348388672, -1.9841461181640625, 7.213462829589844, -0.14362716674804688, 6.5161285400390625, 2.794557571411133, 11.495399475097656, 7.105016708374023, -3.6628799438476562, 7.505165100097656, 4.388332366943359, -8.330450057983398, -8.009185791015625, 16.781394958496094, 8.903579711914062, 15.3643798828125, 1.33001708984375, 23.8106689453125, -0.7706680297851562, 7.634149551391602, 0.5573577880859375, 11.789947509765625, 10.345596313476562, 10.095182418823242, 14.19097900390625, 3.633026123046875, 3.9633445739746094, 2.139434814453125, -1.9526443481445312, 5.3398895263671875, 4.196857452392578, -6.987579345703125, 4.2162017822265625, 2.275310516357422, 8.001056671142578, 6.161434173583984, 7.215951919555664, 20.77191162109375, -3.9996719360351562, -5.586967468261719, 8.376632690429688, 9.652626037597656, 4.42999267578125, 13.055831909179688, 8.8485107421875, 5.226104736328125, 17.931060791015625, 17.16302490234375, 2.2335433959960938, -3.18389892578125, 4.973407745361328, 3.3203506469726562, 0.4353179931640625, 3.3917694091796875, 14.077713012695312, 2.2967376708984375, -13.98150634765625, 15.76031494140625, 4.891765594482422, -3.8002700805664062, 12.55828857421875, 5.212543487548828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000143.npy"}
{"epoch": 0.43333333333333335, "step": 144, "batch_size": 128, "mean": 4.2915191650390625, "std": 6.6777262687683105, "min": -12.499847412109375, "p10": -2.620644378662109, "median": 3.678476333618164, "p90": 13.175521850585936, "max": 21.6832275390625, "pos_frac": 0.7265625, "sample": [1.8217124938964844, 5.849395751953125, 0.7075366973876953, 6.219066619873047, 18.937347412109375, 10.959228515625, 6.943580627441406, 4.750396728515625, 12.886909484863281, 1.7583904266357422, 6.702718734741211, -0.00548553466796875, 9.326072692871094, 8.38006591796875, 8.093681335449219, 1.9816665649414062, -8.368947982788086, -1.91583251953125, -0.96063232421875, 8.00372314453125, 3.7035789489746094, -0.31201934814453125, 4.192375183105469, 8.271150588989258, 9.964387893676758, 7.743513107299805, -3.375873565673828, 4.033935546875, 5.725191116333008, 0.8238277435302734, -1.7732467651367188, 1.652923583984375, 7.017875671386719, -4.04705810546875, 5.848480224609375, 3.7350425720214844, 4.019296646118164, 10.122177124023438, 16.07781219482422, 19.536033630371094, 5.592464447021484, 10.726802825927734, -2.5621490478515625, -6.396331787109375, 5.998939514160156, 0.5875511169433594, 10.752334594726562, 4.484580993652344, 2.6461715698242188, 1.737588882446289, 9.013423919677734, 13.553955078125, 7.7217864990234375, 1.8838424682617188, 16.502437591552734, 2.1579360961914062, 14.018474578857422, -1.6535148620605469, -0.1511077880859375, -0.46294403076171875, -2.158050537109375, 19.711898803710938, 12.623357772827148, 4.121681213378906, 6.507780075073242, -0.9976654052734375, -12.499847412109375, 1.3148040771484375, 13.640026092529297, -0.22150230407714844, -2.11798095703125, -2.3797378540039062, -10.22119140625, 3.1432876586914062, -3.9490814208984375, 8.132530212402344, 0.9050827026367188, 4.20697021484375, 21.6832275390625, 17.5517578125, 7.5766754150390625, -0.4019775390625, 13.013336181640625, 9.105623245239258, 6.5972747802734375, -3.657806396484375, 1.6461181640625, 0.01233673095703125, 7.187177658081055, 10.04595947265625, 4.527107238769531, -0.949951171875, 10.852142333984375, 4.859405517578125, -0.9498748779296875, -1.6881828308105469, 10.219284057617188, 3.6455764770507812, 6.2618408203125, 1.487152099609375, -1.0719985961914062, 9.150503158569336, 0.259429931640625, 7.81866455078125, 1.3799057006835938, 20.539932250976562, 3.6533737182617188, 5.1462554931640625, -3.69830322265625, -0.12344169616699219, 15.411628723144531, -1.6100616455078125, -7.972888946533203, -12.080902099609375, 17.157211303710938, 3.1494064331054688, 1.5108528137207031, -7.8603057861328125, 3.0265045166015625, -2.7571334838867188, 1.9026908874511719, 2.5417404174804688, 3.194955825805664, 7.8611602783203125, 0.5825252532958984, -0.17769622802734375, 11.411605834960938, 1.4000015258789062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000144.npy"}
{"epoch": 0.43636363636363634, "step": 145, "batch_size": 128, "mean": 3.956120252609253, "std": 6.203293323516846, "min": -12.472564697265625, "p10": -3.570055198669433, "median": 3.600139617919922, "p90": 11.59503936767578, "max": 22.577972412109375, "pos_frac": 0.7265625, "sample": [5.125143051147461, -4.3668975830078125, 7.770172119140625, -3.219482421875, -5.919013977050781, -6.359619140625, -0.7861080169677734, 2.6980972290039062, 11.33447265625, -5.268791198730469, -1.2089576721191406, 6.4792327880859375, -1.1146163940429688, 15.105707168579102, 2.9816951751708984, 1.0436782836914062, -4.070810317993164, -7.903175354003906, 17.673049926757812, -2.106485366821289, 2.110933303833008, 16.301910400390625, 6.6637115478515625, -1.2113876342773438, 0.3298797607421875, -3.342121124267578, 5.849814414978027, 15.8616943359375, -0.6116485595703125, 3.0622711181640625, 13.09393310546875, 7.940732955932617, 2.245849609375, 3.1555633544921875, 1.5086593627929688, 4.260898590087891, -0.47434425354003906, 7.845020294189453, 10.647071838378906, 0.5895023345947266, 10.223373413085938, 2.5830535888671875, 11.1929931640625, 5.135902404785156, 0.9889411926269531, 12.05908203125, 3.4852371215820312, 5.104701995849609, 3.6340408325195312, 6.328804016113281, 3.1483612060546875, -1.1054801940917969, 4.3627166748046875, -6.760108947753906, 4.436120986938477, 0.7502765655517578, 9.582084655761719, 7.3411865234375, 6.551536560058594, 5.158660888671875, -0.9620952606201172, -5.4181365966796875, 6.380023956298828, -4.6776275634765625, 10.857032775878906, 5.7848663330078125, 5.194902420043945, 22.577972412109375, 7.477592468261719, 4.8722076416015625, -5.459133148193359, 0.4361572265625, -0.17348480224609375, -0.09518814086914062, -2.447826385498047, 2.5640487670898438, -0.6839809417724609, 13.856712341308594, 10.690025329589844, 12.092887878417969, 7.126821517944336, 2.4371566772460938, 4.823448181152344, 14.042556762695312, 10.047172546386719, 3.792652130126953, 5.2917022705078125, -12.472564697265625, 8.389617919921875, 11.302154541015625, 6.997406005859375, 10.8138427734375, 0.3035774230957031, 3.523609161376953, -2.6436710357666016, 12.069778442382812, -0.4254913330078125, 9.862190246582031, 5.431087493896484, 1.7541389465332031, -8.657234191894531, -3.3554458618164062, 4.94195556640625, -2.2539291381835938, 0.3493614196777344, -2.5118026733398438, 7.517322540283203, 3.5662384033203125, 1.5910511016845703, 2.4932022094726562, 9.726360321044922, 8.634429931640625, 11.396163940429688, 16.838716506958008, 3.1019439697265625, -8.466411590576172, 1.2118148803710938, -0.07275390625, 2.0817699432373047, 17.93921661376953, 5.2829437255859375, 7.586887359619141, 8.42218017578125, 2.7868118286132812, 4.265903472900391, 4.3860321044921875, 5.328559875488281, -0.9987602233886719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000145.npy"}
{"epoch": 0.4393939393939394, "step": 146, "batch_size": 128, "mean": 4.234456539154053, "std": 6.638282299041748, "min": -10.099613189697266, "p10": -4.913139152526855, "median": 4.558274269104004, "p90": 12.266093063354491, "max": 18.411773681640625, "pos_frac": 0.75, "sample": [9.435592651367188, 9.358074188232422, -0.8269805908203125, -4.7117156982421875, 9.536758422851562, 10.392875671386719, -4.9060821533203125, -3.2576560974121094, -3.699413299560547, -7.1067047119140625, 14.488311767578125, 3.7295379638671875, -1.7895889282226562, -1.9921722412109375, 12.587966918945312, 4.0702056884765625, 2.9632492065429688, 0.7222099304199219, -6.459236145019531, 3.73809814453125, 13.198982238769531, 6.308250427246094, 11.349288940429688, 5.505138397216797, 5.68280029296875, 17.558502197265625, -8.86480712890625, 2.1074676513671875, -5.614311218261719, 7.9034881591796875, -1.2344932556152344, -0.9073715209960938, 10.07470703125, 1.1008758544921875, 9.751022338867188, 13.043289184570312, -4.929605484008789, -2.4092636108398438, 10.346054077148438, 9.540538787841797, 6.213006973266602, 7.807094573974609, 15.387130737304688, 0.2691001892089844, 16.184772491455078, 2.0764312744140625, 4.980358123779297, 14.770401000976562, 6.746086120605469, 13.584701538085938, -10.099613189697266, 2.082763671875, 11.280773162841797, 11.588409423828125, 11.930381774902344, 9.070068359375, 2.5152549743652344, 12.040599822998047, 4.3263702392578125, 9.638175964355469, 6.576667785644531, 2.509157180786133, -2.3449325561523438, -0.841766357421875, -9.822036743164062, 1.0550384521484375, 5.53009033203125, 0.3498725891113281, 11.46783447265625, 6.725088119506836, 2.729022979736328, 18.411773681640625, 7.4517669677734375, 11.434738159179688, 7.2456817626953125, 6.5079193115234375, 1.705108642578125, 17.436988830566406, 4.6853179931640625, -4.737565994262695, 2.6844120025634766, 0.87744140625, -1.780374526977539, -1.0002212524414062, 3.4489517211914062, -8.302566528320312, 0.11343002319335938, 2.2841720581054688, 2.9627647399902344, 6.265449523925781, 14.437652587890625, 11.804824829101562, 2.4395751953125, 10.18552017211914, 0.90728759765625, 2.9404983520507812, 4.975128173828125, -9.110870361328125, -0.7349510192871094, 4.670677185058594, 5.65186882019043, -0.2498931884765625, -6.568206787109375, 0.3234672546386719, -5.77984619140625, 1.1446380615234375, 0.5777359008789062, 7.414817810058594, -4.4342498779296875, 5.104736328125, 4.8325653076171875, 6.811206817626953, 0.05535888671875, 8.906795501708984, 4.558614730834961, -3.0118846893310547, 17.955120086669922, 8.57275390625, 6.911590576171875, 2.9701976776123047, 4.557933807373047, 4.855690002441406, 10.560874938964844, 9.811996459960938, 10.70391845703125, -7.600860595703125, -7.075389862060547, 12.12814712524414], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000146.npy"}
{"epoch": 0.44242424242424244, "step": 147, "batch_size": 128, "mean": 5.04603385925293, "std": 7.364063262939453, "min": -18.52074432373047, "p10": -2.6624109268188474, "median": 4.07667350769043, "p90": 13.53545684814453, "max": 31.0316162109375, "pos_frac": 0.7421875, "sample": [3.3595733642578125, 4.636531829833984, 7.721904754638672, 10.858112335205078, 5.271034240722656, 1.8110408782958984, -2.7747459411621094, 2.991077423095703, 12.871330261230469, 8.0556640625, 12.00666618347168, 8.649627685546875, 8.871009826660156, -5.012516021728516, -3.6704483032226562, -6.64031982421875, 22.56866455078125, -2.614267349243164, 7.186149597167969, -1.0597190856933594, 9.395641326904297, -18.52074432373047, 5.2390594482421875, 5.916389465332031, 31.0316162109375, 14.253639221191406, 12.273138046264648, -0.0623626708984375, 8.318349838256836, 3.355926513671875, 7.933277130126953, -9.10345458984375, 0.049530029296875, 7.8306732177734375, 10.007110595703125, -0.34268951416015625, 13.565673828125, 10.143009185791016, 3.1846923828125, 2.2107696533203125, 10.94002914428711, 2.380664825439453, 10.998571395874023, -0.8699302673339844, 6.808097839355469, 23.271896362304688, 2.489534378051758, 17.953872680664062, -1.9090576171875, 18.67993927001953, -7.487857818603516, -2.496368408203125, -5.861808776855469, -2.4419021606445312, 6.784416198730469, 9.361568450927734, 13.814193725585938, 7.588775634765625, 3.0356178283691406, 11.025627136230469, 8.135498046875, 6.159000396728516, 9.400238037109375, 14.322616577148438, 19.43144989013672, 6.404380798339844, 10.452552795410156, -0.496673583984375, 12.100872039794922, 1.2334060668945312, 11.785430908203125, 1.2410507202148438, 9.89853286743164, 3.9179306030273438, 8.251880645751953, 12.518756866455078, -0.35833740234375, -0.08775711059570312, 1.7857532501220703, 3.037067413330078, 0.24032974243164062, 17.3179931640625, -2.367246627807617, -1.8797454833984375, 6.624534606933594, -1.7460365295410156, 3.3819503784179688, 8.805526733398438, 2.119813919067383, 9.468513488769531, -1.9102840423583984, 13.522506713867188, 2.5749969482421875, 1.2263984680175781, 2.659656524658203, 3.2622127532958984, -5.2841339111328125, 5.828910827636719, 6.2890167236328125, -1.5851364135742188, -4.149515151977539, 10.006088256835938, 5.841760635375977, 1.645904541015625, -3.356121063232422, 15.66473388671875, -8.54034423828125, 3.540935516357422, -1.9468917846679688, 28.101119995117188, 3.26995849609375, 2.2359580993652344, -0.745819091796875, 4.932586669921875, 7.380043029785156, 2.241373062133789, 4.235416412353516, 11.405532836914062, 1.5073204040527344, 7.789072036743164, 0.1091461181640625, -0.1976909637451172, 1.9122371673583984, -0.3185539245605469, 6.002620697021484, 0.6843833923339844, 7.240837097167969, -4.114723205566406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000147.npy"}
{"epoch": 0.44545454545454544, "step": 148, "batch_size": 128, "mean": 4.87718391418457, "std": 6.853434085845947, "min": -10.742385864257812, "p10": -3.1489549636840812, "median": 4.669631004333496, "p90": 14.040615844726561, "max": 28.198402404785156, "pos_frac": 0.734375, "sample": [12.895416259765625, 3.9172439575195312, -0.8883552551269531, -0.051666259765625, 6.3838043212890625, 9.179862976074219, 7.688137054443359, 3.371225357055664, 3.1393470764160156, 8.659576416015625, -2.069427490234375, 4.670501708984375, 14.105087280273438, 1.0083141326904297, -0.42613983154296875, 4.668760299682617, 15.38543701171875, 8.299491882324219, -1.4815521240234375, -0.7449226379394531, 7.1741790771484375, 0.7959442138671875, 10.080326080322266, 10.151041030883789, -6.207191467285156, 4.849058151245117, -2.4978904724121094, 1.6253814697265625, 3.6625518798828125, -0.42339324951171875, 0.5712890625, 3.0493087768554688, 1.9955787658691406, 2.91259765625, 28.198402404785156, 4.039237976074219, 16.150802612304688, 8.618791580200195, 7.846719741821289, 3.1717758178710938, 7.91363525390625, -4.082061767578125, 3.968425750732422, -3.7520809173583984, 7.546638488769531, -0.9318618774414062, 1.8604888916015625, -1.8758697509765625, 8.066055297851562, -8.917503356933594, -2.890472412109375, 13.90252685546875, 2.8798904418945312, 10.473548889160156, -6.234935760498047, 1.8213043212890625, 9.736671447753906, 3.1138572692871094, -1.7147598266601562, 0.9408645629882812, 6.447883605957031, 10.47894287109375, 18.8070068359375, -2.479625701904297, 7.6042938232421875, 14.21087646484375, 9.493232727050781, 5.230373382568359, -1.4521636962890625, 2.0732269287109375, 1.1971931457519531, 22.122390747070312, 11.300033569335938, 6.373863220214844, 5.760416030883789, 6.4680023193359375, 2.321491241455078, -0.22281265258789062, 9.086929321289062, -9.170806884765625, 5.2847900390625, 6.998565673828125, 17.52276611328125, -8.615859985351562, 1.9847793579101562, 0.5624828338623047, 7.819032669067383, -3.753875732421875, 8.036441802978516, 9.790992736816406, 8.262481689453125, -0.057903289794921875, 10.01034164428711, 1.0343570709228516, 19.1881103515625, 4.28570556640625, -0.3493309020996094, 8.661800384521484, 6.642120361328125, 14.012985229492188, -0.7167339324951172, 14.18239974975586, 1.4655380249023438, 11.437789916992188, -10.742385864257812, -1.9672012329101562, 5.348335266113281, 3.0103683471679688, 7.019935607910156, 4.939338684082031, 2.1615638732910156, -4.155891418457031, 7.4929962158203125, -1.783416748046875, 19.64154052734375, -8.585525512695312, 11.659896850585938, 10.480255126953125, 6.8239593505859375, 7.368679046630859, -5.046966552734375, 13.331718444824219, 5.18902587890625, -4.0068511962890625, -0.6375465393066406, 15.473213195800781, 14.59503173828125, 12.029991149902344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000148.npy"}
{"epoch": 0.4484848484848485, "step": 149, "batch_size": 128, "mean": 4.81694221496582, "std": 5.9519524574279785, "min": -14.25848388671875, "p10": -2.257956314086914, "median": 5.478828430175781, "p90": 11.590592765808104, "max": 18.830276489257812, "pos_frac": 0.78125, "sample": [0.14678192138671875, -4.291748046875, 9.775672912597656, 5.584014892578125, 7.102386474609375, 15.888900756835938, 6.406970977783203, 9.610157012939453, 6.354789733886719, 4.234397888183594, 6.137226104736328, 3.9345703125, 5.3736419677734375, 1.7790451049804688, 1.70599365234375, -5.527549743652344, -2.2058792114257812, -0.6029472351074219, 8.517410278320312, 3.832967758178711, 0.7014198303222656, -1.65667724609375, 4.280601501464844, 7.825752258300781, 4.231330871582031, 6.5913543701171875, 4.3535003662109375, 8.246105194091797, 5.964397430419922, 7.6561431884765625, 1.0519218444824219, 2.69122314453125, -2.1170310974121094, -1.1785163879394531, 8.065441131591797, 6.0376739501953125, 3.049989700317383, 8.27496337890625, 3.677845001220703, 11.991079330444336, -2.744638442993164, 12.207298278808594, 9.51165771484375, -2.2919464111328125, 10.545660018920898, 2.1902694702148438, 9.547286987304688, 13.031005859375, 13.050491333007812, 1.9274349212646484, 11.274810791015625, 9.460590362548828, 3.120929718017578, 10.880634307861328, -1.7607421875, -0.6330852508544922, 9.203392028808594, 2.7848129272460938, 1.9925155639648438, 15.799083709716797, -0.1569366455078125, 7.003271102905273, 11.159730911254883, 9.726654052734375, 5.312675476074219, -8.290467262268066, 15.435966491699219, 7.090110778808594, 8.406116485595703, 6.307731628417969, 4.754791259765625, -1.4891357421875, 11.79364013671875, -10.008293151855469, 11.048412322998047, 2.1338272094726562, -1.600494384765625, -1.7016563415527344, 11.760581970214844, 11.177978515625, 7.282867431640625, 6.9189605712890625, 6.962015151977539, 10.496826171875, 4.969778060913086, 8.870807647705078, 5.035289764404297, 1.9680862426757812, -1.04010009765625, 5.912090301513672, 2.5405807495117188, 3.848388671875, -1.8252696990966797, -4.987575531005859, 1.9114456176757812, 9.863361358642578, 4.346443176269531, 5.943239212036133, 13.07255744934082, 6.754051208496094, 2.060943603515625, 9.548675537109375, -8.2398681640625, 11.13824462890625, -3.840423583984375, 6.621955871582031, -9.187154769897461, 10.052417755126953, 8.418275833129883, 11.517740249633789, 2.418001174926758, 10.100555419921875, 8.602859497070312, -0.7777481079101562, -2.243389129638672, 7.032440185546875, 4.589298248291016, 18.830276489257812, 13.758773803710938, -5.4284515380859375, -7.016517639160156, 4.792205810546875, 2.306060791015625, -14.25848388671875, 4.7011871337890625, 9.379436492919922, 5.622220993041992, 14.767967224121094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000149.npy"}
{"epoch": 0.45151515151515154, "step": 150, "batch_size": 128, "mean": 3.798534870147705, "std": 7.60091495513916, "min": -11.803953170776367, "p10": -5.056583404541016, "median": 3.4839601516723633, "p90": 12.97115020751953, "max": 31.4229736328125, "pos_frac": 0.703125, "sample": [7.004367828369141, 3.144672393798828, 1.8823814392089844, -4.179840087890625, -2.2333335876464844, 4.838991165161133, 1.9623641967773438, 4.023555755615234, 1.4037246704101562, 8.139026641845703, 5.9539337158203125, -0.7268829345703125, 17.24864959716797, -2.8599624633789062, 3.3825645446777344, 6.792621612548828, 6.580146789550781, -9.145339965820312, -1.763397216796875, -3.72088623046875, 6.58343505859375, 11.153518676757812, 4.9142608642578125, 0.5240020751953125, 1.80181884765625, 10.720527648925781, 1.7636775970458984, -2.2973289489746094, 7.55613899230957, -5.38714599609375, -4.5686798095703125, 14.933891296386719, 22.54669189453125, 0.33854103088378906, -2.9628963470458984, 8.9444580078125, -11.099205017089844, 4.043735504150391, -3.446979522705078, 1.172576904296875, -5.1444091796875, -5.790802001953125, 1.3588409423828125, 2.0224609375, 31.4229736328125, -1.5250663757324219, 6.785614013671875, -0.8730010986328125, 5.103141784667969, 2.56903076171875, 10.18768310546875, 5.386564254760742, 25.52410888671875, -6.539398193359375, 7.976476669311523, 11.438064575195312, 10.96240234375, 6.952423095703125, 1.4266223907470703, 8.744857788085938, 0.3002605438232422, 6.0919036865234375, -5.018943786621094, 12.80145263671875, 2.068531036376953, 5.2438507080078125, -1.7139434814453125, 13.367111206054688, 2.415843963623047, -1.034149169921875, 2.548675537109375, 4.803550720214844, 10.002883911132812, -3.5676231384277344, 5.2611236572265625, 2.3839569091796875, 3.585355758666992, 5.5512847900390625, 3.8707199096679688, 1.7194766998291016, 5.5298919677734375, -0.42906951904296875, -3.16412353515625, 6.499580383300781, -2.4073448181152344, 19.798858642578125, 12.441246032714844, 15.653038024902344, -4.2044525146484375, 10.420106887817383, 10.395111083984375, 0.3593273162841797, -1.642181396484375, 7.998516082763672, -2.6909255981445312, -4.732246398925781, -11.803953170776367, -10.93606185913086, 19.154403686523438, 13.730659484863281, 8.924335479736328, 15.055557250976562, 18.88191032409668, 2.4656848907470703, -9.930984497070312, 7.145942687988281, 2.5457992553710938, -3.187610626220703, 11.641365051269531, 3.9470367431640625, 0.5283279418945312, -10.0330810546875, 5.662689208984375, 5.11956787109375, -6.074859619140625, 17.08929443359375, 6.463623046875, -11.425552368164062, 4.867778778076172, 2.9132957458496094, 2.4696998596191406, 10.135650634765625, 4.05401611328125, -2.0311012268066406, 3.964946746826172, 8.338272094726562, 8.312255859375, -5.234100341796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000150.npy"}
{"epoch": 0.45454545454545453, "step": 151, "batch_size": 128, "mean": 3.369046211242676, "std": 7.361353874206543, "min": -15.646392822265625, "p10": -5.425234222412109, "median": 3.226820945739746, "p90": 12.431695556640625, "max": 25.72308349609375, "pos_frac": 0.671875, "sample": [8.050727844238281, 9.48388671875, -0.8687057495117188, 1.3635845184326172, 2.43109130859375, -4.048831939697266, 12.334976196289062, 9.44076919555664, -5.514801025390625, 5.5659942626953125, 5.8356170654296875, 0.992706298828125, -4.16069221496582, 7.712978363037109, 16.435977935791016, -2.381561279296875, 13.577690124511719, 19.295684814453125, -0.4309120178222656, 5.186012268066406, -8.87109375, 25.620559692382812, -9.593986511230469, 3.974590301513672, 4.876800537109375, 17.2420654296875, 3.160947799682617, 11.952407836914062, 8.370712280273438, -0.5019550323486328, -4.119140625, 4.3598175048828125, 4.705020904541016, -1.4468154907226562, -0.5242557525634766, 2.2100486755371094, 11.881813049316406, -2.062803268432617, -6.759239196777344, 5.8782501220703125, 10.337814331054688, -0.4297904968261719, -0.011560440063476562, 12.935358047485352, 5.8349151611328125, -4.778167724609375, -11.138557434082031, -6.377590179443359, 1.1156597137451172, 6.353385925292969, 4.824958801269531, -9.378850936889648, 2.2290496826171875, 7.787994384765625, 12.874639511108398, 9.334732055664062, 10.592613220214844, 12.549583435058594, -0.5676345825195312, 8.581764221191406, 4.990119934082031, -4.047447204589844, 14.788066864013672, -2.1560134887695312, 5.146671295166016, 16.60088348388672, -2.8665924072265625, 25.72308349609375, 5.155948638916016, 5.102876663208008, 10.79864501953125, -14.309272766113281, 4.842525482177734, 1.1616439819335938, -2.4331932067871094, 1.3990936279296875, 0.9455718994140625, -5.0361480712890625, -15.646392822265625, 5.809844970703125, 10.412391662597656, 0.5696811676025391, -2.2472000122070312, 11.973194122314453, 15.923736572265625, -6.522621154785156, 2.6755905151367188, -0.4967803955078125, 1.9945831298828125, 3.4819259643554688, 4.399129867553711, 7.5369873046875, 6.4476776123046875, -2.821483612060547, 1.4776420593261719, -5.2578277587890625, 15.950157165527344, 3.9353599548339844, 3.292694091796875, 8.90542984008789, 0.7055816650390625, 2.8820037841796875, 2.0755653381347656, 11.452926635742188, 2.272430419921875, -3.7640037536621094, 3.3929214477539062, 5.750907897949219, 2.2806167602539062, -1.3094711303710938, -5.386848449707031, 5.7377471923828125, 4.768512725830078, 8.020015716552734, -1.0325641632080078, 5.64739990234375, 2.48822021484375, 1.8624496459960938, -8.571540832519531, 4.5118560791015625, 7.9594573974609375, 12.381172180175781, -5.9069061279296875, -1.4609222412109375, -11.257942199707031, -1.0621185302734375, 4.716434478759766, 1.1575794219970703], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000151.npy"}
{"epoch": 0.4575757575757576, "step": 152, "batch_size": 128, "mean": 5.422414779663086, "std": 6.77511739730835, "min": -11.508712768554688, "p10": -3.555362319946289, "median": 4.994586944580078, "p90": 14.54852523803711, "max": 21.68484115600586, "pos_frac": 0.78125, "sample": [3.6148681640625, 5.257972717285156, 15.137344360351562, 4.096689224243164, 14.652885437011719, 14.234474182128906, 7.67529296875, -3.6170005798339844, -7.460914611816406, 11.39398193359375, -7.024482727050781, -8.72747802734375, 0.349578857421875, 14.402599334716797, 17.637603759765625, 7.000576019287109, 6.605592727661133, 3.693706512451172, -3.5289459228515625, -1.602304458618164, 16.449539184570312, 0.6831207275390625, 10.623603820800781, 9.785102844238281, 14.875335693359375, 14.395339965820312, -2.516387939453125, 1.14202880859375, 1.258535385131836, 1.34259033203125, 16.25408935546875, 11.322402954101562, 8.322206497192383, 2.7324371337890625, 10.292583465576172, 0.12822723388671875, 4.547946929931641, -1.0301742553710938, -4.6795196533203125, -4.189792633056641, -6.023784637451172, 5.130889892578125, 13.497283935546875, -5.664730072021484, -4.66943359375, 5.76348876953125, 6.542198181152344, 3.002685546875, 2.04315185546875, 5.316379547119141, 8.978826522827148, 3.075347900390625, 0.5045242309570312, 6.662605285644531, 21.68484115600586, 4.858283996582031, 3.8864059448242188, 3.16790771484375, 12.957565307617188, 10.795557022094727, 15.502212524414062, -4.735435485839844, 14.082710266113281, 16.99688720703125, 12.679901123046875, -0.2914276123046875, 6.0559234619140625, 15.985214233398438, 2.977367401123047, 9.176614761352539, 4.675689697265625, 4.687128067016602, -1.3849945068359375, 5.702796936035156, 14.475387573242188, -11.508712768554688, 7.69415283203125, 8.973098754882812, 5.185813903808594, 4.347740173339844, 18.285858154296875, 8.145217895507812, 2.6026382446289062, 2.2100372314453125, 2.904603958129883, 0.5731887817382812, 5.560327529907227, 10.666549682617188, 0.5542068481445312, 12.58343505859375, -1.9039840698242188, -1.6125297546386719, 11.476760864257812, 0.3502082824707031, 8.560455322265625, -4.331672668457031, -1.8830375671386719, 3.344745635986328, -4.095222473144531, 9.782302856445312, 7.769805908203125, 1.6779594421386719, 12.451112747192383, 3.1334571838378906, -1.1789093017578125, -1.7160263061523438, -0.8043727874755859, 9.305553436279297, 5.282745361328125, 16.093280792236328, 11.596382141113281, 3.2162723541259766, -0.5653820037841797, 5.215311050415039, 17.919219970703125, 11.439468383789062, 1.5394287109375, -1.4668769836425781, -0.1475658416748047, 12.400592803955078, 5.638637542724609, 14.503799438476562, 14.224037170410156, 2.7863845825195312, 6.7391357421875, 1.1056785583496094, 11.1060791015625, 2.7105026245117188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000152.npy"}
{"epoch": 0.46060606060606063, "step": 153, "batch_size": 128, "mean": 4.248882293701172, "std": 7.766724109649658, "min": -17.80010986328125, "p10": -5.0183012008666985, "median": 3.6896610260009766, "p90": 14.517846679687498, "max": 27.28326416015625, "pos_frac": 0.71875, "sample": [13.913894653320312, -1.7643814086914062, 1.2720794677734375, -8.258575439453125, 3.5166854858398438, 3.020038604736328, 15.624290466308594, -8.418916702270508, 2.2688865661621094, 1.2595958709716797, -11.18560791015625, 4.631797790527344, 0.7497100830078125, 9.203304290771484, 4.604579925537109, 1.2728347778320312, 11.44427490234375, 13.012947082519531, -1.908477783203125, 0.05924415588378906, 21.93938446044922, -4.7689208984375, 1.7824993133544922, 5.006256103515625, 5.655242919921875, 6.915519714355469, 0.61663818359375, 13.088226318359375, 6.0317230224609375, 12.974166870117188, 12.67413330078125, 15.32745361328125, 12.705581665039062, 15.76351547241211, 3.150585174560547, 18.341041564941406, -1.53564453125, 6.557731628417969, -5.6237640380859375, -0.9856376647949219, -1.5852241516113281, 12.594306945800781, 11.968799591064453, -0.46817779541015625, 5.350189208984375, -17.80010986328125, 4.7823944091796875, 5.772504806518555, 1.338623046875, 8.384696960449219, 17.322975158691406, 17.511825561523438, 9.5814208984375, -2.5706024169921875, -0.283203125, -11.259384155273438, -0.16593551635742188, 10.700531005859375, -8.434059143066406, 10.500265121459961, -4.785181045532227, 2.4131546020507812, 14.874725341796875, -6.3264312744140625, 16.37274169921875, 3.4343643188476562, 0.02085113525390625, 10.6187744140625, -2.5888137817382812, 8.259506225585938, 5.085384368896484, 5.410699844360352, 13.218490600585938, 5.675811767578125, 7.397006988525391, 2.450387954711914, 8.737777709960938, 12.76153564453125, 6.507781982421875, -1.781209945678711, -0.5817985534667969, 3.8626365661621094, 4.07611083984375, 3.2597122192382812, 15.703460693359375, 23.427520751953125, 1.9696121215820312, -1.6646156311035156, 3.865489959716797, 6.875576019287109, 5.207649230957031, 6.627483367919922, -7.001953125, -0.6780242919921875, 9.696197509765625, 2.3613452911376953, 1.919281005859375, 0.4704246520996094, 14.911483764648438, 13.519737243652344, -10.297050476074219, 2.9232749938964844, 2.9738235473632812, -9.667182922363281, 8.346670150756836, 4.498527526855469, -2.8663368225097656, 5.711097717285156, -2.7816543579101562, -3.99981689453125, -11.229400634765625, 10.561737060546875, 1.5566368103027344, 2.155313491821289, -0.7028961181640625, -4.094573974609375, 0.9400901794433594, 0.07187652587890625, 0.9046230316162109, 8.67218017578125, 5.40374755859375, -5.562248229980469, 6.092315673828125, 27.28326416015625, -3.4358081817626953, 14.364898681640625, 8.194229125976562, -0.9208393096923828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000153.npy"}
{"epoch": 0.4636363636363636, "step": 154, "batch_size": 128, "mean": 3.904852867126465, "std": 8.239350318908691, "min": -16.708274841308594, "p10": -6.288795471191405, "median": 3.8099937438964844, "p90": 13.77289810180664, "max": 35.3016357421875, "pos_frac": 0.6875, "sample": [7.5815887451171875, -1.5296287536621094, 2.954730987548828, 1.831838607788086, -13.995155334472656, 3.0622940063476562, 24.99889373779297, -3.9841384887695312, 11.145126342773438, -0.0222930908203125, 9.52418327331543, -1.3867225646972656, 5.4806365966796875, -1.0120048522949219, -6.149589538574219, -0.8754959106445312, 2.081745147705078, -2.2919673919677734, -7.268703460693359, 6.366363525390625, -6.753379821777344, 3.3670196533203125, 7.121908187866211, -12.78765869140625, 13.5780029296875, 4.335357666015625, -9.06298828125, 9.830432891845703, 9.001113891601562, 0.10790252685546875, 0.9358062744140625, 7.299676895141602, 11.31591796875, 7.897987365722656, 0.145843505859375, 8.217926025390625, 4.730663299560547, 12.067134857177734, -12.601608276367188, -2.0868682861328125, 4.588375091552734, -3.9145889282226562, -6.613609313964844, 9.058441162109375, 16.34991455078125, -0.9195194244384766, -0.7711563110351562, 6.192604064941406, 5.004062652587891, 0.8255767822265625, 3.8711624145507812, 17.536651611328125, 15.893043518066406, -3.1177291870117188, 2.4246158599853516, 11.663810729980469, -0.8030357360839844, 15.154678344726562, -4.62420654296875, 2.96514892578125, -4.753335952758789, 11.436336517333984, 11.352798461914062, 0.08440780639648438, 11.03993034362793, 5.819875717163086, 6.189910888671875, -5.526071548461914, 9.11126708984375, 10.506202697753906, -1.8616085052490234, 1.4234619140625, -0.328857421875, 3.1668853759765625, 12.944927215576172, 4.128868103027344, 12.540206909179688, -3.084482192993164, 16.225563049316406, 8.79146957397461, 4.563083648681641, -2.0046825408935547, 8.597694396972656, -1.7079658508300781, 14.67022705078125, 0.04178619384765625, 6.9656524658203125, -16.708274841308594, 9.683389663696289, 0.6015548706054688, 12.029678344726562, 1.412933349609375, -5.6517333984375, 9.735050201416016, 35.3016357421875, 1.0075149536132812, -1.7215576171875, -7.000938415527344, 16.88128662109375, 7.3353118896484375, 26.45556640625, 9.080717086791992, -5.79443359375, -6.927009582519531, 10.466873168945312, 14.810951232910156, -1.9921703338623047, 5.689197540283203, 1.5623855590820312, -5.30157470703125, 14.227653503417969, 4.275596618652344, 3.7724533081054688, 6.020849227905273, 9.903602600097656, 8.442928314208984, 3.8475341796875, 5.1138763427734375, 4.2607574462890625, -10.7052001953125, 16.044837951660156, -9.111539840698242, 0.3528003692626953, 3.4995651245117188, 2.3298721313476562, -9.778656005859375, 8.799896240234375, 3.2983036041259766], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000154.npy"}
{"epoch": 0.4666666666666667, "step": 155, "batch_size": 128, "mean": 2.972506284713745, "std": 6.95102596282959, "min": -11.180461883544922, "p10": -5.985201263427734, "median": 3.0161819458007812, "p90": 12.390055084228516, "max": 23.386260986328125, "pos_frac": 0.6875, "sample": [2.181324005126953, 19.444732666015625, 2.729604721069336, 15.009931564331055, 4.11982536315918, -5.821937561035156, 6.310737609863281, -10.898357391357422, 3.91082763671875, 12.124300003051758, -8.775909423828125, 2.8150367736816406, 4.749126434326172, 12.002399444580078, 14.37759017944336, -0.6248435974121094, 6.4433135986328125, 6.1001434326171875, 0.17200469970703125, 0.7742691040039062, 4.796546936035156, 5.914783477783203, -1.4325218200683594, 4.767555236816406, -0.9058074951171875, 15.54085922241211, -4.663217544555664, 0.1165313720703125, 4.389171600341797, 10.861330032348633, 18.780426025390625, 3.3195762634277344, 8.264122009277344, 2.785837173461914, 3.0130996704101562, -2.176006317138672, -5.265388488769531, 2.7731494903564453, 4.71417236328125, 0.0750579833984375, 8.367721557617188, 5.107460021972656, -0.8993186950683594, 4.196758270263672, 6.8291473388671875, -1.2503623962402344, -0.4981575012207031, 1.997772216796875, 6.176300048828125, 12.477859497070312, 8.203802108764648, 8.388992309570312, 12.352424621582031, -9.322723388671875, 1.109161376953125, 3.824909210205078, 5.767627716064453, 4.869871139526367, -2.2753143310546875, -4.311004638671875, 3.110565185546875, 4.278251647949219, 6.343303680419922, 23.386260986328125, 12.140121459960938, -1.7246818542480469, 4.155338287353516, 3.8081722259521484, 3.0192642211914062, -6.8308868408203125, -11.180461883544922, 6.3120269775390625, 0.7061710357666016, 0.792388916015625, 20.944801330566406, -5.125881195068359, 2.6384048461914062, 3.832948684692383, -6.430379867553711, 14.769302368164062, -2.4837265014648438, 9.944320678710938, -5.100738525390625, 7.319091796875, 2.052154541015625, 4.6385955810546875, 6.9425811767578125, 3.4067306518554688, 6.194877624511719, -6.36614990234375, 5.682708740234375, 15.267044067382812, 2.364046096801758, -7.901588439941406, -0.9036464691162109, -2.630329132080078, 13.495872497558594, -1.8441009521484375, -4.093242645263672, -2.3772125244140625, 3.1493301391601562, 9.472625732421875, 4.8018798828125, -4.666790008544922, -9.392875671386719, -3.561349868774414, 4.561607360839844, -0.8716716766357422, -1.0649871826171875, 9.680170059204102, -8.166793823242188, 1.9993362426757812, 1.725555419921875, 5.0306396484375, -9.373931884765625, -4.788234710693359, 0.577362060546875, 0.41509246826171875, -7.645545959472656, 3.6072521209716797, 1.4498138427734375, 14.006956100463867, 8.449699401855469, 0.5991744995117188, -11.061477661132812, 15.8607177734375, -2.5305919647216797, 1.7392196655273438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000155.npy"}
{"epoch": 0.4696969696969697, "step": 156, "batch_size": 128, "mean": 4.684710502624512, "std": 7.621835231781006, "min": -18.588851928710938, "p10": -4.029256439208984, "median": 3.835357666015625, "p90": 13.755233001708984, "max": 35.167633056640625, "pos_frac": 0.765625, "sample": [9.27714729309082, 0.2282257080078125, 4.1155242919921875, -6.599876403808594, -4.095794677734375, -2.003660202026367, 2.197742462158203, 12.185432434082031, 5.94671630859375, 3.3734130859375, 11.705429077148438, 3.4517974853515625, 2.3964271545410156, 12.761978149414062, 2.2668800354003906, 1.6910133361816406, 24.469696044921875, 2.4952239990234375, 5.1316070556640625, 8.008499145507812, 3.5361175537109375, 2.7277145385742188, 3.16229248046875, 14.070877075195312, 5.230712890625, 9.198787689208984, 13.619956970214844, 9.350250244140625, -6.924560546875, -10.968040466308594, 6.16339111328125, 7.242885589599609, 4.363027572631836, 11.425537109375, 6.703376770019531, -18.588851928710938, 4.697746276855469, 5.827121734619141, 10.792984008789062, -2.1934452056884766, 3.7021942138671875, 4.743846893310547, -7.304058074951172, -4.335607528686523, -3.8982620239257812, 6.1990966796875, -9.651046752929688, 23.12017822265625, 35.167633056640625, 8.303970336914062, -1.0942649841308594, 16.13092041015625, -4.286258697509766, -2.6153564453125, 2.38946533203125, 3.1454925537109375, 2.6751480102539062, -2.8294219970703125, -1.529876708984375, 6.9962921142578125, 3.299884796142578, 12.848587036132812, 16.9747314453125, 4.14141845703125, 1.5639019012451172, 3.9685211181640625, -4.000740051269531, 2.5055694580078125, 9.672637939453125, 8.302131652832031, -0.07018470764160156, -0.6113643646240234, 0.4880943298339844, 1.396728515625, 0.2882213592529297, 15.231781005859375, 17.850296020507812, 3.5316619873046875, -5.814178466796875, 4.9861907958984375, 8.63375473022461, 16.88623809814453, -6.298583984375, 4.088001251220703, 7.575275421142578, 3.4655685424804688, 10.859771728515625, 25.709457397460938, 11.187019348144531, -0.08005523681640625, -0.18943023681640625, 5.575994491577148, 11.893310546875, 0.981292724609375, 1.4831390380859375, 6.7454071044921875, -0.3299407958984375, 2.018817901611328, 13.085254669189453, 4.829864501953125, 7.337059020996094, 5.714031219482422, 0.8233489990234375, 5.0944061279296875, -2.078887939453125, 12.902503967285156, -3.4063339233398438, 3.048431396484375, 1.60284423828125, 8.717899322509766, 18.402381896972656, -5.895515441894531, 20.372215270996094, 2.5252227783203125, -0.29582977294921875, 7.287017822265625, 9.901863098144531, 0.45819854736328125, -2.0460357666015625, 5.327533721923828, 4.260425567626953, 4.28131103515625, -7.009733200073242, 14.265579223632812, 4.104576110839844, 0.09333229064941406, 2.190338134765625, 3.5193099975585938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000156.npy"}
{"epoch": 0.4727272727272727, "step": 157, "batch_size": 128, "mean": 4.085638999938965, "std": 7.786670207977295, "min": -14.916183471679688, "p10": -5.2628738403320305, "median": 3.759922981262207, "p90": 14.449360656738278, "max": 24.475440979003906, "pos_frac": 0.703125, "sample": [2.3945999145507812, 6.418670654296875, 13.108415603637695, 0.35833168029785156, 6.390876770019531, 9.964439392089844, 5.663963317871094, 2.795217514038086, 2.8077774047851562, -3.727933883666992, -5.59857177734375, -1.490631103515625, 3.4555206298828125, 9.311141967773438, -1.1620769500732422, 19.288169860839844, -7.259346008300781, -3.8606033325195312, 16.819183349609375, 14.232948303222656, 2.239774703979492, -0.7895927429199219, -4.8397674560546875, 16.015518188476562, 7.7995758056640625, 2.516162872314453, 9.458389282226562, 0.9421234130859375, 4.528465270996094, -2.0815811157226562, -2.6755142211914062, -14.916183471679688, 8.765361785888672, -3.1993179321289062, 0.0044708251953125, 10.920576095581055, 7.212181091308594, 3.6090774536132812, 4.291290283203125, -7.7839813232421875, -9.812061309814453, -10.64730453491211, 0.23334121704101562, 4.496612548828125, 10.21661376953125, 7.150657653808594, 22.587852478027344, 12.454004287719727, 5.192588806152344, 1.9767684936523438, 2.7201995849609375, -5.937873840332031, 24.475440979003906, 2.57794189453125, 4.215476989746094, -1.065673828125, 6.628265380859375, -8.061090469360352, 8.282585144042969, -7.610069274902344, -4.053825378417969, 1.801483154296875, 10.854347229003906, 9.0465087890625, 11.04135513305664, 1.202871322631836, 6.0761871337890625, 9.606853485107422, 2.319793701171875, 10.062095642089844, 1.090789794921875, 6.812782287597656, 13.046113967895508, 6.270532608032227, 14.954322814941406, 1.4180831909179688, 5.429962158203125, -5.1190032958984375, 8.865306854248047, 15.562896728515625, -0.8010711669921875, -1.6348628997802734, -0.17431259155273438, 5.648429870605469, -3.679412841796875, 4.498260498046875, 0.4420509338378906, 9.364660263061523, -8.071952819824219, 10.210052490234375, 10.88372802734375, 12.501949310302734, 3.63311767578125, 6.136383056640625, 3.8302154541015625, -9.978744506835938, 6.9358673095703125, 9.346931457519531, 6.925750732421875, -4.29707145690918, 24.424484252929688, -1.59576416015625, -3.707538604736328, 21.005767822265625, 21.170440673828125, -7.657936096191406, -2.606292724609375, 11.100288391113281, 4.8642120361328125, -4.0662994384765625, 10.834640502929688, 5.2063446044921875, 0.8116531372070312, -12.675369262695312, 14.982593536376953, 4.268714904785156, 2.7779579162597656, -1.7544403076171875, 7.175594329833984, 17.72780990600586, 2.6789894104003906, 6.664947509765625, 0.7347259521484375, -5.039276123046875, 3.6896305084228516, -1.1028823852539062, -2.041126251220703, 15.079158782958984], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000157.npy"}
{"epoch": 0.47575757575757577, "step": 158, "batch_size": 128, "mean": 4.9752631187438965, "std": 7.367851257324219, "min": -14.112018585205078, "p10": -3.95607967376709, "median": 4.901340484619141, "p90": 15.360880279541012, "max": 21.303295135498047, "pos_frac": 0.7109375, "sample": [-6.509269714355469, 5.249053955078125, 9.687850952148438, 4.4561920166015625, -0.6322708129882812, 3.7546348571777344, -9.518157958984375, -2.270967483520508, 9.21854019165039, -4.542518615722656, 0.3152618408203125, -1.3350181579589844, 2.093547821044922, 10.68524169921875, 1.285593032836914, 8.544692993164062, 10.917610168457031, 5.327117919921875, 9.673187255859375, 16.198211669921875, 6.0357208251953125, 1.256072998046875, -7.2435455322265625, 5.2044677734375, -14.112018585205078, 4.253326416015625, 11.930198669433594, -1.8006515502929688, 4.914337158203125, 0.13580703735351562, 8.736568450927734, 5.925254821777344, -3.961629867553711, 14.794586181640625, -6.3830108642578125, 8.948755264282227, 1.2689151763916016, 12.648834228515625, -2.2788658142089844, 4.119537353515625, 4.65533447265625, 5.15509033203125, 12.56884765625, -0.6265106201171875, 3.4676380157470703, 7.296998977661133, 3.27508544921875, -2.511444091796875, 6.6175384521484375, -0.5781097412109375, 6.0790252685546875, 12.877857208251953, 2.2038421630859375, 5.9363861083984375, 3.4345550537109375, 21.303295135498047, 9.807706832885742, 14.984603881835938, 12.31202507019043, -8.078052520751953, 5.12628173828125, 1.2889537811279297, 19.70733642578125, -3.1340866088867188, -3.9537010192871094, 3.4997711181640625, 9.730186462402344, -3.6762619018554688, -4.161624908447266, -0.944000244140625, 9.57342529296875, 14.031349182128906, -7.076885223388672, -5.009666442871094, -6.174764633178711, 16.18311882019043, 6.364248275756836, 19.80158233642578, -1.6442909240722656, 3.8419570922851562, 0.6979751586914062, 16.259918212890625, 6.998382568359375, 1.9350852966308594, 2.2880477905273438, 7.562782287597656, 4.888343811035156, -1.3294219970703125, -1.2760848999023438, 17.420608520507812, -5.175537109375, -2.8094234466552734, -1.0202102661132812, -2.5152435302734375, 9.06591796875, 1.94122314453125, 4.305599212646484, 6.274726867675781, 13.089298248291016, 16.7554931640625, 7.218584060668945, 16.811416625976562, 7.4355621337890625, -2.406707763671875, -2.435321807861328, 7.1273956298828125, 8.223434448242188, 0.41449737548828125, -2.936504364013672, 10.488819122314453, 15.069145202636719, 21.27349853515625, 16.041595458984375, 3.6589279174804688, 10.777128219604492, 0.0010433197021484375, 13.639404296875, -0.5464763641357422, -1.5533828735351562, 12.37481689453125, -2.6696243286132812, 19.774288177490234, 15.061309814453125, 8.261367797851562, 8.955673217773438, 5.4861602783203125, 18.540843963623047, 6.843452453613281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000158.npy"}
{"epoch": 0.47878787878787876, "step": 159, "batch_size": 128, "mean": 5.235345840454102, "std": 9.152689933776855, "min": -21.27886962890625, "p10": -6.269061279296874, "median": 5.0621337890625, "p90": 17.73012008666992, "max": 32.55337142944336, "pos_frac": 0.703125, "sample": [11.829750061035156, 8.925958633422852, 1.0367717742919922, 8.211761474609375, 6.43695068359375, 25.838436126708984, -0.8272991180419922, -0.7108001708984375, 10.237800598144531, 10.164260864257812, 17.389434814453125, 21.37350082397461, -7.014795303344727, -1.2208709716796875, 8.766311645507812, -8.279945373535156, 6.471797943115234, 14.596639633178711, 17.718948364257812, -5.084663391113281, 6.01129150390625, -1.6885833740234375, 6.4684600830078125, 16.077049255371094, 6.817390441894531, -2.691638946533203, 7.018547058105469, 10.228435516357422, 3.8889007568359375, 14.563568115234375, 0.6976203918457031, 10.833236694335938, -1.8561019897460938, 7.366186141967773, -2.0238285064697266, 1.3849029541015625, -3.0251502990722656, 0.7158927917480469, 5.6686248779296875, 7.338521957397461, 12.606719970703125, 6.3118743896484375, 10.486343383789062, 10.842119216918945, 11.643577575683594, -21.27886962890625, 32.55337142944336, -3.5238189697265625, -1.3687362670898438, 8.319564819335938, 20.048500061035156, -3.051025390625, 1.3199920654296875, 6.988397598266602, 6.440895080566406, 12.297927856445312, -0.803314208984375, 9.063140869140625, 18.40060806274414, 13.408857345581055, 1.5187759399414062, 6.384391784667969, -1.3670196533203125, 4.4907379150390625, 4.66717529296875, 17.126754760742188, -1.295166015625, -7.901123046875, 21.47821044921875, 10.90487289428711, 0.449798583984375, -13.019882202148438, 2.357799530029297, -1.6991767883300781, 9.307546615600586, 0.6851387023925781, -4.601688385009766, 6.729579925537109, 16.50848388671875, 2.173553466796875, -7.6193084716796875, 0.7694244384765625, -0.14409637451171875, 23.927459716796875, 27.567794799804688, 2.0067138671875, 14.467498779296875, 2.4078521728515625, -0.029632568359375, 13.423934936523438, 0.6913738250732422, 23.2886962890625, 4.538928985595703, 3.3835296630859375, -9.996257781982422, -1.8067855834960938, 5.066295623779297, 20.632099151611328, 4.426765441894531, -4.8518829345703125, 6.3934783935546875, 6.4711151123046875, 13.164541244506836, 1.722015380859375, -0.6887626647949219, 4.472126007080078, 18.5103759765625, -9.16912841796875, 5.3276214599609375, 5.057971954345703, -6.941162109375, 7.70457649230957, -5.98101806640625, 4.307914733886719, 12.141166687011719, -8.834114074707031, -9.7847900390625, 8.807662963867188, 6.7407379150390625, 19.210289001464844, 17.756187438964844, -8.062614440917969, 1.5422897338867188, -5.230400085449219, 12.052637100219727, -10.236627578735352, -4.175945281982422, 0.4375457763671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000159.npy"}
{"epoch": 0.4818181818181818, "step": 160, "batch_size": 128, "mean": 4.344736099243164, "std": 9.296332359313965, "min": -18.332839965820312, "p10": -5.800676345825195, "median": 3.294902801513672, "p90": 14.610292053222656, "max": 30.300048828125, "pos_frac": 0.6640625, "sample": [9.053232192993164, 2.3934326171875, -4.212867736816406, 16.968154907226562, 2.352142333984375, 2.0750732421875, 6.4016571044921875, 2.4934463500976562, -1.8138275146484375, 29.4744873046875, 4.1752471923828125, 15.689922332763672, 6.259284973144531, 0.1221923828125, 3.4386138916015625, 9.092254638671875, 6.365997314453125, 27.6949462890625, -7.002605438232422, 5.5837860107421875, -5.2989349365234375, 6.141712188720703, 10.493812561035156, -6.2720184326171875, -13.39665412902832, 10.417686462402344, 0.21989822387695312, 0.4460296630859375, -0.11737060546875, 1.3700637817382812, 7.334526062011719, 14.002098083496094, -2.2911376953125, 4.17088508605957, 11.028999328613281, 10.997825622558594, -1.4150314331054688, 0.711456298828125, 9.251510620117188, 8.639053344726562, 27.361900329589844, -2.2565383911132812, 12.462604522705078, 5.2620391845703125, 12.098522186279297, 14.698165893554688, -3.7726821899414062, -6.554859161376953, 8.961528778076172, 5.932062149047852, 1.343963623046875, 0.5608711242675781, 7.3604583740234375, 0.5217399597167969, -18.332839965820312, -5.039703369140625, 9.837739944458008, 7.68440055847168, 13.196281433105469, 13.899957656860352, -0.3962249755859375, -6.174247741699219, 21.764537811279297, 5.684928894042969, 8.643985748291016, -0.42235565185546875, 1.6537818908691406, 2.4560070037841797, 7.445240020751953, -2.084596633911133, 12.068962097167969, -2.6011829376220703, 6.00408935546875, -2.5462417602539062, -9.667930603027344, 7.345752716064453, -4.533693313598633, 4.749362945556641, -17.107467651367188, 2.374544143676758, 6.778652191162109, 14.5726318359375, -0.11067771911621094, 28.37529754638672, 3.006877899169922, -3.1501846313476562, -11.478759765625, -2.866056442260742, -5.901519775390625, -2.959989547729492, 3.058185577392578, -5.757457733154297, 20.511367797851562, -2.503936767578125, -3.0998077392578125, 9.244129180908203, 3.8235626220703125, -0.6631660461425781, -5.345878601074219, 6.988164901733398, -1.1697731018066406, -4.0560760498046875, -9.745620727539062, 13.675567626953125, 3.9827880859375, 30.300048828125, 27.763534545898438, 10.527755737304688, 6.815458297729492, 1.2296981811523438, -4.632530212402344, -7.387508392333984, 10.223243713378906, 22.49686050415039, 3.1511917114257812, 0.9376296997070312, -2.5406265258789062, -2.141094207763672, 23.967864990234375, 8.742591857910156, -0.8887100219726562, 7.576921463012695, 11.186973571777344, 1.698394775390625, 4.082756042480469, -11.18166732788086, 7.895986557006836, 8.17133903503418], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000160.npy"}
{"epoch": 0.48484848484848486, "step": 161, "batch_size": 128, "mean": 5.604058265686035, "std": 7.70290994644165, "min": -24.23162841796875, "p10": -3.81539306640625, "median": 5.807098388671875, "p90": 15.290225982666016, "max": 20.753509521484375, "pos_frac": 0.75, "sample": [1.3664779663085938, 10.689403533935547, -2.298501968383789, 8.2891845703125, 8.239936828613281, -3.8555221557617188, 17.83465576171875, 6.624534606933594, -1.710693359375, 8.679977416992188, 9.151962280273438, 18.862258911132812, 3.6337432861328125, -0.91485595703125, -5.817333221435547, -2.0206451416015625, 10.601982116699219, 5.981021881103516, 1.916830062866211, 12.476966857910156, 13.873252868652344, -2.2207393646240234, 3.3300342559814453, 15.327262878417969, 16.407127380371094, 11.111541748046875, 5.178836822509766, 4.489824295043945, 10.904098510742188, 17.629497528076172, 20.753509521484375, 8.655464172363281, 15.687088012695312, -0.4972057342529297, 14.37398910522461, -0.8125, 2.7789688110351562, 12.86102294921875, -5.045860290527344, -0.6827201843261719, 7.035316467285156, -6.784400939941406, -3.395589828491211, -1.74310302734375, 7.591991424560547, -0.6747894287109375, 7.668914794921875, 9.06976318359375, -0.8581295013427734, -0.9277153015136719, 3.4666213989257812, 10.8349609375, -2.3734512329101562, -10.197196960449219, 10.079795837402344, 9.742507934570312, 3.8990821838378906, 2.904970169067383, 15.27435302734375, -14.106086730957031, 4.938873291015625, -8.293479919433594, 3.453857421875, -4.095344543457031, -3.541452407836914, 11.565071105957031, 9.045585632324219, 6.182456970214844, 9.232864379882812, 5.021787643432617, 14.001670837402344, 9.821756362915039, 2.5637359619140625, 5.6055755615234375, 0.114532470703125, 3.1446075439453125, 7.283855438232422, 1.69415283203125, -5.867338180541992, 8.72886848449707, 6.080780029296875, 7.95562744140625, 13.764419555664062, 4.455286026000977, -3.7981948852539062, 19.91144561767578, 2.3502578735351562, 1.8960380554199219, -6.043918609619141, -24.23162841796875, 9.82518196105957, 6.230987548828125, 4.335468292236328, 4.886777877807617, 19.650466918945312, 3.3266448974609375, 12.077699661254883, -2.280658721923828, -3.047454833984375, 15.113037109375, 16.061767578125, 20.23378562927246, 10.22222900390625, 5.788051605224609, 12.557201385498047, 13.629241943359375, 5.826145172119141, 4.440202713012695, 0.2739524841308594, -0.7898063659667969, 1.6702194213867188, 9.250473022460938, 18.24212646484375, 12.80207633972168, 1.8677635192871094, 8.569417953491211, 5.362548828125, 13.795036315917969, 11.6612548828125, 8.971141815185547, 5.852577209472656, 1.87957763671875, 14.92742919921875, -6.940652847290039, 18.143192291259766, -4.36346435546875, 11.225296020507812, 4.759052276611328], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000161.npy"}
{"epoch": 0.48787878787878786, "step": 162, "batch_size": 128, "mean": 5.335984706878662, "std": 8.665324211120605, "min": -14.394088745117188, "p10": -4.88513126373291, "median": 4.715263366699219, "p90": 18.510496520996092, "max": 29.131927490234375, "pos_frac": 0.734375, "sample": [4.711570739746094, 2.2890625, 2.5968399047851562, -3.6598892211914062, 4.857231140136719, 8.127426147460938, 6.903106689453125, 5.26051139831543, -4.505252838134766, 9.696807861328125, 8.643661499023438, 2.267486572265625, 5.336273193359375, 20.832748413085938, 1.2013969421386719, 5.918434143066406, 0.3169708251953125, -1.2229652404785156, 18.377052307128906, 4.7022247314453125, -9.7822265625, 13.003246307373047, 6.650199890136719, 0.6645183563232422, 25.871322631835938, -0.7961959838867188, -14.394088745117188, -4.677314758300781, 7.742919921875, 3.1289825439453125, 1.061553955078125, 8.059974670410156, 1.3002548217773438, 19.533470153808594, 10.15216064453125, -6.630132675170898, 9.66802978515625, 0.8086090087890625, 23.39427947998047, -12.212825775146484, 23.245498657226562, 0.3487968444824219, -13.453544616699219, 5.478694915771484, 1.0332221984863281, -6.2790374755859375, 29.131927490234375, 5.7154998779296875, 6.382591247558594, -1.356964111328125, -8.492431640625, 5.724853515625, 19.98590087890625, 28.814926147460938, 17.057418823242188, -4.9735565185546875, 23.6387939453125, 11.094985961914062, 2.596405029296875, -4.847234725952148, 7.834144592285156, -6.240394592285156, -2.2593460083007812, 4.591407775878906, 12.954681396484375, 18.268653869628906, -0.8872604370117188, -1.031402587890625, -6.250450134277344, 16.922897338867188, 3.6432743072509766, 11.36385726928711, 11.054908752441406, -8.623836517333984, 8.130813598632812, -3.22760009765625, 9.041839599609375, 2.63055419921875, 2.1443328857421875, 10.355430603027344, 12.230567932128906, 8.331592559814453, 3.4579315185546875, 8.295291900634766, -0.9882278442382812, -0.6808147430419922, 10.194236755371094, 4.981422424316406, 5.160587310791016, 11.11331558227539, 18.419296264648438, 1.4406204223632812, 3.3918380737304688, 5.867927551269531, 18.989334106445312, 1.68365478515625, -8.679649353027344, -0.04550933837890625, 12.070316314697266, 12.466930389404297, 4.469444274902344, 8.226051330566406, 6.1129150390625, 1.656097412109375, 7.166481018066406, 4.718955993652344, -2.20025634765625, -0.20828628540039062, 18.723297119140625, 7.842174530029297, 1.4070911407470703, -7.00152587890625, 5.145042419433594, -0.5658721923828125, 19.54742431640625, -0.7303962707519531, 20.820449829101562, -1.8761234283447266, 5.202571868896484, 2.5253372192382812, 0.7512626647949219, -0.08740234375, 1.457855224609375, 1.5639190673828125, 14.908348083496094, 10.957061767578125, 8.356399536132812, -0.04361724853515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000162.npy"}
{"epoch": 0.4909090909090909, "step": 163, "batch_size": 128, "mean": 3.6169590950012207, "std": 7.6116042137146, "min": -13.357423782348633, "p10": -6.193645858764648, "median": 3.911937713623047, "p90": 12.327262496948242, "max": 24.76080322265625, "pos_frac": 0.6875, "sample": [5.3271331787109375, 14.064132690429688, 4.750751495361328, 8.701656341552734, 0.9396324157714844, 12.519264221191406, 11.368106842041016, -0.1881885528564453, 11.715471267700195, 5.349922180175781, 1.380706787109375, -5.490407943725586, 22.11007308959961, -7.5448455810546875, 8.142988204956055, 1.307016372680664, 6.448448181152344, 6.625335693359375, 5.686370849609375, 17.498611450195312, -3.183696746826172, 4.45135498046875, 3.5834884643554688, 8.194091796875, 14.300046920776367, 9.628772735595703, 17.61367416381836, -9.169403076171875, 3.4486312866210938, -8.158592224121094, 1.9496917724609375, -0.02841949462890625, 2.4611129760742188, 7.3804168701171875, 8.372825622558594, 10.725408554077148, 1.6589126586914062, -2.972597122192383, 5.1501312255859375, -5.103076934814453, 2.366250991821289, 24.589981079101562, 5.541606903076172, 10.808511734008789, -7.383481979370117, 5.5224456787109375, -0.2627105712890625, 7.18212890625, 5.0919189453125, 2.163829803466797, -5.3646697998046875, 24.76080322265625, -13.247634887695312, 1.3794021606445312, -1.2622451782226562, 5.995368957519531, 8.81662368774414, 2.2703094482421875, 5.1798858642578125, 4.343799591064453, 5.777809143066406, 2.2280654907226562, 7.9969940185546875, -1.3535690307617188, -11.341739654541016, -2.7131805419921875, 3.919391632080078, 3.9044837951660156, 8.493377685546875, -11.14080810546875, 21.24138641357422, 4.795654296875, 1.9365062713623047, 9.856002807617188, -0.22632217407226562, 16.722373962402344, 13.538581848144531, -2.2932472229003906, -1.5721206665039062, 3.6920700073242188, 8.09122085571289, 0.9349708557128906, 6.338081359863281, 5.143917083740234, 3.2442283630371094, -0.216156005859375, -9.576446533203125, 3.7339935302734375, 5.152639389038086, 14.15086555480957, 5.4203338623046875, -5.960685729980469, -4.400909423828125, 10.573677062988281, -11.410316467285156, 5.2621917724609375, 0.0002288818359375, 11.810798645019531, 0.44004058837890625, 8.421321868896484, 4.3079071044921875, 1.6906890869140625, -2.773468017578125, 8.040428161621094, -9.29391860961914, -1.5600433349609375, -4.463890075683594, -1.7047271728515625, -4.92462158203125, 0.9567718505859375, 12.244976043701172, -6.492790222167969, 7.862815856933594, 17.120174407958984, -2.2172622680664062, -1.0065422058105469, 8.034839630126953, 6.081615447998047, 2.4203414916992188, -8.223337173461914, -13.357423782348633, -0.9901275634765625, 9.422828674316406, 11.3876953125, 9.765846252441406, -6.065441131591797, -2.279022216796875, 8.863687515258789], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000163.npy"}
{"epoch": 0.49393939393939396, "step": 164, "batch_size": 128, "mean": 3.6595427989959717, "std": 8.323163032531738, "min": -23.408798217773438, "p10": -5.330024719238281, "median": 2.711637496948242, "p90": 14.85634078979492, "max": 23.344573974609375, "pos_frac": 0.640625, "sample": [12.527587890625, -15.50439453125, -0.2960357666015625, 20.2547607421875, -1.1576385498046875, -0.51263427734375, -4.673828125, 10.07327651977539, 11.797653198242188, 0.3191852569580078, -0.290618896484375, -5.225059509277344, 0.9524974822998047, 1.1622085571289062, 5.37354850769043, 9.25775146484375, -8.9013671875, 10.532135009765625, 0.5739822387695312, -1.0015182495117188, 4.323883056640625, 12.021324157714844, -12.385047912597656, -6.584680557250977, 20.865318298339844, 7.019205093383789, 12.505329132080078, 3.329784393310547, 2.8712539672851562, 6.153316497802734, -4.645626068115234, -1.2115039825439453, -0.05644416809082031, 9.158021926879883, 19.82637596130371, 1.7015190124511719, -12.283086776733398, 10.5684814453125, 3.0979156494140625, 8.587974548339844, 15.293365478515625, 1.66400146484375, -2.6103515625, 5.446437835693359, 1.0032539367675781, -5.841766357421875, 4.000846862792969, -0.6064891815185547, 18.215957641601562, 5.811147689819336, -23.408798217773438, -0.511077880859375, 6.539710998535156, 23.344573974609375, -3.1565170288085938, 9.657157897949219, 7.64459228515625, 1.9576339721679688, -0.38481903076171875, -1.098175048828125, -5.574943542480469, -6.138248443603516, 7.2156524658203125, -6.732666015625, 19.653030395507812, -1.3248825073242188, 3.3921432495117188, 10.134407043457031, 7.655769348144531, 1.8929386138916016, -4.0308837890625, 16.24612045288086, 11.706092834472656, 3.5612964630126953, 12.245132446289062, -0.7816581726074219, 9.108917236328125, -3.08404541015625, -1.248281478881836, 10.49371337890625, 9.661548614501953, 5.2540130615234375, 14.669044494628906, -2.8762741088867188, -4.366462707519531, 2.535848617553711, 22.1248779296875, 7.3949737548828125, 1.0374221801757812, 2.5975914001464844, 8.947074890136719, -4.99639892578125, 8.848888397216797, 11.565231323242188, 2.82568359375, 0.7129764556884766, 10.716690063476562, 5.4182586669921875, 0.8221969604492188, 9.575790405273438, 15.412002563476562, 0.5653762817382812, 2.57855224609375, -3.42376708984375, -3.0804710388183594, -11.179779052734375, 16.692535400390625, 5.708900451660156, -13.039993286132812, 0.5574569702148438, -13.852928161621094, -1.1108531951904297, 9.569465637207031, 8.804229736328125, -4.504655838012695, -3.881265640258789, 1.2722320556640625, 9.472160339355469, -0.7219028472900391, -2.1343536376953125, 5.983909606933594, 5.479152679443359, 16.159912109375, -0.58221435546875, 10.436408996582031, -2.427867889404297, 18.35232925415039, 11.37285041809082], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000164.npy"}
{"epoch": 0.49696969696969695, "step": 165, "batch_size": 128, "mean": 5.388072967529297, "std": 8.775738716125488, "min": -19.04470443725586, "p10": -5.365061950683594, "median": 4.630185127258301, "p90": 16.68693389892578, "max": 23.740867614746094, "pos_frac": 0.71875, "sample": [6.258100509643555, 3.2593994140625, 3.0010986328125, 7.6825103759765625, -14.98931884765625, 4.313816070556641, 21.747528076171875, 1.1160812377929688, -1.8985671997070312, 15.776397705078125, -5.333892822265625, 4.183738708496094, 7.010894775390625, 0.7223758697509766, 20.63519287109375, 6.5783843994140625, -7.548332214355469, -8.206085205078125, 6.652360916137695, 0.0660858154296875, 23.740867614746094, -1.3416919708251953, 13.755998611450195, 7.1016998291015625, 2.5100364685058594, 10.950891494750977, -4.504112243652344, 10.280105590820312, 9.0267333984375, -10.172920227050781, 21.402549743652344, 18.197677612304688, 17.376007080078125, 1.144195556640625, 9.708915710449219, 6.486030578613281, -4.347932815551758, 8.023357391357422, 0.5921955108642578, -9.355911254882812, -10.38916015625, 7.335968017578125, 23.088817596435547, 11.422554016113281, 0.364105224609375, -0.17676544189453125, 14.105583190917969, 12.893440246582031, 20.060073852539062, 12.024505615234375, 2.7578659057617188, -5.4377899169921875, -1.392913818359375, 6.498828887939453, -1.3510875701904297, 0.47354888916015625, 2.0083484649658203, 15.612979888916016, 3.274242401123047, -7.07708740234375, 9.416145324707031, 0.083648681640625, -0.0270538330078125, -6.13226318359375, -0.8754501342773438, -4.728584289550781, -0.4450969696044922, -3.5111923217773438, 4.409244537353516, -19.04470443725586, 12.332199096679688, 4.281730651855469, 19.458480834960938, -9.664871215820312, 18.36351776123047, 12.59588623046875, 12.070255279541016, -3.557527542114258, 12.313629150390625, -3.8061294555664062, -2.168926239013672, 7.763324737548828, -4.082635879516602, 16.154151916503906, 5.468879699707031, 19.55743408203125, 4.6232757568359375, 4.308305740356445, -3.5157699584960938, 11.387931823730469, 1.2576446533203125, 11.7457275390625, 15.576282501220703, 14.538900375366211, 6.5774993896484375, 1.9387893676757812, 2.3676681518554688, 13.924888610839844, 12.403450012207031, 8.452913284301758, -1.4284133911132812, -3.2048416137695312, 3.5806121826171875, 14.737442016601562, 9.495597839355469, 13.957565307617188, 2.8435821533203125, -2.2914657592773438, -5.831119537353516, 3.8797988891601562, 4.637094497680664, 9.737312316894531, 0.1584625244140625, 16.391616821289062, 5.436393737792969, 13.091167449951172, 13.425369262695312, 14.326784133911133, 3.347827911376953, 6.5026702880859375, 9.492347717285156, -0.7136287689208984, 21.865982055664062, -12.976478576660156, 8.986543655395508, 9.672401428222656, -0.06320953369140625, 21.105789184570312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000165.npy"}
{"epoch": 0.5, "step": 166, "batch_size": 128, "mean": 6.064634323120117, "std": 8.65263843536377, "min": -14.827804565429688, "p10": -2.6972290039062496, "median": 5.177375793457031, "p90": 18.689961242675782, "max": 30.78961944580078, "pos_frac": 0.765625, "sample": [19.28031349182129, 5.432975769042969, 7.484550476074219, 11.281999588012695, 7.544242858886719, 22.637237548828125, 10.08319091796875, 16.225433349609375, 30.746044158935547, 11.489883422851562, 6.781593322753906, 21.316011428833008, 7.78831672668457, 2.005878448486328, 3.4586944580078125, 4.549160003662109, 9.977474212646484, 12.057632446289062, -1.1426849365234375, -1.8861312866210938, 4.546318054199219, -4.023044586181641, -0.278900146484375, 8.883769989013672, -9.388595581054688, 0.8344631195068359, -5.9753570556640625, 10.743881225585938, 7.533180236816406, -2.5626449584960938, 10.449333190917969, 1.1770668029785156, 4.334953308105469, 5.380779266357422, 5.129365921020508, 23.28924560546875, 14.559219360351562, 18.851364135742188, 0.9063720703125, 0.0150299072265625, -0.6056842803955078, 15.894401550292969, 10.942201614379883, 29.709197998046875, 4.26220703125, 7.23492431640625, 18.62078857421875, 5.225385665893555, -6.622657775878906, 11.024314880371094, 5.359870910644531, -2.850311279296875, 7.676788330078125, -14.827804565429688, 4.130584716796875, -5.942207336425781, -3.058645248413086, 12.09768295288086, 4.270801544189453, -8.327341079711914, 12.092613220214844, 6.624765396118164, 3.875640869140625, 16.069154739379883, -1.3911018371582031, 2.60882568359375, 2.5528488159179688, -0.4703521728515625, 0.6009674072265625, 6.112060546875, 9.405906677246094, -0.15018081665039062, 3.510417938232422, 2.1378326416015625, 1.0226783752441406, 5.7329559326171875, 10.900886535644531, -0.20439910888671875, -1.1925182342529297, 13.367504119873047, -0.8343887329101562, 4.482513427734375, -4.35284423828125, 2.5148963928222656, 3.1602325439453125, 20.89879608154297, -10.443843841552734, 1.2771282196044922, 7.210479736328125, 1.7588481903076172, 22.826263427734375, 12.59912109375, 4.586494445800781, -2.1991348266601562, -2.631622314453125, -12.00811767578125, -0.9545745849609375, 6.025943756103516, 1.10333251953125, 30.78961944580078, 19.559539794921875, -1.3639755249023438, 15.358238220214844, 9.098762512207031, 8.377120971679688, 6.5764007568359375, 0.3879814147949219, 1.5587692260742188, 1.825469970703125, 6.985496520996094, 6.7804412841796875, 17.613407135009766, 11.830184936523438, 8.87860107421875, 6.440347671508789, 1.3408393859863281, 0.46363067626953125, 0.959808349609375, 9.369735717773438, 10.592487335205078, 21.44342803955078, -0.8376083374023438, 7.3466796875, 14.163896560668945, 25.215782165527344, 3.709503173828125, -12.012306213378906, -0.13726043701171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000166.npy"}
{"epoch": 0.503030303030303, "step": 167, "batch_size": 128, "mean": 5.453766345977783, "std": 8.829275131225586, "min": -23.057403564453125, "p10": -5.043664169311523, "median": 4.512969970703125, "p90": 16.481371307373042, "max": 27.403564453125, "pos_frac": 0.765625, "sample": [2.8401565551757812, -1.1991157531738281, 10.530975341796875, 18.867721557617188, 6.477325439453125, 0.5653839111328125, 3.3079795837402344, 8.842803955078125, 12.655784606933594, 18.531631469726562, 19.794570922851562, 14.16830062866211, -4.793983459472656, -0.8284072875976562, 4.0661468505859375, 3.6794967651367188, -4.100807189941406, 2.1747207641601562, 13.734163284301758, 8.27215576171875, 25.412704467773438, 0.32126808166503906, 2.2744827270507812, -7.070457458496094, -2.883495330810547, 9.799118041992188, 2.5966949462890625, -1.3828697204589844, 6.907922744750977, 18.925331115722656, 4.267200469970703, -1.0256576538085938, 5.214168548583984, -0.58636474609375, 14.593574523925781, 6.51915168762207, 7.32159423828125, 8.369731903076172, 15.041593551635742, 1.488250732421875, 10.866445541381836, 0.16516876220703125, -3.087106704711914, 7.788848876953125, 5.419162750244141, 10.501609802246094, 10.363235473632812, 15.204914093017578, -1.2517566680908203, 0.3316497802734375, -7.070331573486328, 2.2652626037597656, 4.6533355712890625, -4.881008148193359, 27.403564453125, 10.363136291503906, 11.341644287109375, 5.8678436279296875, 12.707664489746094, 9.373943328857422, 17.40716552734375, 18.02252197265625, 13.84918212890625, 10.588508605957031, -12.530387878417969, -23.057403564453125, 3.422382354736328, 3.9090728759765625, 7.08734130859375, -4.1109619140625, -9.545562744140625, 26.848220825195312, 2.505035400390625, -3.742155075073242, -11.033111572265625, 10.792427062988281, 9.607742309570312, 22.46562957763672, -3.2058868408203125, 6.089134216308594, 19.51990509033203, -13.150238037109375, 10.540058135986328, 4.5782012939453125, 12.869598388671875, 10.216514587402344, 3.249847412109375, 5.859153747558594, -7.050079345703125, 13.900123596191406, 6.086250305175781, 3.0205230712890625, -0.69000244140625, 3.688251495361328, -5.423194885253906, 11.528297424316406, 2.7522354125976562, -13.0650634765625, 2.9670677185058594, 24.4447021484375, -6.110279083251953, 6.3643646240234375, 1.144906997680664, 4.881418228149414, 14.989032745361328, -11.637947082519531, 4.4477386474609375, 2.646289825439453, 14.805549621582031, 2.900928497314453, -0.8578033447265625, 0.4729900360107422, 3.34735107421875, -8.5096435546875, -1.4316768646240234, 2.7022018432617188, 16.08460235595703, 1.1787567138671875, 2.78118896484375, 2.563222885131836, 3.01507568359375, 20.239608764648438, 6.6745147705078125, 3.8046531677246094, 12.38714599609375, 11.158000946044922, 10.718208312988281, 14.092460632324219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000167.npy"}
{"epoch": 0.5060606060606061, "step": 168, "batch_size": 128, "mean": 4.680984973907471, "std": 9.537724494934082, "min": -18.80682373046875, "p10": -6.37731590270996, "median": 3.066922187805176, "p90": 17.08439903259277, "max": 33.76287841796875, "pos_frac": 0.6484375, "sample": [15.152328491210938, 15.0821533203125, -9.958639144897461, 33.76287841796875, 2.6951828002929688, -1.563690185546875, 11.585966110229492, 0.007724761962890625, 0.9398345947265625, 3.996339797973633, 2.7923831939697266, 15.827882766723633, -6.180986404418945, -0.38961029052734375, -0.6493873596191406, 20.591907501220703, 4.591777801513672, -2.0378036499023438, 6.3289337158203125, -7.580944061279297, 16.61847686767578, 2.5342788696289062, -1.7382659912109375, 19.91277313232422, -1.4119186401367188, -2.1882362365722656, -0.2669677734375, -3.834278106689453, 1.066986083984375, -10.77104377746582, -18.80682373046875, -6.471195220947266, 23.903106689453125, 10.632312774658203, 21.807235717773438, 14.506546020507812, 4.0416412353515625, -1.23944091796875, 26.496498107910156, 15.479354858398438, -3.5229644775390625, -1.20770263671875, 9.919868469238281, -7.6013946533203125, 0.2859077453613281, 12.755531311035156, 15.644233703613281, -0.6468582153320312, 1.8048934936523438, -2.6354293823242188, -8.5435791015625, 13.086029052734375, 4.545978546142578, 18.171550750732422, -4.44903564453125, -8.723709106445312, 12.643646240234375, 12.533332824707031, 2.6675281524658203, 12.362716674804688, -5.114387512207031, -0.0416717529296875, -1.8602218627929688, 4.061676025390625, -0.30178070068359375, 5.9943695068359375, 7.133338928222656, 2.0421142578125, 19.574600219726562, 11.251029968261719, 8.609298706054688, -9.229415893554688, -0.10528564453125, 7.316368103027344, 25.68585205078125, -2.236410140991211, 3.6187591552734375, 6.297325134277344, 13.555381774902344, 6.044029235839844, 2.7281455993652344, 6.447052001953125, -2.37420654296875, -0.4760780334472656, 2.7806625366210938, -6.783958435058594, 15.111063003540039, 3.83721923828125, 0.058315277099609375, 11.102737426757812, 1.674062728881836, -0.9909820556640625, 9.219707489013672, 10.740364074707031, 32.36663818359375, 3.858745574951172, 19.947467803955078, 3.555797576904297, -2.2261009216308594, 1.5934410095214844, 21.629154205322266, -3.8855972290039062, 9.505020141601562, 0.12600326538085938, -8.502098083496094, 12.905445098876953, 4.5716094970703125, -17.592636108398438, 2.1186752319335938, -11.168159484863281, 21.465576171875, -6.3370819091796875, -0.30426025390625, 0.2535858154296875, 3.9023971557617188, 5.151332855224609, 6.105613708496094, 7.490440368652344, 14.51507568359375, -0.1387786865234375, 3.341461181640625, 1.4242095947265625, 3.7978973388671875, -3.3304443359375, 7.9806671142578125, 12.364538192749023, 8.885208129882812, -5.929679870605469], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000168.npy"}
{"epoch": 0.509090909090909, "step": 169, "batch_size": 128, "mean": 4.186809539794922, "std": 8.353130340576172, "min": -14.405723571777344, "p10": -5.657550048828125, "median": 3.8283920288085938, "p90": 14.516046524047852, "max": 33.090972900390625, "pos_frac": 0.625, "sample": [7.041465759277344, 3.7657623291015625, -9.465509414672852, -5.047313690185547, 8.591278076171875, 11.177749633789062, -7.813379287719727, 16.491071701049805, 3.211080551147461, 15.238433837890625, 4.76971435546875, -1.9830093383789062, 8.196876525878906, -1.491170883178711, -6.324161529541016, -12.035575866699219, 13.24041748046875, -5.231014251708984, 17.71942138671875, 10.77920150756836, 15.744415283203125, -1.8204269409179688, -0.47757720947265625, 9.458076477050781, 5.7521820068359375, 2.434223175048828, 11.9415283203125, 33.090972900390625, -2.0271949768066406, 0.6627349853515625, -0.6000938415527344, 18.194137573242188, -0.611083984375, -4.1029205322265625, 17.331512451171875, 0.9231033325195312, 7.213294982910156, -10.090255737304688, -1.5238037109375, -6.8850860595703125, 14.368967056274414, 5.831447601318359, -0.7591552734375, 4.87969970703125, -0.6271896362304688, 21.664825439453125, 6.014656066894531, -11.141937255859375, 8.900276184082031, -0.2953948974609375, 11.871021270751953, 8.081390380859375, 4.486991882324219, 2.579437255859375, 7.630002975463867, 10.467193603515625, -2.4945507049560547, -3.7803573608398438, 12.0184326171875, 5.060819625854492, -10.833206176757812, 3.602752685546875, 14.52254867553711, 7.4019927978515625, -5.6951904296875, 2.324636459350586, 10.465225219726562, 8.080570220947266, 8.398727416992188, 9.33450698852539, -10.399282455444336, 7.371086120605469, -3.485626220703125, -3.8890151977539062, -2.3241043090820312, 1.3315353393554688, 9.685203552246094, 14.513259887695312, 3.6943283081054688, -3.9717788696289062, -0.11148452758789062, -0.5037040710449219, 1.7694129943847656, 9.865821838378906, 5.35443115234375, 0.6212844848632812, 17.172935485839844, 2.1812286376953125, 8.960380554199219, -2.0893192291259766, -5.64141845703125, 20.259315490722656, 10.80499267578125, -0.25710296630859375, 6.113761901855469, 9.909120559692383, 2.181488037109375, 13.746807098388672, 23.83575439453125, 5.544197082519531, -4.6858367919921875, -0.6144561767578125, 3.891021728515625, -1.8333053588867188, 5.736808776855469, 6.9045867919921875, -14.405723571777344, 10.353622436523438, 10.186355590820312, 10.41999626159668, -2.9143829345703125, 7.6162567138671875, -2.7132949829101562, 13.14010238647461, -2.695598602294922, 8.786535263061523, 3.5559921264648438, 4.819372177124023, -8.787391662597656, -1.2430534362792969, -10.33880615234375, 13.725143432617188, 12.437660217285156, -3.4745559692382812, -1.1531715393066406, 0.1500091552734375, 20.234683990478516, -1.2287139892578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000169.npy"}
{"epoch": 0.5121212121212121, "step": 170, "batch_size": 128, "mean": 5.287426948547363, "std": 8.922146797180176, "min": -15.322212219238281, "p10": -5.696637344360351, "median": 5.409707069396973, "p90": 17.99162445068359, "max": 25.701589584350586, "pos_frac": 0.703125, "sample": [5.865968704223633, 12.337127685546875, 6.356254577636719, -2.8256187438964844, -2.646648406982422, 11.867656707763672, 0.5676116943359375, 5.43516731262207, -6.503854751586914, 11.981292724609375, 18.881671905517578, -0.3423423767089844, 6.402587890625, 3.113983154296875, 16.57686996459961, 6.1727447509765625, 20.40796661376953, 25.701589584350586, 8.889747619628906, 4.969573974609375, 12.723716735839844, -10.677421569824219, 11.675399780273438, 0.0410919189453125, -9.014236450195312, -10.13043212890625, 1.4994354248046875, 1.8336124420166016, 6.326047897338867, 11.717473983764648, -7.867218017578125, 24.17015838623047, 3.967266082763672, -2.5083465576171875, 8.024421691894531, 10.05419921875, 5.7509918212890625, 4.932136535644531, 3.4349899291992188, 8.203248977661133, 2.614826202392578, 11.163520812988281, 1.1517086029052734, 6.458038330078125, -11.836051940917969, 8.013423919677734, 7.24311637878418, 12.902076721191406, 15.007423400878906, 0.3739166259765625, -3.3006935119628906, -1.24945068359375, 5.384246826171875, -2.812835693359375, -3.859588623046875, 10.499114990234375, -0.770599365234375, 7.881015777587891, 2.9795989990234375, 7.361083984375, 10.711738586425781, 12.643150329589844, 1.6382770538330078, 18.154640197753906, 11.066375732421875, 18.735252380371094, 0.2443675994873047, 0.4958477020263672, 0.6522369384765625, 21.473506927490234, -1.1892127990722656, 0.061412811279296875, 0.5395412445068359, -10.453353881835938, 16.071456909179688, 6.66070556640625, -1.9961299896240234, -4.532121658325195, -3.547199249267578, 8.663227081298828, -3.9876708984375, 8.600738525390625, 20.991226196289062, 5.653478622436523, 14.549049377441406, -1.7852058410644531, -4.22369384765625, 22.56218147277832, 10.383460998535156, 19.055206298828125, 4.1041107177734375, 3.3350372314453125, 6.3138427734375, -8.146003723144531, -15.322212219238281, -8.454727172851562, -0.7100982666015625, 16.720870971679688, 0.6103668212890625, 14.94222640991211, 9.489189147949219, 14.656347274780273, 23.769508361816406, 12.291938781738281, -4.098670959472656, 15.65234375, 0.25433349609375, 12.439178466796875, -5.8978729248046875, 14.439416885375977, -10.003276824951172, 10.984306335449219, 9.32469367980957, -4.548770904541016, -1.1361083984375, 20.156206130981445, 21.117542266845703, -3.783132553100586, -5.0438385009765625, -5.610393524169922, 17.92176055908203, -1.2427005767822266, 7.103551864624023, 14.8675537109375, 5.2415924072265625, 2.41693115234375, -6.782989501953125, -1.0436630249023438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000170.npy"}
{"epoch": 0.5151515151515151, "step": 171, "batch_size": 128, "mean": 5.625857830047607, "std": 9.032465934753418, "min": -17.124038696289062, "p10": -4.283576202392577, "median": 3.8481178283691406, "p90": 17.77084503173828, "max": 28.544631958007812, "pos_frac": 0.734375, "sample": [-10.137481689453125, 8.166038513183594, 13.790542602539062, 7.806840896606445, 21.814868927001953, 12.255367279052734, 12.430351257324219, 26.598785400390625, 10.056800842285156, 1.18853759765625, 15.40553092956543, 15.490089416503906, 9.906463623046875, 20.874237060546875, 18.00627899169922, 1.341888427734375, -4.1984405517578125, 0.23366546630859375, 5.129669189453125, 14.122879028320312, 5.424221038818359, 5.703954696655273, 3.8128585815429688, -17.124038696289062, 15.311546325683594, 14.007274627685547, 13.157745361328125, 3.5590744018554688, 17.235107421875, -2.478240966796875, -0.9149169921875, 15.5755615234375, 13.178890228271484, -6.3161773681640625, 11.96844482421875, 3.805187225341797, -10.082191467285156, -2.062326431274414, -1.5854759216308594, 1.7879219055175781, -5.712158203125, 21.179126739501953, 12.685333251953125, -6.062980651855469, -1.0872840881347656, -4.4727020263671875, 2.438251495361328, 1.5537109375, -12.213470458984375, -7.1173858642578125, 2.3281173706054688, 14.020278930664062, 3.7534332275390625, 19.521568298339844, 10.644271850585938, 6.468017578125, 6.755046844482422, 5.971160888671875, 0.3412342071533203, -4.202522277832031, -1.1707916259765625, 1.0772819519042969, 1.3870506286621094, -14.407173156738281, 6.137237548828125, 3.7979507446289062, -3.1787452697753906, -4.761650085449219, 0.8948516845703125, 17.669944763183594, 1.2283878326416016, 16.022945404052734, 1.829122543334961, -13.55612564086914, -2.699657440185547, -2.772005081176758, 2.248149871826172, 14.763877868652344, 14.057785034179688, 16.212608337402344, 3.3271942138671875, 2.8525161743164062, 9.5986328125, -2.9999942779541016, 11.611610412597656, 3.173553466796875, 1.7456817626953125, 13.305562973022461, 28.544631958007812, 20.835433959960938, 3.8833770751953125, -3.8854141235351562, 2.8641357421875, 24.192440032958984, -1.1223125457763672, 19.36101531982422, -1.9220733642578125, 8.399642944335938, -1.4953689575195312, 8.600906372070312, -0.3479194641113281, 12.191856384277344, 5.365379333496094, -3.0644912719726562, -7.612312316894531, 11.09676742553711, 7.6377410888671875, -1.3568439483642578, 9.856773376464844, 16.815532684326172, 1.9896793365478516, 3.7564163208007812, 1.9148178100585938, 21.49496078491211, 20.910964965820312, 5.679744720458984, -3.529348373413086, 23.08129119873047, 1.5200386047363281, -0.1689739227294922, 5.689477920532227, 0.14037322998046875, 0.4410400390625, 5.498264312744141, 8.073179244995117, 6.606861114501953, 4.2376861572265625, 5.500274658203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000171.npy"}
{"epoch": 0.5181818181818182, "step": 172, "batch_size": 128, "mean": 5.247993469238281, "std": 8.962703704833984, "min": -16.730560302734375, "p10": -6.025775146484374, "median": 5.682771682739258, "p90": 16.64720916748047, "max": 28.833099365234375, "pos_frac": 0.71875, "sample": [2.1330032348632812, 10.350849151611328, 0.7969169616699219, -5.457672119140625, -2.9888839721679688, 5.916595458984375, 5.861320495605469, -3.5777225494384766, 12.506423950195312, 12.154876708984375, 16.277816772460938, 5.8309326171875, 10.085968017578125, 4.9226226806640625, 9.518184661865234, 2.1708831787109375, 16.555496215820312, 4.7709808349609375, -0.11466217041015625, -1.8062095642089844, 13.117996215820312, 12.44223403930664, -14.807525634765625, 26.485244750976562, 0.2456226348876953, 7.401103973388672, 17.988327026367188, 1.6166458129882812, -3.5484771728515625, 11.267585754394531, 8.869407653808594, 7.319244384765625, -7.058891296386719, -5.80694580078125, -13.041767120361328, 5.92474365234375, 9.778594970703125, 6.477313995361328, 28.833099365234375, -3.4798545837402344, 3.2575149536132812, 9.119424819946289, 4.596736907958984, -3.136524200439453, 12.42132568359375, -1.212392807006836, -2.5653228759765625, 0.8912124633789062, -10.16619873046875, 3.747650146484375, 5.457633972167969, 20.8392333984375, 7.377227783203125, 3.9664306640625, 17.122177124023438, 1.0287322998046875, -4.172054290771484, -1.5079727172851562, 5.923713684082031, -3.794342041015625, -16.730560302734375, -6.990959167480469, -0.6326618194580078, 1.8079376220703125, -1.5316314697265625, 12.764202117919922, 9.259208679199219, 7.243965148925781, 21.5242919921875, -1.2147407531738281, 5.2920989990234375, 9.70242691040039, 0.5302581787109375, 19.166412353515625, 5.700538635253906, 20.719772338867188, -7.899938583374023, -6.536376953125, -11.706832885742188, 16.75017547607422, 0.26552581787109375, 1.2979354858398438, 6.053600311279297, 20.22980499267578, 10.396221160888672, 3.4194107055664062, 7.122018814086914, -10.00674057006836, 11.450958251953125, 8.072547912597656, 13.863094329833984, 5.986785888671875, 14.64620590209961, 4.712451934814453, 14.229204177856445, -4.204124450683594, 6.8763885498046875, 13.16009521484375, 6.5385589599609375, 0.16730499267578125, 4.325258255004883, 8.822677612304688, 5.665004730224609, 16.60308074951172, -0.6124191284179688, -3.101959228515625, -7.6376953125, 0.9484596252441406, 17.59881591796875, 14.09341049194336, -9.037765502929688, 23.38872528076172, -0.5973472595214844, 10.484901428222656, 0.2754325866699219, -0.6102485656738281, 3.4500885009765625, 0.7206554412841797, 15.63848876953125, 12.476642608642578, 15.54315185546875, 6.782505035400391, -4.3813018798828125, 17.950363159179688, -14.669654846191406, 13.307205200195312, 14.155776977539062, 13.540451049804688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000172.npy"}
{"epoch": 0.5212121212121212, "step": 173, "batch_size": 128, "mean": 6.2465009689331055, "std": 9.472085952758789, "min": -15.748733520507812, "p10": -6.378909683227539, "median": 5.632884979248047, "p90": 17.84729461669922, "max": 31.804336547851562, "pos_frac": 0.71875, "sample": [5.78851318359375, 2.0872840881347656, 22.555374145507812, -12.310752868652344, 0.6157054901123047, 2.41094970703125, -0.02734375, -6.369270324707031, -9.992347717285156, 3.3600025177001953, 3.426717758178711, 23.105934143066406, 27.32843017578125, 3.2133865356445312, 4.924896240234375, 16.1629638671875, 4.849021911621094, 5.477256774902344, 14.80609130859375, -5.270488739013672, 17.87862777709961, 22.48388671875, 16.23472023010254, -8.738517761230469, 12.476070404052734, 1.2863540649414062, 17.064682006835938, 6.267784118652344, -3.5764122009277344, -0.747406005859375, 9.916473388671875, 10.715156555175781, -8.961872100830078, 1.6529083251953125, 1.1726341247558594, 14.446346282958984, 16.214454650878906, 7.654888153076172, -6.403291702270508, 5.915641784667969, 10.782968521118164, -1.0733489990234375, 3.4932403564453125, 3.263317108154297, -6.275215148925781, 7.048896789550781, 9.563850402832031, 15.241485595703125, 16.598373413085938, -0.6840896606445312, -9.910938262939453, 31.804336547851562, 11.343086242675781, -0.8266639709472656, 8.800941467285156, -7.067909240722656, -0.3619728088378906, -5.074607849121094, 5.002626419067383, 9.678054809570312, 9.063304901123047, -1.5343017578125, 14.50967025756836, 16.187088012695312, 13.542196273803711, 16.994815826416016, 13.80224609375, 17.203231811523438, 10.431884765625, 5.2606201171875, 9.7017822265625, 11.72104263305664, -3.071044921875, 10.743278503417969, -3.7160396575927734, 10.311458587646484, 6.5329132080078125, 7.8217620849609375, -8.12945556640625, -4.34150505065918, 13.548690795898438, -0.20528411865234375, 4.640880584716797, 7.720115661621094, 2.6444854736328125, -7.4488372802734375, 14.049545288085938, 23.456436157226562, -3.0817337036132812, 1.8367156982421875, 8.076164245605469, 1.2818145751953125, -0.44196510314941406, 7.789127349853516, -6.401401519775391, 2.8583984375, 17.833866119384766, 17.42334747314453, 20.646347045898438, -1.3695602416992188, 3.11663818359375, 14.682443618774414, -3.157154083251953, 17.988895416259766, 9.352140426635742, 20.252349853515625, -15.389381408691406, 3.310749053955078, 12.132225036621094, -15.748733520507812, 4.833770751953125, -7.054939270019531, 27.826980590820312, 17.94914436340332, -3.83258056640625, 19.218414306640625, 3.9269180297851562, 3.9573974609375, 16.064308166503906, 3.6828842163085938, 8.201095581054688, 6.435340881347656, -3.5415191650390625, 17.52098846435547, -1.509796142578125, 0.8498935699462891, 9.166831970214844, 10.982841491699219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000173.npy"}
{"epoch": 0.5242424242424243, "step": 174, "batch_size": 128, "mean": 5.577205657958984, "std": 9.870379447937012, "min": -12.794876098632812, "p10": -4.798369979858398, "median": 4.221342086791992, "p90": 18.00085582733154, "max": 40.78941345214844, "pos_frac": 0.6875, "sample": [0.21410369873046875, 2.8835372924804688, 3.1197662353515625, 9.227462768554688, -6.252204895019531, -2.277029037475586, 16.019271850585938, 10.657573699951172, 6.4935455322265625, 8.05555534362793, -7.490081787109375, 4.72723388671875, 5.7956390380859375, 6.215290069580078, 32.281959533691406, 1.0456771850585938, -12.342521667480469, 9.186336517333984, -1.9352989196777344, 7.310405731201172, -6.455524444580078, 6.972110748291016, -11.37371826171875, 5.368560791015625, 15.725143432617188, 17.799114227294922, 13.144744873046875, 12.1627197265625, 0.9041900634765625, -1.7095794677734375, -4.759433746337891, 11.601211547851562, 4.855260848999023, 15.460681915283203, -3.3958358764648438, 13.403083801269531, 8.297500610351562, -1.2426605224609375, -12.794876098632812, 40.78941345214844, -8.31313705444336, 0.2972297668457031, 1.3560638427734375, 15.323974609375, 23.081878662109375, -2.5175933837890625, -0.5851402282714844, 0.37999725341796875, 11.519655227661133, 0.758453369140625, 1.9999504089355469, 14.736007690429688, 5.096828460693359, 20.562583923339844, -4.646749496459961, -10.953155517578125, 2.634763717651367, 7.62200927734375, -0.7724151611328125, -1.2559757232666016, 2.7521438598632812, 0.563446044921875, 17.532188415527344, 10.833717346191406, 14.785919189453125, 20.077049255371094, -4.4483489990234375, 4.053188323974609, 16.087242126464844, 8.86248779296875, -4.88922119140625, -0.069427490234375, 0.5166015625, 28.8427734375, 5.147918701171875, 24.929664611816406, 1.1083984375, 12.773286819458008, 14.47772216796875, -1.7011032104492188, 1.4270439147949219, 9.96356201171875, 4.725860595703125, 3.7446670532226562, 10.902610778808594, -2.000974655151367, 1.0993309020996094, 11.657989501953125, -9.212577819824219, 11.215827941894531, -1.970001220703125, 29.081920623779297, -4.042329788208008, 6.742889404296875, 22.035694122314453, -1.7238998413085938, -10.657737731933594, -1.673614501953125, 25.31468963623047, -2.42596435546875, 9.378463745117188, 9.715721130371094, 15.067352294921875, 14.56120491027832, 5.318206787109375, 18.471586227416992, -3.8686447143554688, 14.225812911987305, -10.854455947875977, -5.289192199707031, 4.389495849609375, 0.39471435546875, 6.478759765625, 26.30355453491211, 0.58831787109375, 1.843984603881836, 20.839069366455078, -2.262838363647461, -1.466827392578125, -2.8411026000976562, 8.964290618896484, 2.03668212890625, -1.0245704650878906, 5.319726943969727, -3.4671096801757812, 4.031982421875, 7.2348175048828125, -0.6608924865722656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000174.npy"}
{"epoch": 0.5272727272727272, "step": 175, "batch_size": 128, "mean": 6.880983352661133, "std": 9.429365158081055, "min": -19.138107299804688, "p10": -3.792910385131835, "median": 5.643677711486816, "p90": 20.862845611572265, "max": 29.643280029296875, "pos_frac": 0.765625, "sample": [3.0373001098632812, 11.4849853515625, 12.953811645507812, 11.561859130859375, 2.0377044677734375, 2.578277587890625, 10.869426727294922, 15.220497131347656, 15.010854721069336, -14.09576416015625, -19.138107299804688, 15.818944931030273, 8.206291198730469, 14.93865966796875, 6.9527130126953125, 3.311969757080078, -11.702445983886719, 14.105926513671875, 4.805450439453125, 14.390357971191406, 9.127971649169922, 29.643280029296875, -5.6897735595703125, 5.5092010498046875, -5.454551696777344, -4.358301162719727, 15.485588073730469, 8.043777465820312, -7.047046661376953, 3.3740768432617188, 2.0584068298339844, 24.432388305664062, -3.550600051879883, 3.634815216064453, -3.3450679779052734, 4.334566116333008, 16.78740692138672, 5.003398895263672, 8.879280090332031, 16.46012306213379, 15.641660690307617, 3.9014739990234375, -2.6243438720703125, 13.900476455688477, -4.6209259033203125, 12.251983642578125, 9.961601257324219, 8.276927947998047, 19.610794067382812, -0.9370803833007812, 4.514970779418945, -3.5245361328125, 4.724466323852539, 21.059715270996094, 24.802825927734375, 0.9734954833984375, 3.7472915649414062, 0.6417312622070312, -9.357410430908203, 15.830085754394531, 1.3109569549560547, 3.4581642150878906, 8.319053649902344, 26.16368865966797, 24.95519256591797, -1.0337104797363281, 8.323654174804688, -0.8222980499267578, 13.098876953125, 4.269523620605469, 25.53197479248047, 8.837051391601562, 20.778472900390625, 1.3180274963378906, -0.4100666046142578, -2.8882408142089844, 10.603363037109375, 13.180747985839844, -8.090873718261719, 10.924468994140625, 15.2308349609375, 0.9509963989257812, 7.189670562744141, 21.955108642578125, 10.457817077636719, 19.038833618164062, 24.345924377441406, 22.957366943359375, -0.5277252197265625, 17.71661376953125, -0.1704559326171875, 2.510772705078125, 2.0687923431396484, 4.96978759765625, 0.8460464477539062, -6.099151611328125, 3.041728973388672, 8.500324249267578, -0.20444488525390625, 1.13720703125, 1.9091796875, 6.572637557983398, 5.778154373168945, 8.923713684082031, 6.745962142944336, 0.8667716979980469, -7.157461166381836, 13.8973388671875, 0.26404571533203125, -1.1912059783935547, 7.82958984375, -6.078662872314453, 8.0833740234375, 27.166240692138672, 27.156158447265625, -0.3521575927734375, 2.6488265991210938, 3.927265167236328, 8.357841491699219, -2.084075927734375, 7.950653076171875, 2.846923828125, -3.3367767333984375, -2.9012451171875, 22.041908264160156, 6.8792877197265625, 10.750219345092773, 15.074481964111328], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000175.npy"}
{"epoch": 0.5303030303030303, "step": 176, "batch_size": 128, "mean": 5.6091108322143555, "std": 9.495729446411133, "min": -23.44162940979004, "p10": -6.376824188232421, "median": 4.436176300048828, "p90": 17.18010902404785, "max": 29.998016357421875, "pos_frac": 0.71875, "sample": [2.639963150024414, 15.411834716796875, 16.08912467956543, 20.757675170898438, -2.5399322509765625, 16.770755767822266, -0.6985359191894531, 7.025714874267578, -2.8076019287109375, -0.020999908447265625, 14.829299926757812, 5.902387619018555, 5.766838073730469, -7.602394104003906, 12.885749816894531, 6.8311004638671875, 12.458267211914062, 0.03639984130859375, 11.341384887695312, -6.863361358642578, -9.047645568847656, 17.029739379882812, 1.7181167602539062, -1.9522819519042969, 1.6272735595703125, 4.46246337890625, -0.20413970947265625, -1.2914009094238281, 13.936355590820312, 11.277748107910156, 14.933393478393555, 5.169681549072266, 18.79931640625, 4.292083740234375, 1.02484130859375, 16.447364807128906, 3.0360183715820312, -0.183258056640625, 3.7120437622070312, -7.1273956298828125, 15.493705749511719, 21.8406982421875, 7.401702880859375, 9.343223571777344, -14.79248046875, 10.677879333496094, 12.53469467163086, -4.066413879394531, 8.975715637207031, 17.53097152709961, 1.946197509765625, -2.284191131591797, 15.759521484375, 5.1056976318359375, 18.613235473632812, 9.317001342773438, 5.597333908081055, 29.681793212890625, -23.44162940979004, 4.409889221191406, -15.4161376953125, 0.6844024658203125, 8.958919525146484, -8.4420166015625, 5.812492370605469, 29.998016357421875, 25.467281341552734, 1.3196640014648438, 16.751380920410156, -8.351516723632812, -2.8514366149902344, 1.7691898345947266, 0.91455078125, 0.5304107666015625, -8.059234619140625, 2.6512832641601562, 16.42839813232422, 12.640151977539062, 1.8032379150390625, 22.750476837158203, 8.192352294921875, 5.837806701660156, 1.4540863037109375, -0.17874526977539062, 3.0457382202148438, 19.764907836914062, 15.003997802734375, 13.636154174804688, 2.7491912841796875, 6.53363037109375, -1.2217864990234375, 12.799468994140625, 14.73858642578125, 13.199333190917969, 10.653457641601562, -0.25952911376953125, 0.73004150390625, 8.925874710083008, 3.4911117553710938, 4.927581787109375, -1.9899444580078125, -1.5714244842529297, 2.1811065673828125, 4.7232818603515625, 17.92200469970703, 2.39263916015625, 11.054317474365234, -5.474361419677734, -1.088531494140625, -4.19719123840332, 1.4092159271240234, 3.7994747161865234, 8.112239837646484, -8.790618896484375, 29.59033203125, -0.3997325897216797, 0.46804046630859375, 14.535520553588867, -9.289337158203125, -0.030185699462890625, -6.168308258056641, 19.616859436035156, -7.286205291748047, 8.183929443359375, 0.49323272705078125, 13.863847732543945, 5.301540374755859, -4.296863555908203], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000176.npy"}
{"epoch": 0.5333333333333333, "step": 177, "batch_size": 128, "mean": 6.088815212249756, "std": 9.688359260559082, "min": -34.44075012207031, "p10": -5.870273590087891, "median": 5.889486312866211, "p90": 17.72158889770508, "max": 34.648155212402344, "pos_frac": 0.78125, "sample": [6.483026504516602, 17.627365112304688, -12.871711730957031, 6.4796600341796875, -5.769674301147461, 7.429721832275391, 8.224258422851562, -6.0120391845703125, 5.8939361572265625, 10.412452697753906, 12.620445251464844, 12.429763793945312, 9.591217041015625, 9.186370849609375, 3.052793502807617, 3.902029037475586, 5.119617462158203, 1.8788833618164062, -34.44075012207031, 18.583995819091797, -8.127099990844727, 0.8865814208984375, 13.368881225585938, 15.20058822631836, 5.885036468505859, 7.478115081787109, 1.2593421936035156, 4.235279083251953, -0.7372512817382812, 1.3940963745117188, 0.6346035003662109, 4.922698974609375, 10.318222045898438, 10.641571044921875, -0.9557991027832031, 4.910686492919922, 6.652914047241211, -5.770965576171875, 24.118507385253906, 3.085620880126953, -3.3091201782226562, 3.3650436401367188, 16.671367645263672, 13.074277877807617, 18.85352325439453, 5.630214691162109, 4.795600891113281, 1.318634033203125, 2.470682144165039, 16.475669860839844, 34.648155212402344, 5.543676376342773, -5.3128662109375, 9.083709716796875, 12.02325439453125, 2.1042518615722656, 6.370830535888672, 2.01190185546875, -2.742992401123047, -6.817756652832031, 17.06500244140625, -1.842041015625, 17.48516273498535, 7.314857482910156, 26.559776306152344, 13.746284484863281, -8.596118927001953, 9.80923080444336, -4.673042297363281, 2.863100051879883, 12.59405517578125, 2.5913772583007812, 10.107536315917969, -7.061435699462891, 16.28873634338379, 27.873794555664062, 19.85228157043457, 10.523540496826172, 7.88494873046875, 15.035957336425781, 11.122726440429688, 9.868255615234375, 6.013641357421875, 3.441234588623047, -4.154323577880859, -9.22341537475586, 18.992862701416016, -4.776329040527344, 4.882959365844727, -3.7115631103515625, 7.4349822998046875, 10.136741638183594, 18.645599365234375, 15.913711547851562, 10.44723129272461, 1.8474006652832031, 14.297584533691406, 2.5693740844726562, 11.223138809204102, 16.51807975769043, 11.034669876098633, 1.6510009765625, 1.4644622802734375, 0.09849739074707031, -14.392131805419922, -3.776508331298828, -11.981430053710938, 16.398975372314453, 5.143962860107422, 6.7065277099609375, 10.689567565917969, 13.422195434570312, 12.484031677246094, 17.941444396972656, -1.5919265747070312, -6.67462158203125, 22.329559326171875, -7.136554718017578, 11.391357421875, 22.785232543945312, -7.5530242919921875, 5.189661026000977, 1.7827339172363281, 2.1060867309570312, 4.6569671630859375, 18.409896850585938, -5.809516906738281, 0.20330047607421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000177.npy"}
{"epoch": 0.5363636363636364, "step": 178, "batch_size": 128, "mean": 6.824317932128906, "std": 10.131897926330566, "min": -22.623779296875, "p10": -6.0110839843749995, "median": 6.545166015625, "p90": 20.225672912597656, "max": 30.6275634765625, "pos_frac": 0.7578125, "sample": [4.545421600341797, 21.003562927246094, -4.450788497924805, 7.615791320800781, 17.290802001953125, 15.877735137939453, 9.710494995117188, -1.7238311767578125, -2.9531822204589844, 8.364311218261719, -9.175582885742188, 2.6021347045898438, -6.478294372558594, 4.522573471069336, 9.654411315917969, 14.446800231933594, 1.8817634582519531, 27.918914794921875, 10.506553649902344, -8.629829406738281, 12.555221557617188, -3.1919822692871094, -1.7686576843261719, 13.045419692993164, 21.044361114501953, -12.54397201538086, 16.949115753173828, 10.667396545410156, 13.411186218261719, 4.364635467529297, 18.48058319091797, -2.8007354736328125, -0.6265335083007812, 6.161773681640625, 11.639320373535156, -0.966156005859375, 3.9826297760009766, 1.2694854736328125, 13.197906494140625, 18.311748504638672, 18.460384368896484, 4.2210845947265625, 5.58527946472168, 14.643089294433594, 16.334415435791016, 23.134796142578125, -8.463272094726562, 8.196479797363281, 3.111949920654297, -5.8292236328125, 28.979068756103516, 4.238319396972656, -0.8075962066650391, 22.474319458007812, 15.2872314453125, 2.1898651123046875, 0.9124774932861328, -0.7762413024902344, 20.465560913085938, -1.13104248046875, 10.403118133544922, 5.316343307495117, 2.0910873413085938, 5.318809509277344, 18.790359497070312, 19.287097930908203, 5.3604888916015625, -9.515670776367188, 15.747838973999023, 6.934211730957031, 0.06688690185546875, 11.795989990234375, 4.41705322265625, 4.447456359863281, 15.23504638671875, 1.6435012817382812, 20.12286376953125, -20.778743743896484, 8.736701965332031, 11.870864868164062, -1.8366985321044922, 1.5802173614501953, -6.4354248046875, 6.6594696044921875, 8.600128173828125, -4.980339050292969, 4.854701995849609, 16.631359100341797, -2.0329456329345703, 12.468093872070312, -1.5503978729248047, 7.988185882568359, 22.07593536376953, 15.435813903808594, 3.4572525024414062, 15.10700798034668, 11.974058151245117, 3.080272674560547, -9.6661376953125, -22.623779296875, 15.141555786132812, -6.657497406005859, 23.065093994140625, 24.60382843017578, 6.555999755859375, 21.169158935546875, -3.6865997314453125, -18.02392578125, 7.829795837402344, 6.778209686279297, 8.06875228881836, 6.534332275390625, 0.6074333190917969, 8.139289855957031, 18.26742935180664, -3.667999267578125, 0.078460693359375, 4.184343338012695, 17.134307861328125, 8.75634765625, 9.416669845581055, 0.24329376220703125, 22.55052947998047, 3.052459716796875, -7.3202972412109375, 30.6275634765625, 4.289159774780273, 10.75750732421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000178.npy"}
{"epoch": 0.5393939393939394, "step": 179, "batch_size": 128, "mean": 5.976173400878906, "std": 9.506043434143066, "min": -13.95220947265625, "p10": -6.795978546142578, "median": 5.390735626220703, "p90": 18.317131805419923, "max": 30.469207763671875, "pos_frac": 0.7265625, "sample": [23.9971923828125, -3.3616714477539062, 16.243011474609375, 2.547576904296875, -9.055801391601562, 23.520980834960938, 12.534027099609375, 9.056655883789062, 14.943733215332031, 2.9371795654296875, 11.618400573730469, 6.001682281494141, 6.283927917480469, 3.8829002380371094, 8.874336242675781, -2.1875858306884766, -10.527206420898438, 13.263286590576172, 5.932548522949219, 23.848445892333984, 17.525848388671875, 5.4875030517578125, 13.036933898925781, 21.659929275512695, -6.820625305175781, -0.27198028564453125, 1.6422309875488281, 7.552825927734375, 14.807205200195312, 6.078948974609375, 11.10546875, -0.40019989013671875, 8.388168334960938, 1.6453399658203125, 2.9414520263671875, 12.11090087890625, -5.2548675537109375, 3.4386978149414062, 28.190963745117188, 5.6952362060546875, -0.9473419189453125, 13.059555053710938, -0.4904003143310547, 2.792905807495117, -13.265106201171875, 16.542930603027344, -6.7854156494140625, 1.7287368774414062, -2.6911849975585938, 15.242095947265625, -1.2847061157226562, -7.135932922363281, 2.2759246826171875, 5.37579345703125, 2.1609649658203125, 2.642688751220703, -4.4087982177734375, 2.160236358642578, 18.432392120361328, -10.79210090637207, -13.208206176757812, -11.766029357910156, 19.589927673339844, -0.21361160278320312, 5.959115982055664, 9.680946350097656, 8.58005142211914, 19.76458740234375, -1.0886974334716797, 17.035118103027344, 13.8299560546875, -2.2258071899414062, -7.209514617919922, 1.8246917724609375, 13.137237548828125, 8.413957595825195, 9.657379150390625, 2.101276397705078, 1.1098251342773438, 12.46489143371582, -13.95220947265625, 2.719858169555664, -3.3944320678710938, 4.44793701171875, 12.071113586425781, 14.176994323730469, 3.5051956176757812, -1.696746826171875, 3.157978057861328, 18.887908935546875, 0.019662857055664062, -9.687950134277344, -13.251718521118164, 18.26773452758789, 5.670295715332031, 5.157676696777344, -0.4566650390625, 1.5312480926513672, -2.1822052001953125, 10.571001052856445, 2.4528656005859375, 18.21953582763672, 17.416725158691406, 1.522369384765625, 13.479963302612305, 2.418560028076172, 2.9138641357421875, -2.1349029541015625, -6.9324798583984375, 5.405677795410156, -5.545860290527344, 5.870246887207031, 17.48316192626953, 17.336395263671875, 16.529022216796875, 3.114612579345703, 24.983642578125, 23.491985321044922, 6.163337707519531, -1.8375625610351562, -1.0485000610351562, 9.508399963378906, 7.083122253417969, 10.747947692871094, 10.011686325073242, 19.05268096923828, 10.247852325439453, 30.469207763671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000179.npy"}
{"epoch": 0.5424242424242425, "step": 180, "batch_size": 128, "mean": 4.889063835144043, "std": 9.32685661315918, "min": -16.248092651367188, "p10": -4.6622720718383786, "median": 3.274639129638672, "p90": 18.183369636535645, "max": 32.38048553466797, "pos_frac": 0.6328125, "sample": [1.4765853881835938, 14.464790344238281, -4.01702880859375, 8.728485107421875, 9.577590942382812, 1.6186599731445312, -3.1584739685058594, 5.792400360107422, 18.462656021118164, -4.854253768920898, 6.471221923828125, -15.676813125610352, 0.40653228759765625, 1.003173828125, -1.2496795654296875, -7.782783508300781, -4.60400390625, 15.188529968261719, 0.8914260864257812, 3.1904830932617188, 5.099418640136719, 6.865528106689453, -3.3865318298339844, -2.2688827514648438, -2.6642990112304688, 9.685958862304688, 3.9518585205078125, -4.780553817749023, -16.248092651367188, -0.4371185302734375, 13.490791320800781, -0.8547515869140625, -0.11676025390625, -12.298095703125, 20.48822021484375, -3.8921146392822266, 5.2777862548828125, 22.662376403808594, -1.6220169067382812, 5.885650634765625, 7.8326263427734375, 18.157623291015625, 6.78203010559082, 0.16064453125, 4.4628448486328125, -16.10403823852539, 19.26507568359375, 26.218826293945312, -1.8991127014160156, 9.94375991821289, 18.243444442749023, 6.559547424316406, -0.8995285034179688, 5.5465087890625, -2.0687255859375, -4.3897705078125, 11.485031127929688, 5.295019149780273, 7.524932861328125, 13.947891235351562, 8.487281799316406, 18.767196655273438, -1.8929214477539062, 0.316680908203125, 1.719635009765625, 26.48870849609375, -0.85980224609375, -0.872406005859375, 11.052539825439453, 17.043991088867188, -5.173572540283203, 3.286314010620117, 27.489776611328125, -6.769287109375, 1.1559410095214844, 12.540664672851562, 11.00210952758789, -1.9705486297607422, -2.6565704345703125, -3.4779739379882812, 2.987895965576172, -5.595428466796875, 3.540424346923828, 10.926460266113281, -3.6826934814453125, 5.336606979370117, 14.88449478149414, 0.8838882446289062, 3.2629642486572266, 25.085865020751953, 15.900863647460938, 11.722000122070312, 19.246761322021484, -1.966165542602539, -0.7926445007324219, 9.16552734375, -2.006427764892578, 16.701629638671875, -0.7211742401123047, -4.611579895019531, -2.1964569091796875, -2.7353477478027344, 16.517675399780273, -5.1461181640625, 4.807075500488281, 3.9227027893066406, 6.363899230957031, 2.673908233642578, 9.007625579833984, 10.539749145507812, 2.3368148803710938, -0.6795082092285156, 32.38048553466797, 1.5898590087890625, 10.314094543457031, -0.34676361083984375, 26.22454833984375, 9.316513061523438, 16.401559829711914, 4.9506378173828125, -0.18502426147460938, -5.976097106933594, 0.8632011413574219, 15.76785659790039, -3.863037109375, -6.163002014160156, 7.097221374511719, 9.236541748046875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000180.npy"}
{"epoch": 0.5454545454545454, "step": 181, "batch_size": 128, "mean": 6.3491973876953125, "std": 9.600634574890137, "min": -18.824371337890625, "p10": -5.59174575805664, "median": 5.927271842956543, "p90": 17.353351593017578, "max": 41.4888916015625, "pos_frac": 0.7734375, "sample": [2.0985565185546875, 16.789756774902344, 1.1539688110351562, -5.011388778686523, 20.214447021484375, 4.307626724243164, -2.2177810668945312, -10.053779602050781, -2.5202560424804688, -4.800289154052734, 0.29388427734375, -9.725921630859375, 6.686370849609375, 16.647846221923828, 4.5108642578125, 18.162460327148438, 11.730247497558594, 30.63861083984375, 14.632705688476562, 12.755462646484375, 14.601175308227539, 5.943365097045898, -2.6067123413085938, -1.7572784423828125, -2.7886962890625, 13.148456573486328, 9.005546569824219, 4.873771667480469, 8.45968246459961, 4.239875793457031, 30.528160095214844, 6.207611083984375, 8.413516998291016, 3.6317577362060547, -0.376739501953125, -5.883026123046875, -2.2680931091308594, 6.285316467285156, 9.942184448242188, 5.152004241943359, 12.999580383300781, 13.104393005371094, 13.388431549072266, -18.824371337890625, 17.65485382080078, 4.8781280517578125, 13.423124313354492, 6.556770324707031, 3.2185497283935547, 0.17084503173828125, 22.095413208007812, 15.005752563476562, 5.766380310058594, 15.571517944335938, 11.346813201904297, 3.980316162109375, 19.134201049804688, 6.88818359375, -7.6677703857421875, 10.340290069580078, 2.718170166015625, 12.156394958496094, 3.752695083618164, -7.311309814453125, -7.391946792602539, 7.801258087158203, -6.1775054931640625, 0.22861862182617188, 1.9016380310058594, -15.315376281738281, 8.240493774414062, 5.4358978271484375, 4.705024719238281, 9.740745544433594, 1.1349735260009766, 20.916427612304688, 1.7129192352294922, 2.4293670654296875, 3.5380172729492188, 9.5445556640625, 0.16915512084960938, 13.791702270507812, 6.544212341308594, 7.9827880859375, 16.058826446533203, 7.3121795654296875, 5.9111785888671875, -5.466911315917969, 7.639537811279297, -0.97314453125, -9.27553939819336, 6.6561279296875, 14.534034729003906, -15.348739624023438, 9.942913055419922, 0.11936187744140625, 41.4888916015625, 14.721420288085938, 1.654296875, 0.3409690856933594, 4.8575897216796875, 3.7186508178710938, -0.4084606170654297, 5.27972412109375, 9.083099365234375, 6.443778991699219, 17.224136352539062, -4.9925537109375, -0.4782543182373047, 11.602893829345703, 25.707977294921875, 3.2589149475097656, 5.000326156616211, 15.103958129882812, 17.915428161621094, 14.892589569091797, 4.313453674316406, -3.3291015625, 14.370403289794922, 9.347183227539062, 18.43846893310547, -8.4617919921875, -6.1423492431640625, 7.99163818359375, 30.622100830078125, -1.5233573913574219, 7.445892333984375, 11.773937225341797], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000181.npy"}
{"epoch": 0.5484848484848485, "step": 182, "batch_size": 128, "mean": 7.200283527374268, "std": 10.058183670043945, "min": -14.087287902832031, "p10": -4.799297142028808, "median": 5.418854713439941, "p90": 22.351377868652342, "max": 34.691802978515625, "pos_frac": 0.7578125, "sample": [4.353759765625, -4.746009826660156, 18.098526000976562, 5.749263763427734, 12.828201293945312, 8.332954406738281, 2.998016357421875, 20.7159423828125, 1.3774356842041016, 0.5984134674072266, 9.948921203613281, 10.07318115234375, 3.9817352294921875, 11.946239471435547, 4.699535369873047, 16.528182983398438, 11.460243225097656, -8.324764251708984, 16.07250213623047, 11.046714782714844, 3.21728515625, 6.047080993652344, 3.3927745819091797, -0.6981048583984375, 10.605888366699219, -5.263954162597656, 17.315317153930664, -5.1230926513671875, 3.583555221557617, 10.579330444335938, -4.761072158813477, -4.88848876953125, 1.1307296752929688, -0.5696773529052734, -2.0423431396484375, 6.7938232421875, 34.691802978515625, 28.7550048828125, -0.013713836669921875, 23.22515869140625, 14.787139892578125, 0.019115447998046875, 5.7877960205078125, 3.2558822631835938, -2.8362579345703125, 5.49406623840332, -6.442985534667969, -10.617389678955078, 7.887542724609375, 0.98651123046875, 9.325233459472656, 22.188491821289062, 2.6544723510742188, 18.72539520263672, 17.35763168334961, 8.618019104003906, 12.224128723144531, -2.7207202911376953, 4.607666015625, 6.551277160644531, -0.12244224548339844, 1.6621017456054688, 7.351295471191406, 3.4879016876220703, 23.492874145507812, -0.32118988037109375, 2.8192901611328125, 33.439125061035156, 28.743919372558594, 2.166961669921875, 8.802345275878906, 9.931961059570312, 3.7385101318359375, 0.457489013671875, 26.35870361328125, 3.8371734619140625, 20.630599975585938, 0.9688301086425781, 11.312034606933594, 20.103500366210938, -5.514530181884766, 5.3436431884765625, 4.182182312011719, 0.7718963623046875, 11.546497344970703, 18.489898681640625, -1.2676849365234375, 7.520931243896484, 6.821380615234375, 15.157356262207031, -1.1110992431640625, 12.582830429077148, -11.562980651855469, 23.911041259765625, -7.162864685058594, 2.9846954345703125, 1.362701416015625, 23.534896850585938, 29.100509643554688, 6.7802734375, -3.2016639709472656, -4.987785339355469, 19.19476318359375, 24.401752471923828, -14.087287902832031, -5.7132415771484375, 30.55254364013672, -1.9752168655395508, 1.157522201538086, 9.129402160644531, 15.58364486694336, 16.876792907714844, 0.09487152099609375, 2.8368377685546875, -3.400115966796875, -2.362762451171875, 22.7314453125, 6.7197265625, -3.9039382934570312, 8.962135314941406, 3.4325180053710938, 16.82123565673828, -0.982025146484375, 3.5175323486328125, 6.193122863769531, -5.1805267333984375, 17.66946029663086, 7.653696060180664], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000182.npy"}
{"epoch": 0.5515151515151515, "step": 183, "batch_size": 128, "mean": 5.181260108947754, "std": 9.68286418914795, "min": -19.43140411376953, "p10": -5.638298797607422, "median": 3.7996692657470703, "p90": 16.875737762451173, "max": 32.350181579589844, "pos_frac": 0.7109375, "sample": [3.248964309692383, 10.616050720214844, 10.569480895996094, 0.8599491119384766, 5.637701034545898, 2.195596694946289, 15.099205017089844, 12.25990104675293, -1.9050426483154297, 8.195892333984375, -0.547119140625, -0.3960094451904297, 3.817047119140625, 1.5812454223632812, 10.309562683105469, 2.053590774536133, 15.023460388183594, 4.630645751953125, 1.891693115234375, 6.4717254638671875, 9.208175659179688, -2.4800987243652344, -4.031124114990234, 6.971076965332031, 15.638671875, 2.355987548828125, 11.522514343261719, -2.6122703552246094, 12.950153350830078, -6.49273681640625, 1.9324817657470703, 3.7262725830078125, 21.433425903320312, 3.031158447265625, 10.055389404296875, 8.065624237060547, -16.484851837158203, 1.3364620208740234, 5.335914611816406, 16.847267150878906, -0.6960639953613281, -3.649078369140625, 6.29779052734375, 24.924896240234375, -0.9680099487304688, 8.271888732910156, -0.60467529296875, 3.2245311737060547, 18.86007308959961, -0.10758209228515625, 11.926067352294922, 9.678421020507812, 1.51275634765625, 14.42928695678711, 4.906190872192383, -3.0479373931884766, -2.6003799438476562, -5.517765045166016, 1.6983795166015625, 1.1283798217773438, 26.843345642089844, 14.640960693359375, 0.3448219299316406, 12.1949462890625, -0.8907985687255859, 20.4765625, 15.425670623779297, 10.008695602416992, -0.5087356567382812, 14.122314453125, -18.543991088867188, -12.072885513305664, -19.43140411376953, 2.4795303344726562, 3.5417404174804688, 16.702735900878906, 13.604705810546875, 5.603118896484375, 6.492584228515625, -2.5593700408935547, -7.818822860717773, 20.727279663085938, -19.153797149658203, 2.0327320098876953, -0.8094863891601562, -4.8021240234375, 8.728912353515625, 6.455671310424805, 23.477802276611328, 16.541549682617188, 6.638053894042969, 18.802894592285156, 11.769474029541016, -17.901599884033203, 16.942169189453125, -6.314037322998047, -7.7875823974609375, -7.993507385253906, -5.919544219970703, 5.436607360839844, 3.777423858642578, 10.528152465820312, 2.7643051147460938, 7.656005859375, -5.125331878662109, 7.8949127197265625, 3.082487106323242, 20.430856704711914, -14.540962219238281, -0.3180980682373047, 32.350181579589844, 3.7822914123535156, 0.9364814758300781, -5.513181686401367, 2.6872196197509766, 1.013580322265625, -4.306427001953125, 3.6787109375, -0.9031143188476562, 19.335418701171875, 14.717082977294922, 11.080551147460938, 16.844886779785156, 15.468982696533203, 21.177879333496094, 9.657054901123047, 10.833415985107422, 7.093166351318359], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000183.npy"}
{"epoch": 0.5545454545454546, "step": 184, "batch_size": 128, "mean": 4.822202205657959, "std": 9.013938903808594, "min": -18.655555725097656, "p10": -6.508023262023926, "median": 4.981382369995117, "p90": 17.049065399169923, "max": 26.449172973632812, "pos_frac": 0.7265625, "sample": [4.014453887939453, 17.1463623046875, 7.853593826293945, 1.8601417541503906, 2.380706787109375, -2.1415252685546875, 13.669471740722656, 15.008560180664062, 5.688652038574219, -4.2287750244140625, -5.058895111083984, 17.02871322631836, 0.68792724609375, 7.0879058837890625, -1.7283554077148438, -8.090211868286133, 10.551326751708984, 2.7687225341796875, 5.54429817199707, 2.7091407775878906, -2.1778945922851562, -5.835079193115234, 1.5615177154541016, 9.754066467285156, 7.641735076904297, 1.7244377136230469, 7.1239166259765625, 4.197010040283203, -9.462226867675781, 9.030887603759766, 11.971817016601562, 17.096553802490234, -1.2728805541992188, 2.8918495178222656, 3.0127410888671875, 7.72454833984375, 4.620216369628906, 5.3699188232421875, 18.360366821289062, 4.314977645874023, 8.1217041015625, -18.34836196899414, 3.6355743408203125, 12.759201049804688, 6.366888046264648, 20.321456909179688, 21.858062744140625, 16.820781707763672, -3.770172119140625, -16.486705780029297, 2.0888538360595703, 18.910240173339844, 6.566017150878906, -0.76837158203125, 4.478084564208984, 0.9343643188476562, 5.586660385131836, -1.14801025390625, 6.2795562744140625, 13.422386169433594, -8.572830200195312, 13.206979751586914, 7.463432312011719, 16.87659454345703, -18.655555725097656, 10.890731811523438, -2.5348167419433594, 6.620647430419922, -2.1053619384765625, 11.79461669921875, 17.427371978759766, 4.273937225341797, -9.198867797851562, -11.077857971191406, 0.09326934814453125, 17.248878479003906, -4.228492736816406, 5.438404083251953, 6.958839416503906, -6.9145660400390625, 5.722240447998047, -2.373638153076172, 9.591514587402344, 3.0182876586914062, 7.98834228515625, 1.7064208984375, 0.8525466918945312, 5.245780944824219, 6.458900451660156, -10.63330078125, 15.436424255371094, 22.446258544921875, 22.72174072265625, 12.223381042480469, 8.122852325439453, 2.7850799560546875, 0.14511871337890625, -1.8596878051757812, 15.003044128417969, -1.9890060424804688, 0.03539276123046875, 12.42047119140625, 20.023578643798828, 8.314247131347656, 16.08405113220215, 7.179018020629883, -13.564208984375, 16.814239501953125, 26.449172973632812, 8.407855987548828, -0.35843467712402344, 16.2789306640625, 11.304367065429688, -2.996530532836914, 19.456283569335938, 7.746543884277344, -6.397670745849609, -1.822845458984375, 4.716983795166016, -5.690605163574219, 4.565803527832031, -4.899726867675781, -6.765512466430664, 0.0428009033203125, 7.350433349609375, 8.514228820800781, -13.008140563964844, 1.394622802734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000184.npy"}
{"epoch": 0.5575757575757576, "step": 185, "batch_size": 128, "mean": 5.875006198883057, "std": 10.168135643005371, "min": -22.96335792541504, "p10": -7.009825325012207, "median": 4.3928375244140625, "p90": 18.752523040771486, "max": 28.021873474121094, "pos_frac": 0.7421875, "sample": [16.802196502685547, -11.869705200195312, 6.42930793762207, 13.914794921875, -15.68132209777832, 18.733734130859375, 11.213424682617188, 3.2242279052734375, 2.4137096405029297, 6.566497802734375, 1.99407958984375, 0.7091712951660156, 1.1927490234375, -13.328081130981445, -0.8752899169921875, -6.249711990356445, 3.1396522521972656, 16.748268127441406, 9.064573287963867, 20.329118728637695, 24.272811889648438, -1.108734130859375, 4.4705963134765625, -7.395912170410156, 12.0345458984375, -7.181694030761719, -0.6240615844726562, 0.47747039794921875, 2.770221710205078, 18.849109649658203, 0.5852203369140625, -4.913547515869141, 3.731475830078125, 3.186054229736328, 4.846778869628906, 15.107505798339844, 10.084016799926758, 13.092575073242188, 20.251094818115234, 3.195171356201172, 8.293281555175781, 16.5926513671875, -8.864124298095703, 6.410152435302734, 5.377815246582031, -6.518192291259766, 8.31646728515625, -1.7299880981445312, -1.815521240234375, 8.664413452148438, 4.000249862670898, 9.17473030090332, 2.270294189453125, 4.3150787353515625, 15.42138671875, 6.987712860107422, -3.7919998168945312, 25.81003189086914, 1.9042205810546875, 7.8742828369140625, -7.10511589050293, -7.802490234375, -10.38985824584961, 12.05147933959961, 7.022590637207031, 26.635414123535156, 14.12155532836914, 1.2610626220703125, 24.271148681640625, 1.6101226806640625, 28.021873474121094, -1.5501937866210938, 2.9842071533203125, -6.662559509277344, -6.968986511230469, 9.300308227539062, 1.467193603515625, 12.849197387695312, 16.83877944946289, 11.378707885742188, 1.6394805908203125, -12.300628662109375, 1.7998123168945312, 17.945289611816406, 24.8505859375, 10.81643295288086, 21.837024688720703, 16.363487243652344, 11.970022201538086, 17.66064453125, -22.96335792541504, 3.6359519958496094, 10.949920654296875, -1.3636322021484375, 4.688671112060547, 0.32857513427734375, 21.628448486328125, 9.41666030883789, 2.3620452880859375, -5.376167297363281, 0.6418418884277344, 16.605567932128906, 24.776931762695312, 7.636737823486328, 18.012611389160156, 9.126876831054688, 17.988815307617188, -1.9047412872314453, 1.96954345703125, 1.0208015441894531, -3.531269073486328, -5.0694580078125, 17.25750732421875, -19.067611694335938, 11.91802978515625, 7.7440338134765625, -1.1800270080566406, 18.796363830566406, 18.371261596679688, 12.393550872802734, 16.154205322265625, 2.1364593505859375, -3.967428207397461, -0.056423187255859375, 1.3358840942382812, 13.493392944335938, -8.210197448730469, 3.4887771606445312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000185.npy"}
{"epoch": 0.5606060606060606, "step": 186, "batch_size": 128, "mean": 5.5694074630737305, "std": 11.028128623962402, "min": -19.745494842529297, "p10": -8.622090148925782, "median": 5.365240097045898, "p90": 19.693540954589842, "max": 33.522850036621094, "pos_frac": 0.6640625, "sample": [20.609970092773438, -14.161722183227539, 26.820404052734375, 9.511127471923828, 19.886924743652344, -1.1744842529296875, 16.22974967956543, -2.1458587646484375, 0.5282611846923828, 13.052677154541016, 6.4950103759765625, 22.52773094177246, 5.6717529296875, -8.591094970703125, 0.9349784851074219, 0.8627071380615234, -11.31207275390625, -3.3730239868164062, 10.391887664794922, 4.7763824462890625, -2.0064773559570312, -13.876813888549805, -6.9699554443359375, 18.068191528320312, -3.518871307373047, 14.113780975341797, 5.3142547607421875, -9.419477462768555, 0.9267482757568359, 9.837265014648438, 16.117652893066406, 0.3259773254394531, -0.27801513671875, 2.225370407104492, 13.188796997070312, 14.209419250488281, -9.1190185546875, 23.743379592895508, 0.17178916931152344, 19.636627197265625, 16.74932098388672, 15.023544311523438, 8.381011962890625, 33.522850036621094, 20.353981018066406, 14.076438903808594, 13.888153076171875, 14.897964477539062, -8.694412231445312, 10.438735961914062, -1.7935428619384766, 7.5664825439453125, 9.90182876586914, -6.893871307373047, 5.508893966674805, 7.01887321472168, 12.311103820800781, 11.807258605957031, 6.289421081542969, 4.851737976074219, 17.50201416015625, 17.719959259033203, -14.688751220703125, -6.6658782958984375, -0.6060104370117188, 9.050811767578125, 14.764019012451172, -7.61376953125, 8.786537170410156, 11.574638366699219, 19.826339721679688, -7.436271667480469, 12.757339477539062, -19.745494842529297, -1.7689056396484375, 4.751136779785156, 2.6588382720947266, -2.221860885620117, -0.520477294921875, 16.3482666015625, -0.880950927734375, -1.8275604248046875, -15.965471267700195, 5.416225433349609, -15.824748992919922, 5.312955856323242, 15.344009399414062, -0.2826194763183594, 2.561025619506836, 1.9220123291015625, 4.362800598144531, -15.474555969238281, 8.166412353515625, 2.7992477416992188, 1.7938041687011719, -5.759618759155273, 16.86457061767578, 20.809226989746094, 22.8985595703125, 1.8624420166015625, 6.0561676025390625, 29.792236328125, 12.95855712890625, 9.191574096679688, 4.059406280517578, -1.88128662109375, -6.443389892578125, 17.07434844970703, 24.69495391845703, -6.575836181640625, 17.738967895507812, 15.636619567871094, 16.240814208984375, 5.653167724609375, 10.766754150390625, 15.819503784179688, -0.3640403747558594, 10.871313095092773, -18.885395050048828, -1.2852821350097656, -8.951759338378906, -4.408233642578125, 13.8848876953125, 26.55596923828125, -0.7632389068603516, -3.7864837646484375, -6.170860290527344, 1.3967819213867188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000186.npy"}
{"epoch": 0.5636363636363636, "step": 187, "batch_size": 128, "mean": 6.361828804016113, "std": 10.800338745117188, "min": -20.90582275390625, "p10": -7.473765563964844, "median": 5.781292915344238, "p90": 19.41537055969238, "max": 35.467674255371094, "pos_frac": 0.7265625, "sample": [-7.000221252441406, 17.522817611694336, 16.038436889648438, 11.657318115234375, 16.96136474609375, -15.214237213134766, 0.107208251953125, 13.77816390991211, 11.155755996704102, 0.0340118408203125, -18.068099975585938, -2.8169097900390625, -0.337860107421875, -1.6869182586669922, 14.467517852783203, -9.444511413574219, 0.7416534423828125, 19.0673828125, 0.56817626953125, -1.0902652740478516, -8.885345458984375, 2.452911376953125, -1.0554962158203125, 13.555107116699219, 4.307826995849609, 7.3698883056640625, 9.630142211914062, 24.438247680664062, 3.452045440673828, 35.467674255371094, 10.564804077148438, 19.374008178710938, -1.501739501953125, 6.925619125366211, 13.975868225097656, -14.562400817871094, 1.0011749267578125, -3.465972900390625, -15.670495986938477, -1.4942703247070312, 16.760948181152344, 18.3206787109375, -5.540557861328125, 10.94148063659668, -0.41998291015625, 8.240636825561523, 24.478565216064453, 2.7262496948242188, 11.605140686035156, -7.44488525390625, -12.674110412597656, -6.04522705078125, 2.675811767578125, 13.384786605834961, -13.304580688476562, 23.720062255859375, 3.322376251220703, 17.36798095703125, 6.5849151611328125, 22.215774536132812, 7.420843124389648, 4.003658294677734, 3.643280029296875, 19.511882781982422, 30.448699951171875, 10.494819641113281, 6.190666198730469, -4.668115615844727, 5.437238693237305, 14.397010803222656, 15.773872375488281, 18.08135986328125, 22.35967254638672, 0.7300224304199219, -2.5785140991210938, 0.8702774047851562, 5.9624176025390625, 10.646024703979492, 1.7694454193115234, -3.0104827880859375, 4.225645065307617, 0.18318748474121094, 16.219009399414062, 14.404064178466797, -3.57470703125, -20.90582275390625, 10.192840576171875, 12.92837142944336, 12.415515899658203, 0.5214309692382812, 18.7103271484375, 14.040618896484375, 19.281539916992188, 12.578575134277344, 5.681131362915039, -0.16416358947753906, 22.37652587890625, 7.916208267211914, 4.512844085693359, 1.8795166015625, 5.8814544677734375, 20.56482696533203, 3.1155776977539062, -9.252487182617188, 10.872299194335938, 9.012100219726562, 1.7170467376708984, 23.515541076660156, 0.9797630310058594, 12.4307861328125, 5.237388610839844, 8.96295166015625, 2.596038818359375, -11.170320510864258, 0.7045612335205078, 27.317962646484375, 22.616683959960938, 18.87560272216797, -1.12274169921875, -1.1465911865234375, 15.702011108398438, 16.519805908203125, 6.019235610961914, 16.389312744140625, -9.356943130493164, -4.58160400390625, -0.6882476806640625, -7.5411529541015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000187.npy"}
{"epoch": 0.5666666666666667, "step": 188, "batch_size": 128, "mean": 6.0066375732421875, "std": 10.535955429077148, "min": -19.6627197265625, "p10": -6.511170959472656, "median": 4.752582550048828, "p90": 20.692664337158202, "max": 31.181488037109375, "pos_frac": 0.703125, "sample": [1.9378814697265625, -1.0613479614257812, 2.6437149047851562, 11.739105224609375, 13.041803359985352, 4.224523544311523, 19.95696258544922, -0.4217700958251953, 4.321998596191406, -7.7815704345703125, 6.5643768310546875, -5.798431396484375, 2.9868316650390625, 15.698333740234375, 12.330909729003906, -5.6172332763671875, -0.8949966430664062, -8.898338317871094, 2.8214263916015625, 7.7093353271484375, 11.608695983886719, 0.6977691650390625, 25.44442367553711, 1.4156112670898438, 8.84881591796875, 8.982391357421875, 11.387016296386719, 8.986396789550781, -15.099617004394531, 17.700355529785156, 2.6356945037841797, 0.4635963439941406, -6.37384033203125, 8.02444839477539, -6.607723236083984, 13.877159118652344, -2.0372772216796875, 18.116613388061523, 4.373851776123047, -2.948688507080078, 6.093650817871094, 15.506629943847656, -3.1729888916015625, 0.9716796875, 0.3623619079589844, 14.440977096557617, 3.6745834350585938, -4.189813613891602, 13.435943603515625, 5.60767936706543, 10.686466217041016, -19.6627197265625, 1.8220367431640625, 5.61065673828125, 20.897842407226562, 5.206428527832031, 9.401847839355469, -10.368602752685547, -0.47281646728515625, 15.156620025634766, -6.469791412353516, 12.14738655090332, -4.1826629638671875, -14.325363159179688, 16.015151977539062, -0.0400390625, -0.2141265869140625, -0.9171142578125, 12.041213989257812, -7.638021469116211, -6.020606994628906, 23.359405517578125, 20.19678497314453, 5.131313323974609, 11.312841415405273, 28.984771728515625, 9.215202331542969, 31.181488037109375, -3.0834808349609375, 11.340187072753906, 25.19014549255371, 7.930488586425781, -0.2607860565185547, 20.89898681640625, 19.11273193359375, -4.878025054931641, 20.682838439941406, -15.210952758789062, -0.693878173828125, 15.116260528564453, -4.4707794189453125, 3.792276382446289, 2.7454452514648438, 6.938152313232422, 27.729814529418945, 0.020008087158203125, 0.3323249816894531, 9.25925064086914, 20.106597900390625, 12.112375259399414, -16.800277709960938, -0.8188285827636719, -8.099365234375, 2.599893569946289, 23.974411010742188, 2.4984703063964844, 20.715591430664062, 23.243762969970703, 6.7252960205078125, 13.504478454589844, 18.537899017333984, 10.507284164428711, 9.545511245727539, -9.941009521484375, 15.812385559082031, 1.124725341796875, 10.505365371704102, 25.573394775390625, -4.842077255249023, 4.196525573730469, 13.413482666015625, 1.0270118713378906, 20.848201751708984, 13.563880920410156, -1.3558425903320312, 2.991191864013672, 2.3243589401245117, -13.02166748046875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000188.npy"}
{"epoch": 0.5696969696969697, "step": 189, "batch_size": 128, "mean": 5.325828552246094, "std": 9.95116901397705, "min": -18.458229064941406, "p10": -6.499485969543457, "median": 4.508493423461914, "p90": 18.535761260986327, "max": 43.7235107421875, "pos_frac": 0.6875, "sample": [17.96691131591797, -6.764385223388672, 12.36178207397461, 15.203496932983398, 12.708412170410156, 4.871131896972656, -13.659626007080078, -7.032630920410156, -9.937156677246094, 17.34494400024414, -4.957948684692383, -6.385957717895508, 21.061813354492188, -0.43144798278808594, 4.514583587646484, 7.8394622802734375, 12.403585433959961, -0.3289966583251953, 8.657440185546875, -2.237150192260742, 7.6498565673828125, 6.847801208496094, 16.6090087890625, 3.6251678466796875, -6.023456573486328, 6.550601959228516, 14.599395751953125, -2.1547317504882812, -0.9351005554199219, 21.415138244628906, -4.1104278564453125, -5.496442794799805, 5.167327880859375, 14.747322082519531, 17.48839569091797, -9.870750427246094, 12.729888916015625, 8.059898376464844, 9.692806243896484, 11.380645751953125, 11.585580825805664, -11.6336669921875, 6.981658935546875, 23.384536743164062, 4.521083831787109, -4.913078308105469, 13.477500915527344, -3.182008743286133, -1.9942474365234375, 19.222076416015625, 6.116641998291016, 3.020526885986328, 2.3136749267578125, 20.08197784423828, 7.260648727416992, -4.298198699951172, 19.952232360839844, 7.455781936645508, -13.992116928100586, 3.133983612060547, 9.298711776733398, -0.537384033203125, 15.992095947265625, 8.846019744873047, 7.602806091308594, 3.8975143432617188, 0.7867889404296875, 2.372610092163086, 17.460540771484375, 16.867713928222656, -0.22058868408203125, -1.6791000366210938, 1.5017719268798828, 3.2330188751220703, 20.756881713867188, -2.8504486083984375, 14.201776504516602, 4.502403259277344, 2.8211708068847656, 3.8959197998046875, -12.360870361328125, 1.6248283386230469, 15.081893920898438, 2.1089096069335938, 4.814117431640625, -1.3928031921386719, 9.114822387695312, -3.307584762573242, 2.617523193359375, 6.920982360839844, 2.508373260498047, -0.8850593566894531, -1.0591964721679688, -11.597000122070312, -5.74127197265625, 18.471282958984375, 2.36871337890625, 24.198020935058594, -12.499610900878906, 2.0799617767333984, 26.186843872070312, -0.8264923095703125, 13.323333740234375, 12.407527923583984, 18.68621063232422, 4.9018707275390625, 1.8328590393066406, 7.88934326171875, -12.648757934570312, 2.956876754760742, 10.648941040039062, 6.59954833984375, 43.7235107421875, -3.6294708251953125, 3.3929977416992188, 6.808769226074219, 3.6319732666015625, 13.214485168457031, 1.6195564270019531, 20.293672561645508, -8.986825942993164, 12.332275390625, 3.2802734375, 5.146392822265625, -1.5376205444335938, -18.458229064941406, -1.2752971649169922, 18.710006713867188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000189.npy"}
{"epoch": 0.5727272727272728, "step": 190, "batch_size": 128, "mean": 7.825458526611328, "std": 10.67859172821045, "min": -21.312885284423828, "p10": -5.068115997314452, "median": 6.683483123779297, "p90": 22.039104461669922, "max": 32.013580322265625, "pos_frac": 0.8046875, "sample": [12.092290878295898, -8.486953735351562, 13.909744262695312, 1.089874267578125, 4.065277099609375, 18.511566162109375, 11.54533576965332, -15.119243621826172, 2.9298248291015625, -3.0226058959960938, 16.087711334228516, 18.207839965820312, 32.013580322265625, 11.807708740234375, 21.127033233642578, 17.2366943359375, 3.439291000366211, 2.8526687622070312, -13.46258544921875, 12.069366455078125, 20.661407470703125, 8.987419128417969, 2.1904983520507812, -6.300792694091797, 0.7921695709228516, 9.636810302734375, 14.099334716796875, -1.0855789184570312, 1.446624755859375, -0.7145576477050781, 9.449222564697266, 2.0634632110595703, 2.0056915283203125, 4.3814239501953125, 6.932037353515625, 4.57464599609375, -3.3025054931640625, 19.82391357421875, 1.1281700134277344, 30.01556396484375, -0.619842529296875, -13.391510009765625, 10.065879821777344, -2.610595703125, 6.434928894042969, -12.016265869140625, 30.401023864746094, 4.560455322265625, -5.933540344238281, 7.452178955078125, -0.30379486083984375, 19.84284782409668, 15.725753784179688, 1.6334590911865234, 23.337787628173828, 18.2034912109375, 5.9523773193359375, 12.552986145019531, 10.930648803710938, 5.297613143920898, 4.062812805175781, 13.736228942871094, 12.69732666015625, 4.050498962402344, 5.304496765136719, 16.297595977783203, 12.989059448242188, 2.1633834838867188, -16.035858154296875, -7.243461608886719, 15.379325866699219, 23.97083282470703, 1.8506088256835938, 10.435600280761719, 11.519203186035156, 22.733217239379883, 3.7790756225585938, 9.827476501464844, 2.131305694580078, -21.312885284423828, 8.703529357910156, 2.602100372314453, 28.076759338378906, 4.009815216064453, 27.306533813476562, 14.397891998291016, 7.758026123046875, 5.264007568359375, 3.58685302734375, 23.064376831054688, 5.630889892578125, 3.1291160583496094, -4.6972198486328125, 22.355506896972656, 15.489471435546875, 15.570056915283203, -4.415582656860352, 19.19341278076172, 11.337158203125, 1.0092811584472656, -0.8213653564453125, 17.361631393432617, 1.9375762939453125, -6.212932586669922, 19.291053771972656, -3.504322052001953, 10.140077590942383, 24.83454132080078, -7.308965682983398, 5.216182708740234, 13.000717163085938, 10.667510986328125, 14.586654663085938, -16.395381927490234, 24.612852096557617, 29.89324951171875, 1.018035888671875, 12.141168594360352, 0.41301727294921875, 12.732154846191406, 2.1440353393554688, 21.90350341796875, 17.577302932739258, 17.93968963623047, 5.380001068115234, 10.016239166259766, 5.552680969238281, -3.361297607421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000190.npy"}
{"epoch": 0.5757575757575758, "step": 191, "batch_size": 128, "mean": 4.561325550079346, "std": 10.270008087158203, "min": -23.294403076171875, "p10": -7.34630661010742, "median": 3.6506824493408203, "p90": 19.413994598388673, "max": 32.999114990234375, "pos_frac": 0.6484375, "sample": [-4.6628570556640625, 3.2923736572265625, -0.923095703125, -12.208721160888672, -23.294403076171875, 6.058984756469727, -1.201171875, 6.760307312011719, 12.518081665039062, -1.6924896240234375, 1.1334304809570312, 0.7602348327636719, 2.230043411254883, -6.868385314941406, 4.999696731567383, 22.757949829101562, 6.487567901611328, 4.075614929199219, -2.725606918334961, 20.33747673034668, -1.0592384338378906, -5.660682678222656, -11.487264633178711, 3.5993499755859375, -0.8920230865478516, -2.381010055541992, -0.771636962890625, 10.779022216796875, 12.663009643554688, 1.6949920654296875, -0.3165607452392578, -1.3777389526367188, 27.12749481201172, 7.767421722412109, 1.142974853515625, 0.40053749084472656, 9.005804061889648, 7.507789611816406, -1.2549610137939453, 32.999114990234375, -9.506973266601562, 9.72470474243164, -8.461456298828125, -13.452922821044922, 14.55474853515625, -1.1043949127197266, 9.514820098876953, 8.345085144042969, 16.128238677978516, -6.2574920654296875, 7.0518035888671875, 7.816511154174805, 9.322021484375, 27.715530395507812, 9.769647598266602, 3.7577342987060547, 23.474502563476562, -14.55438232421875, -3.26580810546875, 0.7639961242675781, -0.34593963623046875, 2.4090499877929688, 1.0404815673828125, 25.115032196044922, -11.741165161132812, -0.14444351196289062, 25.065765380859375, 11.725645065307617, 12.589607238769531, -1.8850936889648438, 13.36798095703125, -5.7548675537109375, -11.954124450683594, -1.4782562255859375, -14.923238754272461, 14.398834228515625, -0.9173297882080078, 15.108985900878906, 2.139068603515625, 1.4538650512695312, -11.24697494506836, -3.956035614013672, 20.643354415893555, -2.6679840087890625, -13.325981140136719, 1.6261558532714844, 3.7264480590820312, 5.680850982666016, -1.136260986328125, 3.7269210815429688, 2.899444580078125, 6.487068176269531, -1.6986160278320312, 5.343217849731445, 19.625572204589844, 0.6663093566894531, 19.323318481445312, 8.711370468139648, 5.6504364013671875, 11.35394287109375, -2.3812637329101562, 1.6654052734375, 18.449758529663086, 0.5134429931640625, -2.552032470703125, 3.702014923095703, 9.598861694335938, 9.277381896972656, -5.315704345703125, 27.87725830078125, 12.503814697265625, 4.330852508544922, -3.4091796875, 25.439292907714844, 5.6514892578125, 10.608291625976562, 3.2566871643066406, 16.303184509277344, -9.041000366210938, -6.647056579589844, 9.303451538085938, 4.869976043701172, 10.034976959228516, 26.982162475585938, 6.240455627441406, 10.875320434570312, 12.277227401733398, 8.07480239868164], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000191.npy"}
{"epoch": 0.5787878787878787, "step": 192, "batch_size": 128, "mean": 6.356161117553711, "std": 10.93486499786377, "min": -18.96563720703125, "p10": -7.321409606933593, "median": 5.46859073638916, "p90": 21.69983539581299, "max": 33.061912536621094, "pos_frac": 0.734375, "sample": [0.2654247283935547, 14.380325317382812, 14.844512939453125, 4.7413787841796875, -7.53955078125, -7.223503112792969, 7.272426605224609, 2.0620365142822266, 21.24560546875, -3.2553672790527344, 24.569046020507812, 18.09421157836914, 8.503921508789062, -5.890380859375, 25.06391143798828, 3.55670166015625, -7.9813385009765625, 26.351348876953125, -1.2768478393554688, 18.737632751464844, 8.561820983886719, -15.812103271484375, -1.839996337890625, 1.621795654296875, -15.403495788574219, 25.163658142089844, 3.456655502319336, 27.33910369873047, 15.218681335449219, 1.9721527099609375, -1.09356689453125, 17.39678955078125, 2.448455810546875, -6.514793395996094, 2.9329071044921875, 0.704315185546875, 4.451591491699219, 8.835399627685547, 13.411670684814453, -3.3801422119140625, 18.194705963134766, 9.40521240234375, 7.510887145996094, -8.930694580078125, 6.552701950073242, 29.458984375, 21.693037033081055, -6.2858123779296875, 1.6051712036132812, 1.9551467895507812, 4.4366912841796875, -0.5936183929443359, 19.434120178222656, 1.277578353881836, -0.6884002685546875, 7.138603210449219, 18.67650604248047, -7.76593017578125, -4.057037353515625, -10.88015365600586, 20.1297550201416, -14.929069519042969, 21.991474151611328, 15.974908828735352, -7.2279205322265625, 23.590362548828125, 8.411258697509766, -2.1562652587890625, 2.6088180541992188, 11.578914642333984, 6.2076568603515625, 13.533454895019531, 6.6504669189453125, 26.479400634765625, -4.68292236328125, 20.530609130859375, 4.282623291015625, 10.5950927734375, 7.831867218017578, -16.111671447753906, 1.173971176147461, 6.096828460693359, 5.370994567871094, -2.5465030670166016, 21.7156982421875, 3.9634475708007812, -18.96563720703125, 11.571144104003906, 1.1312789916992188, 5.566186904907227, 4.04119873046875, 0.0474090576171875, 1.5041656494140625, 15.847335815429688, 6.131687164306641, -0.7526359558105469, 18.276355743408203, -1.8010673522949219, 2.97601318359375, 7.8130340576171875, 4.2643280029296875, 4.727912902832031, 4.718784332275391, 6.365119934082031, -5.884824752807617, 10.748043060302734, 31.120155334472656, 8.603973388671875, -8.852081298828125, 6.29779052734375, 22.149425506591797, 11.702674865722656, 4.719696044921875, 10.13165283203125, 20.45989227294922, -13.480772018432617, 33.061912536621094, 16.231094360351562, 6.7036590576171875, 2.442638397216797, -8.283973693847656, 15.603034973144531, -0.7255172729492188, -6.527860641479492, 13.33160400390625, 8.597686767578125, 6.400705337524414, 10.382102966308594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000192.npy"}
{"epoch": 0.5818181818181818, "step": 193, "batch_size": 128, "mean": 5.242209434509277, "std": 9.733969688415527, "min": -22.03619384765625, "p10": -6.306342315673828, "median": 5.322559356689453, "p90": 17.68072052001953, "max": 31.565650939941406, "pos_frac": 0.703125, "sample": [-6.333728790283203, 0.018306732177734375, 15.339437484741211, 4.6874542236328125, 2.524730682373047, 4.6532440185546875, 2.37652587890625, 14.91476058959961, 21.37423324584961, 17.63055419921875, -0.301116943359375, 3.6491355895996094, -3.3220901489257812, -10.023284912109375, 1.8815078735351562, 13.167854309082031, 17.797775268554688, -1.343719482421875, 7.029167175292969, 21.97162628173828, 10.284996032714844, 7.3571624755859375, 11.036203384399414, 18.099061965942383, -2.5944595336914062, 3.63037109375, 5.357940673828125, 11.486211776733398, -3.0425186157226562, -2.60723876953125, 6.786975860595703, 10.084884643554688, 6.145332336425781, 7.713039398193359, -6.294605255126953, 7.212409973144531, 8.997716903686523, 12.392372131347656, 10.787773132324219, 16.929168701171875, 21.35335350036621, 11.041671752929688, 5.43797492980957, 13.399955749511719, -3.9852371215820312, 5.302268981933594, 1.3217697143554688, -7.8975372314453125, -2.055025100708008, -13.105026245117188, 14.931808471679688, 8.057937622070312, 2.1240615844726562, 20.748931884765625, 15.5826416015625, -6.882011413574219, 1.2473030090332031, 18.706531524658203, -4.404327392578125, 27.871231079101562, -4.639331817626953, 2.5239715576171875, 18.548507690429688, 16.808456420898438, 17.093414306640625, -5.788089752197266, 0.8366832733154297, 10.515359878540039, 23.992889404296875, -0.1956329345703125, -1.4729080200195312, 5.646148681640625, 4.1521148681640625, 17.21211051940918, 6.0505523681640625, 10.022308349609375, 5.156827926635742, 13.891868591308594, 10.7646484375, -7.604450225830078, -3.1969833374023438, -9.069305419921875, -13.849128723144531, -2.0291709899902344, -11.809951782226562, 2.32330322265625, 4.668495178222656, 18.848045349121094, 0.5402641296386719, 31.565650939941406, 2.216094970703125, -1.1601715087890625, -22.03619384765625, 8.63507080078125, 21.3662109375, -3.9022140502929688, 3.0629959106445312, 7.1135711669921875, 5.5012969970703125, 5.745880126953125, -15.352071762084961, -1.9866447448730469, 0.953765869140625, 10.577980041503906, -5.749000549316406, 9.395183563232422, 16.64281463623047, -2.71435546875, 5.711645126342773, 5.471649169921875, 3.7686767578125, 12.10211181640625, -2.785614013671875, -13.81004524230957, -0.0482177734375, 4.12115478515625, 16.75299072265625, -0.8108177185058594, 2.742950439453125, 11.616096496582031, 5.747528076171875, 5.3428497314453125, 3.9894256591796875, 15.057441711425781, 11.204444885253906, -0.502197265625, 11.033981323242188, -21.7735595703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000193.npy"}
{"epoch": 0.5848484848484848, "step": 194, "batch_size": 128, "mean": 6.208719730377197, "std": 10.552331924438477, "min": -17.516944885253906, "p10": -6.30956916809082, "median": 5.066561698913574, "p90": 19.92926483154297, "max": 33.26664733886719, "pos_frac": 0.7265625, "sample": [14.57293701171875, 1.6471786499023438, 16.65593147277832, -0.0355377197265625, 0.275665283203125, 26.566741943359375, 1.3103866577148438, 2.1757774353027344, 11.958206176757812, 14.058258056640625, 8.299480438232422, -8.407012939453125, 14.56121826171875, 19.494434356689453, 8.198333740234375, -3.0217247009277344, 5.7945404052734375, 17.445066452026367, -5.4370574951171875, 10.776466369628906, 18.406827926635742, 1.53564453125, 10.81773567199707, -11.559501647949219, -1.628499984741211, 20.029891967773438, -5.7894744873046875, 12.23480224609375, 6.50018310546875, 7.3272705078125, -1.1114425659179688, 5.175344467163086, 14.416576385498047, 32.0068359375, 5.452169418334961, -0.3276939392089844, 3.774578094482422, 10.207717895507812, 16.397207260131836, 0.7345771789550781, -1.7464313507080078, 15.445556640625, -2.957000732421875, 3.751893997192383, 11.257461547851562, 7.523321151733398, 0.18856048583984375, 0.290435791015625, 0.0046939849853515625, 14.733551025390625, 9.547664642333984, -16.25794219970703, -4.658256530761719, 17.765769958496094, 17.38799285888672, 24.235549926757812, -2.2604293823242188, 6.442623138427734, 23.180648803710938, 19.034515380859375, 25.146026611328125, 0.8304195404052734, 6.873401641845703, 9.726058959960938, 22.671249389648438, 13.951004028320312, 11.218841552734375, 29.834300994873047, -0.9769515991210938, 5.517425537109375, 9.644302368164062, 1.856903076171875, 0.08964729309082031, 33.26664733886719, 4.057155609130859, 14.823625564575195, 19.886138916015625, -0.8597183227539062, 22.198440551757812, 3.8751678466796875, 0.15795135498046875, 17.50287628173828, -5.8121337890625, 3.2876129150390625, -3.43695068359375, -12.908905029296875, 6.1875152587890625, -13.262248992919922, 4.4400634765625, 23.735872268676758, 0.042781829833984375, 23.445581436157227, 1.0511016845703125, 3.99737548828125, -17.516944885253906, -11.663368225097656, -0.3254241943359375, -3.8523483276367188, 3.5987167358398438, 12.786598205566406, -6.209194183349609, 8.211261749267578, -2.719390869140625, 23.3577880859375, 18.006656646728516, -12.073968887329102, 11.076202392578125, 8.874771118164062, 5.391353607177734, 3.389698028564453, -3.240795135498047, -5.411613464355469, 4.548088073730469, 17.963531494140625, -11.17791748046875, 9.699539184570312, 19.083072662353516, 4.696800231933594, -2.5104904174804688, 3.8372459411621094, 9.415802001953125, 4.9577789306640625, -7.2696533203125, -6.5437774658203125, 1.9861297607421875, -6.762565612792969, -11.288551330566406, 5.942346572875977], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000194.npy"}
{"epoch": 0.5878787878787879, "step": 195, "batch_size": 128, "mean": 6.631426811218262, "std": 9.904552459716797, "min": -23.382057189941406, "p10": -4.847399139404297, "median": 5.929265975952148, "p90": 18.843267059326173, "max": 33.887733459472656, "pos_frac": 0.7421875, "sample": [-2.3885421752929688, -8.781618118286133, 12.29736328125, 11.249221801757812, 4.028844833374023, 8.964836120605469, 14.484092712402344, 16.929828643798828, 3.8893203735351562, 12.09527587890625, 3.6005401611328125, 5.117156982421875, 18.677005767822266, 21.28142547607422, -1.6387214660644531, 7.882804870605469, -2.4035186767578125, 22.37078094482422, 18.85346221923828, 4.91595458984375, 27.34893798828125, 2.100139617919922, -7.289192199707031, -14.449193954467773, -15.614105224609375, 6.045379638671875, 1.7734794616699219, 1.964202880859375, 9.36572265625, -3.299409866333008, -5.739585876464844, 26.478469848632812, 18.838897705078125, 15.01611328125, 3.025421142578125, -23.382057189941406, 6.3802642822265625, 11.156639099121094, 8.411643981933594, 13.566783905029297, 12.0838623046875, -5.584999084472656, 21.450748443603516, -8.882930755615234, 16.634597778320312, 14.365692138671875, 6.4791717529296875, 12.502058029174805, 15.13909912109375, -4.791053771972656, 0.6371517181396484, -4.830270767211914, 8.080284118652344, 3.3954315185546875, 13.3978271484375, -2.7355880737304688, 17.314491271972656, 5.233512878417969, 5.226341247558594, 27.547256469726562, -1.5654773712158203, 14.137052536010742, 5.924922943115234, -1.5579032897949219, 1.6833038330078125, 9.527894973754883, 15.945880889892578, -1.2819671630859375, 13.331573486328125, 9.209968566894531, 3.4043922424316406, 19.724544525146484, 16.010265350341797, -8.495479583740234, 6.764530181884766, -0.25637054443359375, 8.560638427734375, 6.4978790283203125, 6.8271484375, 27.48926544189453, 5.9336090087890625, 9.097419738769531, -4.887365341186523, 3.4694442749023438, -15.65890121459961, 10.480422973632812, 21.24327850341797, -1.0360355377197266, 12.72698974609375, 0.203582763671875, 12.712898254394531, 7.480014801025391, 4.310794830322266, 4.86817741394043, 2.152212142944336, 3.9271583557128906, -3.202718734741211, 3.1656036376953125, 3.160421371459961, -3.6331405639648438, 17.071529388427734, -0.5877647399902344, -2.0614585876464844, 33.887733459472656, 14.07305908203125, 19.291494369506836, 2.94952392578125, -10.886138916015625, 3.2529449462890625, 31.83448028564453, -5.1316375732421875, 7.758945465087891, 11.449470520019531, 3.4576034545898438, -0.5464591979980469, 12.816448211669922, -0.440765380859375, 14.541824340820312, 1.866464614868164, 2.9452476501464844, 2.0701637268066406, -0.8173904418945312, 4.843278884887695, -0.3514728546142578, 10.306053161621094, 15.249366760253906, 7.2969970703125, 18.500362396240234], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000195.npy"}
{"epoch": 0.5909090909090909, "step": 196, "batch_size": 128, "mean": 8.299646377563477, "std": 12.597726821899414, "min": -24.570903778076172, "p10": -6.258211135864257, "median": 8.667743682861328, "p90": 21.774534606933592, "max": 44.190277099609375, "pos_frac": 0.734375, "sample": [6.354827880859375, -24.570903778076172, 21.098522186279297, 5.604377746582031, 16.142745971679688, 22.822097778320312, -1.234292984008789, 23.291259765625, -12.326898574829102, 11.123519897460938, -5.7392730712890625, -0.640533447265625, -2.174530029296875, 4.620395660400391, 6.5475616455078125, 11.372074127197266, 10.034072875976562, -0.928253173828125, -6.972259521484375, -3.9867477416992188, 3.4387950897216797, 12.95361328125, 23.376953125, 27.195831298828125, 19.849925994873047, 20.06548309326172, -14.806808471679688, -4.865043640136719, 19.461292266845703, 12.75283432006836, 19.18585205078125, 17.888973236083984, 12.1685791015625, 39.5069580078125, 9.34649658203125, -13.589469909667969, 19.684326171875, 1.208709716796875, 19.954090118408203, 34.323036193847656, 13.305152893066406, -5.952190399169922, -11.976432800292969, -3.868227005004883, 14.877243041992188, -0.7177238464355469, 44.190277099609375, 14.39013671875, 5.562656402587891, 13.566165924072266, -17.465063095092773, -14.128185272216797, 2.8342552185058594, 12.274251937866211, 20.721099853515625, 1.0005416870117188, 17.80996322631836, 3.3817691802978516, 5.753204345703125, 3.80120849609375, -1.257049560546875, 9.317054748535156, 7.781024932861328, 8.759658813476562, -17.13592529296875, 2.6281700134277344, 7.8804473876953125, 9.064468383789062, 8.445480346679688, 3.787923812866211, 21.805862426757812, -2.6279468536376953, 4.7884368896484375, 11.059799194335938, 21.034744262695312, 13.051231384277344, 7.326202392578125, -0.03894805908203125, 4.1284332275390625, -11.151664733886719, 11.845420837402344, 4.5754852294921875, -4.86456298828125, 14.082008361816406, 2.1320648193359375, 8.575828552246094, 9.270942687988281, 17.424419403076172, 21.035423278808594, 5.3719024658203125, 0.2117328643798828, 40.183013916015625, 0.23433685302734375, 3.345458984375, -5.485931396484375, -0.5574989318847656, 22.725616455078125, 21.50480079650879, 12.92779541015625, -20.661422729492188, 17.070358276367188, 13.120307922363281, 21.601516723632812, 11.983062744140625, 18.434219360351562, 20.238433837890625, 19.891429901123047, 10.51443862915039, 13.442131042480469, -12.714508056640625, 0.5812454223632812, 28.678573608398438, 32.31495666503906, 31.741958618164062, -7.665153503417969, 21.7611083984375, -0.2017841339111328, 1.9892501831054688, -0.2283935546875, -1.1805267333984375, 12.519378662109375, -5.9419708251953125, 6.705108642578125, 13.7911376953125, -0.07988739013671875, 16.1866397857666, 9.383323669433594, 16.995765686035156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000196.npy"}
{"epoch": 0.593939393939394, "step": 197, "batch_size": 128, "mean": 7.150927543640137, "std": 9.127269744873047, "min": -15.294876098632812, "p10": -3.3246639251708983, "median": 6.998452186584473, "p90": 19.417772483825683, "max": 30.63599395751953, "pos_frac": 0.71875, "sample": [10.982810974121094, -1.5674514770507812, 11.111015319824219, 17.21426773071289, 18.776865005493164, 12.226646423339844, 3.3067665100097656, 3.3465118408203125, 9.544425964355469, 18.185161590576172, -1.1094322204589844, 2.8397445678710938, -3.0265674591064453, 3.897043228149414, -3.22515869140625, 30.63599395751953, 22.694046020507812, 20.935504913330078, 3.365415573120117, 11.5184326171875, -6.727882385253906, 24.4752197265625, -1.5910720825195312, -2.6555843353271484, -0.6779651641845703, 4.4683837890625, 14.259695053100586, 28.07762908935547, 13.635963439941406, -5.828636169433594, 7.977512359619141, 17.644960403442383, 0.7731590270996094, 5.5649871826171875, 4.952968597412109, -4.476875305175781, 22.19479751586914, 13.787487030029297, 9.244209289550781, -5.196666717529297, 7.5064544677734375, 16.045303344726562, -1.3212089538574219, 18.55200958251953, 17.24651336669922, 13.385948181152344, 3.55859375, -2.1763172149658203, 7.779874801635742, 4.7910308837890625, 9.903146743774414, -0.5037269592285156, 3.7530975341796875, 12.66982650756836, 13.611587524414062, -0.5326042175292969, 1.5417938232421875, -1.291351318359375, -15.294876098632812, -5.227203369140625, 4.626543045043945, 19.553499221801758, 8.793914794921875, 6.58502197265625, -0.9650650024414062, 22.06287384033203, 19.359603881835938, -0.8743858337402344, -0.8757476806640625, 5.555412292480469, -12.265853881835938, 5.861385345458984, -0.9648284912109375, -7.370336532592773, -1.325592041015625, 9.955364227294922, 13.668212890625, -6.4121551513671875, 8.40945053100586, 8.26611328125, 25.193078994750977, 13.406837463378906, 5.18995475769043, 9.131195068359375, -0.8052692413330078, 9.482162475585938, 13.265396118164062, -1.1888351440429688, -3.556842803955078, 10.666450500488281, -4.6400299072265625, 7.483848571777344, 11.019859313964844, 12.173103332519531, 6.431465148925781, 7.478086471557617, 22.70703887939453, 9.533357620239258, 22.066802978515625, 8.501667022705078, 2.017383575439453, -2.192150115966797, -3.1912002563476562, 11.083566665649414, 12.679035186767578, 21.06468963623047, 15.474420547485352, 18.224571228027344, 4.654314041137695, 1.6057395935058594, -8.8529052734375, 5.7628021240234375, 7.411882400512695, -2.9473838806152344, -0.206146240234375, 12.101459503173828, 6.459527969360352, 24.391357421875, -14.192567825317383, 10.762985229492188, 10.264179229736328, 14.95870590209961, 4.886907577514648, 3.8121337890625, 2.8276519775390625, 17.589033126831055, 5.676540374755859, 12.457183837890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000197.npy"}
{"epoch": 0.5969696969696969, "step": 198, "batch_size": 128, "mean": 5.831066608428955, "std": 9.7578706741333, "min": -18.893463134765625, "p10": -5.526402473449707, "median": 5.063658714294434, "p90": 19.45699691772461, "max": 31.490463256835938, "pos_frac": 0.734375, "sample": [11.376115798950195, 21.186996459960938, 11.242263793945312, 2.310802459716797, 16.383377075195312, 6.58270263671875, -3.0712509155273438, 14.423683166503906, -3.4493408203125, 4.548288345336914, -5.353851318359375, 6.1426849365234375, 0.59527587890625, -3.1485595703125, 0.1733551025390625, 6.5496063232421875, 3.01251220703125, 1.0284881591796875, -13.7418212890625, 14.048149108886719, -5.820671081542969, 4.745567321777344, 13.634185791015625, -8.52017593383789, -2.1969070434570312, -2.7357044219970703, 1.9628639221191406, 19.762619018554688, -8.376266479492188, -1.0116844177246094, 9.198204040527344, 16.516502380371094, 23.681140899658203, -12.618240356445312, 2.5858917236328125, 1.4305877685546875, 16.376800537109375, 17.676719665527344, 5.498466491699219, 25.738515853881836, 17.386585235595703, -12.614559173583984, 10.701667785644531, 12.490928649902344, 5.154024124145508, -2.41278076171875, 11.967580795288086, 31.490463256835938, -1.2671585083007812, 0.20603561401367188, 21.397804260253906, 1.5187435150146484, 16.615041732788086, 15.723762512207031, -4.622901916503906, 8.200592041015625, -7.401275634765625, 7.228364944458008, 6.225561141967773, -8.64202880859375, 5.268289566040039, -10.823989868164062, 4.207679748535156, 12.069877624511719, 19.476062774658203, 28.096145629882812, 19.44882583618164, 23.003868103027344, 18.98529815673828, 15.578712463378906, 4.973293304443359, 12.44537353515625, 8.47833251953125, 12.085639953613281, 0.2387847900390625, 7.398647308349609, 7.479949951171875, -6.7406463623046875, 28.364601135253906, 0.8517341613769531, 4.100862503051758, 24.592437744140625, -0.908050537109375, 6.222587585449219, 9.923894882202148, 7.109222412109375, 20.72317886352539, 2.9617919921875, 9.692840576171875, -3.7520370483398438, 5.2966461181640625, -4.492042541503906, 0.36046600341796875, -1.6369552612304688, 3.1684703826904297, 16.372718811035156, -3.1417007446289062, 6.253923416137695, 4.811229705810547, 1.3056182861328125, 9.757736206054688, -5.474843978881836, 4.546352386474609, 0.5246639251708984, 3.6492233276367188, 9.743927001953125, 2.3547897338867188, -18.893463134765625, 15.263023376464844, 8.665382385253906, 1.8934688568115234, -16.044811248779297, -2.2093887329101562, 10.762798309326172, -1.6826820373535156, 11.237045288085938, 8.312911987304688, 9.456245422363281, 2.4210128784179688, 3.1751708984375, -0.06818008422851562, 20.8708438873291, 2.8096141815185547, 6.758216857910156, -2.4854888916015625, -3.2304840087890625, -5.646705627441406, 12.344280242919922], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000198.npy"}
{"epoch": 0.6, "step": 199, "batch_size": 128, "mean": 6.254915237426758, "std": 10.916333198547363, "min": -37.50761413574219, "p10": -5.987427902221679, "median": 7.004732131958008, "p90": 18.775133514404295, "max": 36.44830322265625, "pos_frac": 0.71875, "sample": [-2.5263099670410156, 5.680900573730469, -14.271846771240234, 11.729072570800781, -1.4045772552490234, 7.9200592041015625, 17.399751663208008, 4.316398620605469, -22.89678955078125, -5.847259521484375, 11.173309326171875, -1.0121307373046875, 8.47719955444336, 1.049224853515625, 10.860595703125, 13.180831909179688, 14.080215454101562, 0.5764007568359375, 17.919692993164062, 0.62933349609375, 5.772071838378906, 18.723037719726562, 15.062263488769531, 4.065929412841797, -12.124671936035156, 0.1271209716796875, 5.036102294921875, 2.340087890625, 4.107124328613281, 6.2157440185546875, -1.7000885009765625, -2.3169898986816406, 11.21649169921875, 8.460159301757812, 1.3531150817871094, 9.279350280761719, 10.04986572265625, 14.535324096679688, 15.40756607055664, 17.235557556152344, 4.071556091308594, -20.208938598632812, -8.88031005859375, 10.960578918457031, 16.954566955566406, 5.5658721923828125, 6.220024108886719, 3.8612613677978516, 13.599494934082031, 22.0716552734375, 19.615955352783203, 4.433624267578125, -37.50761413574219, -6.9326629638671875, 28.486114501953125, -7.134185791015625, 5.317935943603516, 8.36170768737793, -0.26479339599609375, 7.3860626220703125, -17.605140686035156, -5.686431884765625, -2.2678890228271484, 25.44031524658203, 8.681573867797852, 10.0269775390625, 15.485160827636719, 8.309188842773438, 17.41778564453125, 0.7591171264648438, 22.61475372314453, 7.055328369140625, 13.806495666503906, -5.0635223388671875, 11.769973754882812, -2.906759262084961, 13.066360473632812, 7.566764831542969, 24.272216796875, 27.721878051757812, 7.97650146484375, 11.264015197753906, 9.136466979980469, -4.912532806396484, -1.1369705200195312, 18.896690368652344, 10.745193481445312, 4.780914306640625, 1.2660484313964844, 17.523035049438477, -3.3686752319335938, 36.44830322265625, 6.6810302734375, 0.13080596923828125, 11.078376770019531, -10.014305114746094, -5.200233459472656, 22.928478240966797, 16.086322784423828, 12.248146057128906, -6.454826354980469, -2.1621665954589844, 12.114982604980469, -2.7735977172851562, -0.7918167114257812, 23.833194732666016, 24.004785537719727, 10.361785888671875, 5.404315948486328, 8.077430725097656, -0.41538238525390625, 8.7318115234375, -3.5520401000976562, -7.725341796875, 7.65031623840332, 10.544361114501953, 9.28740119934082, 17.972373962402344, -0.12774276733398438, -0.9522323608398438, 18.57586669921875, -6.314487457275391, 3.1117477416992188, -1.2240524291992188, 6.954135894775391, 12.7406005859375, 23.02791404724121, 1.8509445190429688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000199.npy"}
{"epoch": 0.603030303030303, "step": 200, "batch_size": 128, "mean": 6.371861934661865, "std": 10.510930061340332, "min": -20.5196533203125, "p10": -4.7092750549316404, "median": 4.622261047363281, "p90": 21.344076538085936, "max": 38.85504150390625, "pos_frac": 0.6875, "sample": [18.07250213623047, 3.094940185546875, -1.6011505126953125, 14.424560546875, -0.928436279296875, 13.919647216796875, 22.525840759277344, 9.06460952758789, 10.641571044921875, 5.448760986328125, 1.4840774536132812, 27.544601440429688, -20.5196533203125, -0.3153533935546875, -12.44384765625, 11.805086135864258, 10.000442504882812, 4.353645324707031, 3.0678863525390625, 2.1262359619140625, 7.0717315673828125, 22.045440673828125, 10.020721435546875, 19.732406616210938, -3.4760665893554688, -1.8407669067382812, 22.40978240966797, 15.754318237304688, 16.903026580810547, 13.434295654296875, 13.933650970458984, 21.771087646484375, -3.3663711547851562, -3.3554763793945312, -9.185310363769531, 4.869049072265625, 11.368875503540039, -7.541912078857422, 4.6689605712890625, 3.033203125, 26.498779296875, -0.0304412841796875, 8.172611236572266, 38.85504150390625, 6.966648101806641, 2.2955589294433594, 1.3338546752929688, -2.244413375854492, -0.8419723510742188, -1.9001750946044922, 28.80060577392578, -3.4857025146484375, 18.85430145263672, 15.139764785766602, 4.032173156738281, 1.091958999633789, 5.2239837646484375, 25.97814178466797, 6.1457061767578125, 11.932331085205078, 22.929466247558594, 0.017669677734375, 15.201759338378906, -2.0577468872070312, -2.56622314453125, 4.5755615234375, 0.4560222625732422, 1.69097900390625, 17.675018310546875, 16.580841064453125, 6.013221740722656, 11.29859733581543, -2.088134765625, 20.088024139404297, 12.347938537597656, 13.094303131103516, 2.6864013671875, -1.1349964141845703, 11.596893310546875, -3.0155258178710938, 8.157058715820312, 3.1198577880859375, 22.28619956970215, -1.3817672729492188, -4.8018646240234375, -1.2223129272460938, 21.16107177734375, 12.959800720214844, 7.403297424316406, 28.568859100341797, -0.43603515625, 0.472808837890625, -6.786802291870117, 7.4505615234375, -8.50149917602539, 14.985374450683594, 16.763641357421875, 19.962099075317383, -0.7123222351074219, -7.504644393920898, 4.967376708984375, 2.5536231994628906, 1.3171443939208984, 1.429168701171875, -4.1087646484375, 9.743757247924805, -3.7873382568359375, 8.122997283935547, 0.08400726318359375, -4.892208099365234, -6.52294921875, -2.1192150115966797, 18.59973907470703, 13.291641235351562, -14.492210388183594, 13.581817626953125, -4.403083801269531, 3.6006011962890625, -4.669593811035156, -4.226936340332031, 30.439849853515625, 6.779335021972656, 4.8053741455078125, 4.41363525390625, -14.959247589111328, -5.63946533203125, 1.3253555297851562, 10.195144653320312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000200.npy"}
{"epoch": 0.6060606060606061, "step": 201, "batch_size": 128, "mean": 8.068527221679688, "std": 10.407020568847656, "min": -17.8643798828125, "p10": -3.6463214874267575, "median": 7.607501983642578, "p90": 21.581463241577147, "max": 36.277435302734375, "pos_frac": 0.78125, "sample": [-1.508697509765625, 10.609245300292969, 20.444793701171875, 12.273025512695312, -0.5978126525878906, 0.9448051452636719, 8.781143188476562, 4.309274673461914, 14.087112426757812, 23.039031982421875, 21.57464599609375, 19.590377807617188, 2.9922637939453125, 1.2820816040039062, 6.973241806030273, 10.453632354736328, 2.505136489868164, 16.54119873046875, -2.9303436279296875, 21.597370147705078, 0.13370132446289062, 16.159019470214844, 5.431825637817383, 1.008758544921875, 18.81048583984375, 19.23499298095703, 14.706634521484375, 2.2963790893554688, 1.6777381896972656, 0.51507568359375, -2.933582305908203, 10.13398551940918, 0.7189788818359375, -1.9492568969726562, 28.63189697265625, 2.8236541748046875, -9.404579162597656, -5.758674621582031, 3.752613067626953, 5.4734039306640625, -8.661407470703125, -9.532676696777344, 16.447265625, 10.805572509765625, 11.942752838134766, 10.885009765625, 16.01691436767578, 5.7773590087890625, -1.0476913452148438, 25.63544464111328, 10.716773986816406, -11.524677276611328, 23.683425903320312, 5.306846618652344, 7.28289794921875, 24.459091186523438, 17.957534790039062, 7.35589599609375, 20.831710815429688, 32.10814666748047, 11.133729934692383, -0.3357124328613281, 11.539077758789062, 36.120849609375, 12.26626205444336, 13.87530517578125, 7.859107971191406, 16.021652221679688, 9.818283081054688, 11.892044067382812, 28.480636596679688, 1.373016357421875, 3.7687911987304688, 5.20884895324707, -1.8421134948730469, 5.332855224609375, 18.669525146484375, -0.3750877380371094, -2.2993736267089844, -5.721961975097656, 8.283233642578125, 13.301036834716797, 6.11279296875, 3.558656692504883, 14.457351684570312, 16.477582931518555, 6.248222351074219, 2.7012405395507812, 2.492279052734375, -0.4409027099609375, 12.804771423339844, 13.227516174316406, 11.791322708129883, 16.85523223876953, -3.631866455078125, 3.8146820068359375, 10.416610717773438, 9.972335815429688, 3.033161163330078, 23.220054626464844, 11.019073486328125, -7.424858093261719, 17.723602294921875, 8.520370483398438, 19.482666015625, 0.7401123046875, 10.934669494628906, 22.158111572265625, 9.268314361572266, 5.042949676513672, 36.277435302734375, 26.186431884765625, -4.644992828369141, -2.0889930725097656, -17.8643798828125, 15.247425079345703, 12.71087646484375, -3.6800498962402344, 4.520790100097656, -8.294189453125, 0.7699203491210938, -13.9566650390625, -15.721168518066406, 11.216529846191406, 4.366935729980469, 16.070022583007812, -1.6541213989257812, -2.5071334838867188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000201.npy"}
{"epoch": 0.6090909090909091, "step": 202, "batch_size": 128, "mean": 7.891841888427734, "std": 10.995524406433105, "min": -16.47530174255371, "p10": -6.657733154296874, "median": 7.32427978515625, "p90": 21.276247406005858, "max": 41.91009521484375, "pos_frac": 0.7421875, "sample": [0.26409912109375, 2.6755523681640625, 21.393478393554688, 9.921211242675781, 14.388004302978516, 14.488265991210938, 34.67662048339844, 12.232503890991211, 5.227415084838867, 2.1017913818359375, 8.436744689941406, 6.923601150512695, -2.5915756225585938, -5.003612518310547, 41.91009521484375, 23.921630859375, 8.265815734863281, 4.405689239501953, 3.242635726928711, -1.8822174072265625, 12.545953750610352, 30.501724243164062, 2.4060535430908203, 8.64617919921875, 17.553546905517578, 0.4005126953125, 15.791046142578125, 20.40659523010254, -16.47530174255371, -8.621463775634766, 4.23858642578125, -14.849113464355469, 5.984046936035156, 5.320858001708984, 18.833850860595703, -10.89483642578125, 12.721382141113281, 4.551738739013672, 19.3018741607666, 10.988517761230469, 2.44482421875, 19.925722122192383, 21.161766052246094, 0.5373477935791016, 18.785415649414062, 2.6562881469726562, 3.0830116271972656, 8.328849792480469, -1.1570587158203125, -2.1869544982910156, 20.011829376220703, 8.740242004394531, 17.695314407348633, -8.832656860351562, 10.646137237548828, 11.677978515625, 21.22600555419922, 7.566062927246094, 4.297893524169922, 13.321746826171875, -9.364067077636719, 0.9319419860839844, 6.587135314941406, 14.864849090576172, -1.2490081787109375, 14.9410400390625, 10.977073669433594, -4.006103515625, -0.8353023529052734, 2.2943267822265625, 27.428634643554688, 1.437662124633789, -0.05599212646484375, -2.818807601928711, 18.420913696289062, 18.81060028076172, 13.622749328613281, 9.539268493652344, 8.029739379882812, 24.191184997558594, -4.472131729125977, 18.66564178466797, -1.7033767700195312, -1.455078125, -3.6939926147460938, 11.747611999511719, 12.62554931640625, -8.744659423828125, 31.140533447265625, -0.6850433349609375, 2.062103271484375, 21.12713623046875, 24.811431884765625, 8.168319702148438, 22.061939239501953, 12.087242126464844, -3.8871936798095703, 8.050674438476562, 22.413284301757812, -6.5755462646484375, -10.478809356689453, -8.823797225952148, -0.055843353271484375, 23.368331909179688, -5.6905517578125, 18.444091796875, -8.863578796386719, 17.291961669921875, 13.829925537109375, 16.775653839111328, 0.9900665283203125, 13.69207763671875, 5.907070159912109, -3.0741500854492188, 18.268238067626953, 6.752067565917969, 18.340791702270508, 3.71575927734375, 8.954269409179688, 19.54344940185547, 21.838836669921875, -10.413627624511719, -6.8495025634765625, -8.247955322265625, 4.9111328125, 7.082496643066406, 6.567098617553711, 4.608802795410156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000202.npy"}
{"epoch": 0.6121212121212121, "step": 203, "batch_size": 128, "mean": 7.079778671264648, "std": 10.543706893920898, "min": -19.527732849121094, "p10": -5.07589988708496, "median": 6.424112319946289, "p90": 21.696714019775392, "max": 34.78535461425781, "pos_frac": 0.7109375, "sample": [23.4508056640625, -1.311065673828125, -6.791534423828125, 20.045425415039062, 3.1340713500976562, 1.3342132568359375, 14.642288208007812, -2.270050048828125, 28.635635375976562, -6.086240768432617, -1.4018096923828125, 4.265655517578125, 12.492523193359375, 8.023712158203125, 5.32720947265625, 9.265907287597656, 9.596420288085938, 12.298599243164062, -7.918941497802734, 12.909187316894531, 7.361522674560547, -11.145866394042969, 11.315532684326172, 16.326995849609375, 20.77471923828125, 5.854501724243164, -4.661630630493164, 9.762969970703125, 12.638362884521484, 11.688434600830078, 22.710689544677734, 31.14569854736328, -0.7448654174804688, -9.239364624023438, -0.3265724182128906, 3.0581893920898438, 0.040882110595703125, -1.6814556121826172, 11.301624298095703, 0.8614234924316406, -0.7424392700195312, 1.4608955383300781, 11.7913818359375, 25.168745040893555, -0.7329216003417969, 9.452791213989258, 2.5808258056640625, -4.570613861083984, 1.783823013305664, -2.5690345764160156, 3.3641815185546875, 1.911630630493164, 10.606185913085938, 6.542591094970703, -1.1871337890625, 8.695207595825195, -7.1931304931640625, 1.9278793334960938, 14.035778045654297, -0.5506210327148438, -6.852989196777344, 21.878036499023438, 16.103652954101562, -1.0091171264648438, 17.541358947753906, 3.4526901245117188, -4.534727096557617, -6.1923065185546875, 18.282421112060547, 10.000171661376953, 21.70899200439453, 16.255401611328125, 21.691452026367188, 3.1704368591308594, 4.133037567138672, 26.78646469116211, 30.096786499023438, 7.800266265869141, 9.917884826660156, 4.139373779296875, -19.527732849121094, -1.5746746063232422, 16.69396209716797, 15.5897216796875, 11.428993225097656, 23.041072845458984, -5.868518829345703, 28.044321060180664, 14.585315704345703, 19.48633575439453, 5.178497314453125, -4.7362060546875, 13.062103271484375, -13.265403747558594, 2.032846450805664, -18.41469955444336, -9.632795333862305, 34.78535461425781, 12.895820617675781, 19.750030517578125, -0.04639434814453125, -1.76092529296875, 18.895523071289062, 14.492095947265625, 8.011482238769531, 11.301048278808594, -4.108488082885742, 11.633705139160156, 1.6997203826904297, 7.105134963989258, -3.8327083587646484, -0.971435546875, 13.158531188964844, -3.6426849365234375, 6.305633544921875, 25.767555236816406, 2.5847244262695312, 20.253101348876953, -2.4743785858154297, 1.9789772033691406, 12.39031982421875, 2.932830810546875, 2.7144851684570312, 3.0666275024414062, 14.755508422851562, 10.15594482421875, 8.536529541015625, 8.925750732421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000203.npy"}
{"epoch": 0.6151515151515151, "step": 204, "batch_size": 128, "mean": 7.454894065856934, "std": 10.32541275024414, "min": -19.641799926757812, "p10": -5.101680183410644, "median": 5.970221519470215, "p90": 21.681142425537107, "max": 30.164627075195312, "pos_frac": 0.7421875, "sample": [1.0370903015136719, -0.7053451538085938, -12.958251953125, 15.217689514160156, -3.0188446044921875, 16.580799102783203, 24.238967895507812, -1.6707382202148438, 16.345489501953125, 12.8662109375, 24.746421813964844, 17.664384841918945, -7.3451080322265625, -1.6597118377685547, 16.06989288330078, 7.3294525146484375, 13.756851196289062, 17.896949768066406, 8.59698486328125, 4.443939208984375, -8.234344482421875, 13.441986083984375, -4.7640533447265625, 12.32290267944336, 12.120849609375, -0.6690578460693359, 5.185760498046875, -5.826019287109375, -4.041351318359375, 2.4745635986328125, -7.886346817016602, -1.2748908996582031, -19.641799926757812, 13.010761260986328, 5.751792907714844, 4.989353179931641, -0.31003570556640625, -3.3357696533203125, -5.7033843994140625, 3.8745269775390625, 7.878448486328125, 13.39444351196289, 1.1353988647460938, -5.623897552490234, 18.341552734375, 30.164627075195312, 9.818946838378906, 0.33518218994140625, -11.767349243164062, 20.80352783203125, 12.676200866699219, 6.681190490722656, 16.979415893554688, 12.363479614257812, 3.303478240966797, 4.357269287109375, -0.7472915649414062, 19.938810348510742, -6.008373260498047, -0.67889404296875, 1.365447998046875, 3.884918212890625, 13.600349426269531, 4.5608062744140625, 5.875776290893555, 21.49462890625, 18.280479431152344, 8.82406234741211, 3.2066192626953125, -2.2980079650878906, -11.666091918945312, 2.93499755859375, 4.401237487792969, -4.478118896484375, 16.641210556030273, 6.064666748046875, 10.797012329101562, -5.218330383300781, -5.051687240600586, 9.854986190795898, 4.04107666015625, 2.7832069396972656, 15.43143081665039, 24.23013687133789, 6.666034698486328, -3.9809627532958984, 29.874176025390625, 3.156280517578125, 4.7505950927734375, 3.20037841796875, 17.157058715820312, 15.810283660888672, 29.809051513671875, -4.138856887817383, 9.324974060058594, 14.417640686035156, 16.300886154174805, 11.480184555053711, 11.485950469970703, 24.24755859375, 0.9191398620605469, 7.0179901123046875, 23.251800537109375, 22.802978515625, 26.944686889648438, -7.7726898193359375, 0.6274337768554688, 28.2769775390625, 16.384571075439453, 16.160076141357422, 21.078685760498047, -1.81304931640625, 2.3058547973632812, 19.728900909423828, 22.11634063720703, 24.07634735107422, 15.166946411132812, 1.5042304992675781, 12.122749328613281, -3.658742904663086, 0.9968795776367188, -2.2169971466064453, 6.244014739990234, 3.2814102172851562, 7.262298583984375, 2.3073501586914062, 1.4963455200195312, 14.227176666259766], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000204.npy"}
{"epoch": 0.6181818181818182, "step": 205, "batch_size": 128, "mean": 5.3129167556762695, "std": 9.777664184570312, "min": -20.316497802734375, "p10": -6.344407653808593, "median": 3.8547048568725586, "p90": 19.525541687011717, "max": 30.03502655029297, "pos_frac": 0.671875, "sample": [-2.4131698608398438, 5.446161270141602, -3.984588623046875, -2.8939056396484375, 3.088409423828125, 3.81451416015625, 2.229278564453125, 1.2585906982421875, -3.3273162841796875, -0.592926025390625, -3.0146865844726562, 24.901220321655273, 15.350265502929688, -6.857666015625, 20.436874389648438, 18.56153106689453, 14.050163269042969, -2.2697505950927734, 13.881874084472656, -1.4870891571044922, 18.577224731445312, 12.847976684570312, 1.8679580688476562, 1.4345817565917969, -20.316497802734375, 25.82988929748535, -1.599945068359375, 17.218162536621094, 1.2543926239013672, 19.423927307128906, -16.925460815429688, 6.7837371826171875, 0.31922149658203125, -14.364280700683594, -1.7112598419189453, 20.56139373779297, -7.717496871948242, 23.251251220703125, 13.872474670410156, -4.0771484375, 4.194793701171875, -6.1353607177734375, -2.6514892578125, 14.871139526367188, 3.894895553588867, -5.9595947265625, -6.832183837890625, -2.0135116577148438, -6.964714050292969, 0.8404464721679688, 1.85369873046875, -8.124189376831055, 6.98260498046875, 0.8061447143554688, 2.7513351440429688, -0.3660869598388672, 2.184722900390625, -0.7358970642089844, 5.056713104248047, 11.945808410644531, 22.575218200683594, -4.7653961181640625, 9.235626220703125, 8.859956741333008, 2.504304885864258, 5.0642547607421875, 7.649799346923828, 1.0318679809570312, 21.746410369873047, -1.361724853515625, -0.6240062713623047, 9.131843566894531, 16.29576873779297, -8.196258544921875, 10.862432479858398, 30.03502655029297, 24.404098510742188, 15.201614379882812, -0.96075439453125, 9.496007919311523, -0.20279884338378906, 19.81877899169922, 4.133033752441406, 18.691696166992188, 19.76264190673828, 4.843235015869141, 0.4096336364746094, 9.847526550292969, 7.410083770751953, 8.2701416015625, 5.92340087890625, 3.90313720703125, 21.600601196289062, 8.276237487792969, 8.327667236328125, 4.25927734375, -9.543594360351562, -13.34396743774414, 14.431995391845703, 9.352256774902344, -0.9382381439208984, -9.049957275390625, 14.214981079101562, 12.235729217529297, -2.1627960205078125, 13.121051788330078, -3.4454345703125, 6.0479278564453125, 3.032604217529297, -2.2332992553710938, 17.212417602539062, 1.5838737487792969, 2.3114089965820312, -7.1462249755859375, 15.834453582763672, 1.617401123046875, 17.246055603027344, 8.688896179199219, 18.057096481323242, -0.7580757141113281, 7.418128967285156, -1.8831672668457031, 1.1037044525146484, 6.749837875366211, 20.926002502441406, 10.567207336425781, -3.1506576538085938, 0.1881256103515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000205.npy"}
{"epoch": 0.6212121212121212, "step": 206, "batch_size": 128, "mean": 6.643813610076904, "std": 10.508429527282715, "min": -23.214035034179688, "p10": -5.566529846191406, "median": 5.8430023193359375, "p90": 21.006897544860838, "max": 35.902320861816406, "pos_frac": 0.71875, "sample": [-5.681373596191406, 14.572837829589844, 18.849599838256836, 22.409687042236328, 15.439308166503906, 4.940208435058594, -3.769683837890625, -14.144905090332031, 11.158784866333008, -3.2046051025390625, 16.29937744140625, 1.4903335571289062, 5.8490753173828125, 22.852996826171875, 25.216659545898438, 2.3088245391845703, 0.8546218872070312, 8.440750122070312, 12.774642944335938, 3.8664894104003906, 0.6835536956787109, 26.119918823242188, -1.8439445495605469, 32.139923095703125, 1.1315174102783203, -10.394546508789062, 3.3560924530029297, 4.1615753173828125, 14.462491989135742, 14.974441528320312, 3.8583450317382812, 5.866662979125977, 8.265857696533203, -0.9146499633789062, 11.304672241210938, 21.234474182128906, 14.013242721557617, 12.540031433105469, 15.353050231933594, -0.024662017822265625, -4.937248229980469, 5.8369293212890625, -0.7674789428710938, 3.0644683837890625, -7.195629119873047, -23.214035034179688, -14.293380737304688, -5.517311096191406, 17.53515625, 8.322578430175781, -5.3349609375, 21.74048614501953, 19.811843872070312, 22.55527687072754, -8.57699966430664, 9.11407470703125, 10.778793334960938, 6.56865119934082, 35.902320861816406, 1.8825531005859375, 0.863037109375, 0.9883766174316406, -7.922935485839844, 12.677240371704102, -3.4160232543945312, 20.51026153564453, 7.414203643798828, 26.03909683227539, 3.515186309814453, 10.214666366577148, 2.21112060546875, 9.071680068969727, 10.494832992553711, -0.7014312744140625, 1.2247848510742188, 0.7704734802246094, 17.467838287353516, 16.982284545898438, 6.5988616943359375, 9.622344970703125, 0.4834117889404297, 24.827621459960938, -5.4326934814453125, 6.215030670166016, -1.522857666015625, -4.231103897094727, 16.295551300048828, -6.4233551025390625, 29.887786865234375, -9.039810180664062, 12.982963562011719, 8.261289596557617, 15.103500366210938, -1.0434417724609375, 3.7126998901367188, 15.749237060546875, -8.777524948120117, -1.013711929321289, 2.8338546752929688, 3.7966384887695312, 11.559894561767578, 20.909364700317383, -1.8342456817626953, -0.7267913818359375, 14.550018310546875, 6.417842864990234, -1.309326171875, 7.387523651123047, 7.391023635864258, -0.15319061279296875, 1.69488525390625, 22.935592651367188, -8.830608367919922, 0.7105484008789062, 11.13653564453125, -0.105255126953125, 2.7768630981445312, 19.17950439453125, 18.51445770263672, 11.059112548828125, 0.6527519226074219, 3.8078575134277344, -8.581062316894531, 9.923072814941406, 15.84495735168457, -3.1224498748779297, 16.178813934326172, -4.938262939453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000206.npy"}
{"epoch": 0.6242424242424243, "step": 207, "batch_size": 128, "mean": 6.744357109069824, "std": 10.716816902160645, "min": -20.85773468017578, "p10": -6.365039825439453, "median": 6.0146484375, "p90": 21.152772903442383, "max": 33.50242614746094, "pos_frac": 0.734375, "sample": [3.06805419921875, -16.47113037109375, 11.921012878417969, 7.019659042358398, -6.253608703613281, 0.4178733825683594, 3.1196441650390625, 29.36178207397461, -2.5796165466308594, 24.88309097290039, -1.3951454162597656, 3.8855342864990234, 24.781105041503906, 11.957046508789062, 14.138175964355469, 6.278175354003906, 5.260047912597656, 3.9985809326171875, 26.23927116394043, -5.547332763671875, 13.363441467285156, 0.916259765625, 16.42761993408203, 4.296867370605469, 6.483591079711914, 0.8149299621582031, -7.6334075927734375, -3.9643096923828125, 18.999588012695312, -0.3080253601074219, -3.92108154296875, 11.353164672851562, -3.4998435974121094, 21.51092529296875, -14.473953247070312, 10.932373046875, 7.801666259765625, -0.8855209350585938, -8.006263732910156, 5.614902496337891, 7.5973358154296875, -2.8532047271728516, 25.966232299804688, 16.167236328125, 5.003326416015625, 8.13680648803711, 13.396270751953125, 13.40170669555664, -9.22796630859375, 16.539310455322266, 15.57142448425293, -13.055061340332031, 11.213790893554688, -2.8591842651367188, -20.85773468017578, -2.096160888671875, -0.9565200805664062, 27.876617431640625, 4.492595672607422, 14.609359741210938, 2.2459945678710938, 7.437347412109375, 15.132308959960938, 13.271526336669922, 11.365646362304688, 0.6103134155273438, 19.32977294921875, 9.011734008789062, -0.6273269653320312, 5.720636367797852, 4.390960693359375, 13.481903076171875, -8.48114013671875, -5.522087097167969, 6.821113586425781, 9.197898864746094, 5.382846832275391, 9.253992080688477, 29.19823455810547, 4.8444061279296875, 19.88929557800293, 6.449920654296875, 17.866317749023438, 7.052558898925781, 14.978897094726562, 4.328090667724609, 6.5986328125, 16.70790672302246, -19.591552734375, -0.44123077392578125, 26.406982421875, -0.6760139465332031, -12.713983535766602, 2.4661808013916016, 24.518203735351562, 33.50242614746094, 5.0787353515625, 4.5137939453125, 5.751121520996094, 13.550865173339844, 20.999279022216797, -7.442266464233398, 18.494789123535156, 5.200714111328125, 23.011869430541992, 9.058746337890625, 13.710521697998047, 7.858699798583984, 9.776878356933594, 6.842380523681641, 17.389249801635742, 0.6269683837890625, 0.833404541015625, -11.349380493164062, 0.0153350830078125, -0.8195571899414062, 17.38427734375, -3.0291671752929688, -3.010631561279297, 16.234058380126953, 5.295097351074219, 0.2272186279296875, 13.316326141357422, 10.91738510131836, -6.6250457763671875, -0.6929683685302734, 0.2638893127441406, 22.515060424804688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000207.npy"}
{"epoch": 0.6272727272727273, "step": 208, "batch_size": 128, "mean": 5.500006675720215, "std": 10.890283584594727, "min": -23.073684692382812, "p10": -6.180890655517577, "median": 4.163143157958984, "p90": 21.10475044250488, "max": 31.44574737548828, "pos_frac": 0.6953125, "sample": [-0.6349849700927734, 14.147318840026855, 4.341730117797852, 6.09490966796875, -3.7789249420166016, 1.5187549591064453, 16.145095825195312, 25.596378326416016, 31.44574737548828, 5.888633728027344, 17.502334594726562, -1.3022823333740234, 5.956249237060547, 4.217376708984375, 10.390815734863281, -0.3405113220214844, 9.43370246887207, 0.8372688293457031, -21.067466735839844, 27.066070556640625, 10.23740005493164, 3.473175048828125, 17.289947509765625, -8.450897216796875, 3.5648422241210938, 0.4509716033935547, -4.653154373168945, 11.32220458984375, -3.132976531982422, 1.4475288391113281, 4.108909606933594, 15.257740020751953, -2.369293212890625, 23.19989776611328, 1.5309562683105469, -8.686134338378906, 17.08893585205078, 20.22528648376465, -4.40087890625, 20.565963745117188, 11.005752563476562, 26.71912384033203, -12.410491943359375, 8.517120361328125, 9.7318115234375, 16.154937744140625, -5.471923828125, 11.7293701171875, 8.266929626464844, 0.841796875, -11.749778747558594, -22.114768981933594, 9.526618957519531, 5.4078826904296875, -2.106128692626953, 0.45967864990234375, -1.0308475494384766, 3.656299591064453, 18.446624755859375, -0.16819000244140625, 0.38680267333984375, 5.284511566162109, 21.909225463867188, 22.0994873046875, 1.8805103302001953, -16.600940704345703, 1.81268310546875, 7.240745544433594, 1.2482013702392578, 19.854700088500977, 1.50897216796875, 0.0465240478515625, -0.2114086151123047, 10.795730590820312, -18.745567321777344, -3.29998779296875, 5.105384826660156, 12.904426574707031, 9.0498046875, 20.75997543334961, -5.847137451171875, -8.77593994140625, 1.7611656188964844, -6.959648132324219, -1.2030181884765625, -10.805580139160156, -5.445590972900391, 2.5166683197021484, -1.3652515411376953, 12.891250610351562, 0.9263114929199219, 6.055381774902344, 1.6422042846679688, 9.585258483886719, -2.707061767578125, 8.256362915039062, 5.736164093017578, 24.52462387084961, 6.564918518066406, 15.9622802734375, 27.1846923828125, 2.579669952392578, -2.5140819549560547, 8.219627380371094, -2.159219741821289, 9.118526458740234, 11.446044921875, 25.12065887451172, 28.481651306152344, 3.6835250854492188, 10.860635757446289, 7.3602294921875, -1.7846336364746094, 9.310466766357422, -9.548347473144531, -2.7771682739257812, -0.2544708251953125, -2.1800613403320312, 22.003767013549805, -23.073684692382812, 1.5514907836914062, 15.989028930664062, -0.651458740234375, 3.252788543701172, 12.231765747070312, 12.927146911621094, 23.384910583496094, 10.953775405883789], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000208.npy"}
{"epoch": 0.6303030303030303, "step": 209, "batch_size": 128, "mean": 5.907308101654053, "std": 11.076680183410645, "min": -30.476806640625, "p10": -6.450274658203124, "median": 5.622652053833008, "p90": 20.11274719238281, "max": 35.979095458984375, "pos_frac": 0.703125, "sample": [-1.5471477508544922, 1.218709945678711, -6.229766845703125, -23.94029998779297, 19.74756622314453, 11.392059326171875, -30.476806640625, 16.831363677978516, 25.77808380126953, 7.18438720703125, -2.2578506469726562, 0.17232894897460938, -0.056915283203125, 21.101348876953125, 4.532796859741211, 1.83905029296875, 24.830039978027344, 3.443817138671875, 12.032501220703125, -5.142982482910156, 0.86334228515625, 3.669830322265625, 12.03110122680664, 13.197441101074219, -4.458759307861328, 12.410293579101562, 2.5278244018554688, 10.451070785522461, 4.875907897949219, -12.03411865234375, 8.505485534667969, -6.298076629638672, 14.363571166992188, 8.5294189453125, 35.458740234375, 10.321609497070312, 0.9736595153808594, 5.503393173217773, 11.815971374511719, -9.528352737426758, 9.046802520751953, -5.285530090332031, 23.56760025024414, 7.338676452636719, 15.395818710327148, 12.397903442382812, -1.1798744201660156, 27.44000244140625, 7.748928070068359, 5.738880157470703, 18.47426986694336, -1.3805313110351562, -1.5453109741210938, 11.020378112792969, 5.264751434326172, 10.33038330078125, 8.498174667358398, 14.375617980957031, 6.611301422119141, 0.039470672607421875, 18.900232315063477, 4.8638153076171875, 21.319881439208984, 34.30805206298828, 8.7869873046875, 13.909332275390625, 15.909294128417969, 7.6980743408203125, -1.2614669799804688, 13.235565185546875, 0.2986412048339844, 11.874786376953125, 13.911911010742188, -3.230236053466797, 20.96483612060547, -6.1482391357421875, -2.9930858612060547, 35.979095458984375, 6.9265289306640625, -0.9937286376953125, 10.341560363769531, 6.6649017333984375, 22.077930450439453, -3.259899139404297, 6.090705871582031, -3.0042037963867188, 2.257352828979492, 1.2506370544433594, -8.04793930053711, 0.8555145263671875, 5.896400451660156, 4.0836181640625, 16.495834350585938, 7.28546142578125, 12.95167350769043, -8.248260498046875, 29.986099243164062, -6.3649749755859375, 18.349170684814453, 26.060997009277344, -9.173004150390625, 11.463958740234375, 3.560791015625, -8.335304260253906, 16.687255859375, 8.382957458496094, -6.138641357421875, 4.9055633544921875, 6.1616668701171875, 5.5064239501953125, -14.573272705078125, -11.664642333984375, -9.350440979003906, -4.704139709472656, -6.6493072509765625, -1.0547027587890625, 3.426412582397461, -2.8979263305664062, 4.3901824951171875, 4.838573455810547, -9.558265686035156, 7.469673156738281, 7.888975143432617, 14.675195693969727, 8.642448425292969, -2.8595352172851562, 3.064208984375, -1.4818878173828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000209.npy"}
{"epoch": 0.6333333333333333, "step": 210, "batch_size": 128, "mean": 6.550052642822266, "std": 11.042656898498535, "min": -30.636276245117188, "p10": -6.110933303833007, "median": 4.787647247314453, "p90": 21.455448150634766, "max": 33.44142150878906, "pos_frac": 0.7421875, "sample": [3.3646469116210938, 24.639862060546875, 5.421230316162109, 7.379692077636719, -8.981306076049805, 1.2165451049804688, 11.794326782226562, 6.785366058349609, 13.67593002319336, 10.322219848632812, 21.012222290039062, -3.9615020751953125, 5.423881530761719, 10.346755981445312, -9.486583709716797, 20.589092254638672, -5.927501678466797, 22.246299743652344, 29.214279174804688, 12.987903594970703, 12.575260162353516, 4.298677444458008, -0.01865386962890625, 13.796966552734375, 2.7719345092773438, 26.626094818115234, 21.670753479003906, 1.9722537994384766, 7.0583953857421875, 5.31488037109375, 7.181718826293945, 10.453502655029297, -1.0852813720703125, 1.536977767944336, -7.530220031738281, 18.213642120361328, 21.363174438476562, 20.93768310546875, -0.7794723510742188, 4.8294677734375, 4.745826721191406, -1.0813865661621094, 21.07666015625, -5.8597259521484375, 20.691680908203125, 9.519729614257812, 5.626823425292969, -9.337970733642578, 5.170137405395508, 23.843420028686523, 2.9509353637695312, -5.4382476806640625, 19.812824249267578, -3.762847900390625, 14.812820434570312, 11.394359588623047, 21.859222412109375, 7.7279205322265625, 21.795257568359375, 2.85101318359375, 4.999731063842773, 7.188484191894531, -0.9009017944335938, 19.60932159423828, 0.21594619750976562, -7.01458740234375, 21.898326873779297, -1.9008636474609375, 19.57746124267578, -16.547161102294922, 2.8542022705078125, 2.5724716186523438, 32.54393005371094, 14.010169982910156, 33.44142150878906, 15.966873168945312, 0.8438568115234375, -5.150978088378906, 0.4560699462890625, 13.185859680175781, -6.5389404296875, 3.6932296752929688, -1.1671600341796875, -3.1580123901367188, 0.5748062133789062, -5.365287780761719, 12.201614379882812, 2.657184600830078, 4.446693420410156, 3.591644287109375, 20.118942260742188, -0.08100128173828125, 3.0748634338378906, -1.6009464263916016, 5.770305633544922, 12.137426376342773, 2.9849319458007812, -19.060863494873047, -2.914459228515625, 7.880558013916016, 4.0535888671875, 14.430740356445312, 1.6126213073730469, 15.328033447265625, 10.70920181274414, 16.659950256347656, -7.234540939331055, -12.50135612487793, 1.2937030792236328, 1.9290008544921875, -8.551300048828125, 9.130050659179688, 0.4721240997314453, -30.636276245117188, -10.153144836425781, 1.4923019409179688, 20.50848960876465, 4.271371841430664, 3.419574737548828, 24.13580322265625, -1.990875244140625, 9.376880645751953, 10.85031509399414, 20.26720428466797, 1.5226783752441406, 27.448348999023438, -4.465507507324219, 0.2830848693847656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000210.npy"}
{"epoch": 0.6363636363636364, "step": 211, "batch_size": 128, "mean": 5.915665626525879, "std": 11.329885482788086, "min": -25.2067928314209, "p10": -7.559582519531249, "median": 5.401891708374023, "p90": 20.645656013488768, "max": 50.80718994140625, "pos_frac": 0.6953125, "sample": [-10.173297882080078, 13.695343017578125, 1.4490470886230469, 13.129602432250977, 13.731605529785156, 8.899036407470703, 6.851188659667969, 4.138023376464844, 6.062980651855469, -0.7639961242675781, 20.981048583984375, 9.771133422851562, 15.672748565673828, 1.6291732788085938, -1.0567054748535156, 0.8754730224609375, 15.131782531738281, 4.343196868896484, 5.999946594238281, 15.450325012207031, 22.48211669921875, 9.70138931274414, 1.15911865234375, -0.18158721923828125, -11.84490966796875, -3.2336349487304688, 24.751602172851562, 14.086875915527344, -6.171604156494141, 6.134346008300781, 0.405181884765625, -13.785232543945312, -4.173942565917969, 50.80718994140625, 22.599502563476562, -5.360450744628906, 24.109371185302734, 7.881763458251953, -5.136363983154297, 9.177339553833008, 5.733367919921875, -3.761920928955078, 14.558807373046875, -25.2067928314209, -13.988941192626953, -8.500797271728516, 3.0402088165283203, 20.761571884155273, 12.312896728515625, 0.25043296813964844, 7.080863952636719, -4.3357696533203125, -13.506752014160156, 12.15021800994873, 1.465057373046875, 7.070869445800781, 0.39577484130859375, 7.70311164855957, 14.36764144897461, -13.034202575683594, 16.006698608398438, 12.642158508300781, -1.6838645935058594, -0.5056915283203125, 18.474441528320312, 34.28211975097656, 2.2761802673339844, -1.0718994140625, 11.055328369140625, -0.7494316101074219, 4.392753601074219, -8.198699951171875, 13.280326843261719, 24.557960510253906, -5.5554351806640625, 5.5704193115234375, 13.769622802734375, -4.631500244140625, -1.2441978454589844, -2.8125686645507812, 14.867424011230469, -4.737344741821289, 5.233364105224609, 3.5964431762695312, 12.016958236694336, 0.05739402770996094, 19.438194274902344, 1.6631698608398438, -0.497222900390625, 3.0164356231689453, -1.1277904510498047, 29.731765747070312, -0.6659984588623047, -9.815269470214844, 2.195667266845703, 9.847793579101562, 5.0225982666015625, 33.39616394042969, 13.327774047851562, 13.07697868347168, -3.9233970642089844, 20.595977783203125, 23.474639892578125, 8.235572814941406, 7.85455322265625, -1.027252197265625, 14.518363952636719, 2.7813034057617188, 33.0355224609375, 0.14459991455078125, 5.01446533203125, 8.201408386230469, 2.8869247436523438, -7.285675048828125, 5.157096862792969, 9.971534729003906, 11.845016479492188, -8.569953918457031, -3.315032958984375, 7.276679992675781, 5.946521759033203, -12.42333984375, 9.337034225463867, 13.318763732910156, 6.745246887207031, 5.999855041503906, -12.650352478027344, 10.778533935546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000211.npy"}
{"epoch": 0.6393939393939394, "step": 212, "batch_size": 128, "mean": 7.682812690734863, "std": 10.29549789428711, "min": -16.755840301513672, "p10": -4.708954620361328, "median": 5.142080307006836, "p90": 22.806089782714842, "max": 32.090087890625, "pos_frac": 0.8125, "sample": [4.865386962890625, -4.655059814453125, 13.696495056152344, -8.274341583251953, 23.61737060546875, -0.5070438385009766, 2.6404876708984375, -5.614326477050781, 2.3306922912597656, 0.173095703125, 22.77039337158203, -16.755840301513672, 17.021764755249023, 16.791229248046875, -3.4731216430664062, 0.13420867919921875, 9.645004272460938, 10.190582275390625, 16.600568771362305, 6.93499755859375, 5.151893615722656, 6.1469879150390625, 29.61756134033203, 26.83587646484375, -0.8832054138183594, 2.1439952850341797, 28.538070678710938, 6.816734313964844, -0.8950614929199219, 9.487245559692383, 10.009880065917969, 28.1435546875, 25.94550323486328, 2.329967498779297, 3.7008514404296875, 16.608104705810547, 3.2089080810546875, -8.934524536132812, -0.22563934326171875, 1.53466796875, 11.121986389160156, 20.248130798339844, 16.555282592773438, 0.5959644317626953, -12.19959831237793, 4.866222381591797, 11.101318359375, 22.889381408691406, 7.796154022216797, 6.608709335327148, 4.4641571044921875, 22.322601318359375, 14.976890563964844, 16.582759857177734, 15.919322967529297, 1.3088836669921875, 0.15325927734375, -2.5073471069335938, -1.55126953125, 4.1599578857421875, 14.622772216796875, -4.834709167480469, 1.9434356689453125, -0.16807174682617188, -5.6961212158203125, 24.362396240234375, 3.5690155029296875, 13.853973388671875, 0.79608154296875, 8.83171272277832, 25.906097412109375, 4.90606689453125, 11.637931823730469, -8.823348999023438, 4.423583984375, 4.577182769775391, 5.132266998291016, 2.6415786743164062, 6.575836181640625, 4.115516662597656, 2.7261199951171875, 6.510915756225586, 22.142261505126953, 3.5399169921875, 15.355644226074219, 17.580223083496094, 18.3734130859375, 10.296043395996094, -14.79205322265625, 0.5526275634765625, 15.797439575195312, 3.672882080078125, 13.761581420898438, 26.462474822998047, 1.0180511474609375, 6.455986022949219, -7.019706726074219, -7.9964447021484375, 2.6644535064697266, 5.336986541748047, 2.452829360961914, -13.341278076171875, 24.26256561279297, 4.499259948730469, -1.5560951232910156, 14.662971496582031, 17.616806030273438, 7.0897369384765625, 32.090087890625, 2.903331756591797, 10.90799331665039, 2.9864578247070312, 3.9140968322753906, 8.731746673583984, 2.326353073120117, 25.524303436279297, 2.4802780151367188, 21.818925857543945, 19.509963989257812, 1.4086017608642578, 8.312789916992188, -9.126152038574219, 19.44634246826172, 21.29426383972168, 2.9339981079101562, 14.601081848144531, -1.5747528076171875, 7.58477783203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000212.npy"}
{"epoch": 0.6424242424242425, "step": 213, "batch_size": 128, "mean": 5.113101005554199, "std": 10.963488578796387, "min": -33.51513671875, "p10": -6.7774322509765605, "median": 3.841123580932617, "p90": 20.447597503662106, "max": 30.169689178466797, "pos_frac": 0.6953125, "sample": [15.247291564941406, 6.269447326660156, -2.716064453125, 2.6338043212890625, 14.209213256835938, 3.638134002685547, 25.74785614013672, -3.9025802612304688, -0.28438568115234375, 17.61962127685547, -1.0127792358398438, 10.7752685546875, -3.4095001220703125, 7.2866058349609375, -1.9854507446289062, 10.811563491821289, 6.328865051269531, 2.5707168579101562, 8.261062622070312, 3.9325180053710938, -3.6506118774414062, 23.653684616088867, 6.679943084716797, 0.5025100708007812, 14.637069702148438, 1.7066116333007812, 19.691116333007812, 8.606389999389648, -3.256589889526367, 28.216094970703125, -5.861358642578125, -0.9435615539550781, 14.708301544189453, 6.7614288330078125, 12.10498046875, -2.6561279296875, 7.579874038696289, -5.4833831787109375, -2.3465347290039062, 18.585708618164062, 20.102203369140625, 4.013078689575195, 3.7497291564941406, 23.3489990234375, -1.1895885467529297, 22.956802368164062, 14.554496765136719, 1.0549545288085938, 16.65624237060547, -5.2067413330078125, 3.658222198486328, 11.61517333984375, 8.606002807617188, 1.9469985961914062, 4.075408935546875, 8.757551193237305, 19.835731506347656, 3.6580429077148438, -4.300468444824219, 19.68456268310547, 7.119575500488281, 3.0415496826171875, 21.5166015625, -13.853805541992188, 2.48248291015625, -5.192863464355469, 10.378284454345703, 12.734840393066406, 11.178939819335938, 12.731121063232422, -10.321060180664062, 30.169689178466797, 7.721897125244141, 0.8104095458984375, 2.425546646118164, 1.3498115539550781, -17.507701873779297, 7.3256683349609375, -6.1360321044921875, 0.8363265991210938, 13.083402633666992, -8.899314880371094, -19.852630615234375, 4.453947067260742, -10.516498565673828, 2.187772750854492, 2.2269287109375, 28.3892822265625, -4.6849822998046875, 2.199615478515625, 1.7864055633544922, 7.408424377441406, -1.1656723022460938, 22.768798828125, 10.13153076171875, -9.888008117675781, 1.7390365600585938, 11.311355590820312, -11.633674621582031, -3.6862106323242188, 5.979085922241211, -16.06893539428711, 4.361297607421875, 0.6547698974609375, -2.768962860107422, 5.103874206542969, -0.5139503479003906, -33.51513671875, 5.220584869384766, 1.0604476928710938, 7.030242919921875, 17.88532257080078, 24.813940048217773, 23.83740234375, -8.785499572753906, -13.99462890625, -1.0099525451660156, 28.127403259277344, -2.6587066650390625, 0.017444610595703125, 7.447845458984375, 13.452468872070312, 1.4555816650390625, -8.274032592773438, 16.453475952148438, 5.71856689453125, 21.253517150878906, -2.8115406036376953], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000213.npy"}
{"epoch": 0.6454545454545455, "step": 214, "batch_size": 128, "mean": 6.482856750488281, "std": 11.481325149536133, "min": -16.846343994140625, "p10": -9.156676864624023, "median": 4.739679336547852, "p90": 22.377317237854, "max": 37.713043212890625, "pos_frac": 0.71875, "sample": [10.677360534667969, 9.768783569335938, -3.423826217651367, -3.0076637268066406, 0.8621368408203125, 9.27163314819336, 20.729909896850586, -3.418437957763672, 7.576568603515625, -1.3848114013671875, -2.5317916870117188, -1.04412841796875, -0.348968505859375, 7.036613464355469, 26.984527587890625, 7.9170074462890625, 15.31011962890625, 2.909088134765625, 1.240631103515625, 21.947338104248047, -3.085826873779297, -16.846343994140625, 25.60832977294922, 21.93121337890625, 6.638343811035156, -8.837181091308594, 18.478715896606445, 17.215972900390625, 4.846641540527344, 2.655426025390625, 13.048824310302734, 3.3487396240234375, 12.316627502441406, -12.397468566894531, 19.558425903320312, -8.896484375, -13.631399154663086, -13.978134155273438, 11.874008178710938, 14.36376953125, 3.4683303833007812, 8.968551635742188, 7.760540008544922, 15.623695373535156, 2.1041431427001953, 2.8931236267089844, 23.895790100097656, 11.296741485595703, 4.545753479003906, 4.471549987792969, 2.5721893310546875, -2.3839569091796875, -3.8902130126953125, -14.773202896118164, 8.804618835449219, 18.872817993164062, 9.617725372314453, 6.320838928222656, 1.4643020629882812, -3.0051727294921875, 11.020889282226562, -9.8387451171875, 37.713043212890625, 31.125877380371094, -10.742813110351562, 14.875946044921875, 19.404388427734375, -0.6874790191650391, 0.6093368530273438, 2.1745262145996094, -0.9761123657226562, -16.118553161621094, 26.37255096435547, 3.976856231689453, 0.30045318603515625, 3.6203689575195312, 14.686702728271484, 4.632717132568359, 0.7704620361328125, 6.331245422363281, 4.290483474731445, 10.884284973144531, 18.670196533203125, 28.17255401611328, 16.136062622070312, 18.11742401123047, 5.027225494384766, -3.2804622650146484, 2.1529922485351562, -8.58203125, 3.5583724975585938, -9.385181427001953, 2.4837875366210938, -0.2104511260986328, 21.913959503173828, 23.38060188293457, -11.607887268066406, 2.46002197265625, 23.60765838623047, -11.963905334472656, 6.6336212158203125, -5.523067474365234, 12.899715423583984, 0.1084747314453125, 23.72840118408203, 9.3355712890625, 1.8988208770751953, -11.064987182617188, 14.750244140625, -11.413558959960938, 11.705947875976562, -9.058746337890625, 16.037628173828125, 26.297378540039062, 14.383625030517578, 20.25726318359375, 0.7562408447265625, -7.689903259277344, 10.373130798339844, -1.382843017578125, 27.67667007446289, 17.141559600830078, 9.347763061523438, 11.200984954833984, 26.123321533203125, 12.601541519165039, 0.708465576171875, -3.017406463623047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000214.npy"}
{"epoch": 0.6484848484848484, "step": 215, "batch_size": 128, "mean": 7.324078559875488, "std": 11.183963775634766, "min": -22.885452270507812, "p10": -5.621547508239744, "median": 7.1783552169799805, "p90": 20.628216552734372, "max": 33.49513244628906, "pos_frac": 0.7578125, "sample": [18.044403076171875, -13.365493774414062, 0.7716064453125, 8.738502502441406, 15.844406127929688, 14.05845832824707, 14.538261413574219, 11.125679016113281, -15.492332458496094, 7.621788024902344, 3.5643463134765625, 12.021480560302734, 13.545158386230469, 3.1470870971679688, 12.903099060058594, 22.431976318359375, 4.3148040771484375, -2.8878707885742188, 9.298107147216797, -6.631547927856445, -8.783462524414062, 11.690505981445312, 0.5687637329101562, 1.3665237426757812, -0.018304824829101562, 15.586690902709961, 3.698141098022461, 32.550201416015625, 4.320587158203125, -3.7824020385742188, 13.757261276245117, 3.186279296875, -4.4269866943359375, 17.337142944335938, -4.338787078857422, -3.172149658203125, 6.210063934326172, -0.6045188903808594, 5.213676452636719, 10.043876647949219, 17.052574157714844, 21.232620239257812, 27.122447967529297, 7.06683349609375, 5.185478210449219, -12.633842468261719, -3.5360450744628906, -19.672645568847656, 18.78866958618164, 25.128097534179688, 2.8496780395507812, 18.15932273864746, 21.368484497070312, -15.510372161865234, -15.950515747070312, 2.287240982055664, 19.650840759277344, 8.486242294311523, 31.41114044189453, 16.228103637695312, 6.819854736328125, 8.861053466796875, 8.062637329101562, 8.40993881225586, 12.385818481445312, 2.576061248779297, 5.225624084472656, 6.1024932861328125, -3.075176239013672, 15.617584228515625, 0.1701221466064453, 19.66585922241211, 14.742996215820312, -0.0528106689453125, 25.069236755371094, -8.064231872558594, 0.18076324462890625, 27.38101577758789, 9.089080810546875, 0.7982063293457031, 3.3141098022460938, 11.258438110351562, 0.474853515625, 33.49513244628906, 13.011459350585938, 18.826614379882812, -14.133419036865234, 15.522872924804688, -22.885452270507812, -5.188690185546875, -3.0324859619140625, 12.358203887939453, -1.836181640625, 15.413856506347656, -1.1519966125488281, 18.204376220703125, 8.386505126953125, 14.81341552734375, -8.381010055541992, 18.875282287597656, 22.777610778808594, 7.289876937866211, 27.01889419555664, 16.806060791015625, -2.6851272583007812, 24.87906265258789, 3.1649742126464844, 20.272987365722656, 20.369186401367188, 12.364952087402344, 6.837760925292969, 0.7930145263671875, 0.01477813720703125, 13.875236511230469, -0.6364994049072266, 3.6897811889648438, 4.6638031005859375, 12.375041961669922, -0.049774169921875, 1.1505680084228516, 16.785308837890625, 14.177131652832031, 11.87441635131836, 8.988296508789062, 4.595863342285156, -13.71882438659668, -0.0839080810546875, 3.87017822265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000215.npy"}
{"epoch": 0.6515151515151515, "step": 216, "batch_size": 128, "mean": 6.809790134429932, "std": 11.12187385559082, "min": -19.257476806640625, "p10": -5.050850677490234, "median": 5.577482223510742, "p90": 23.081557273864746, "max": 31.329280853271484, "pos_frac": 0.6796875, "sample": [-13.91268539428711, 13.676031112670898, 7.648200988769531, -0.3552074432373047, 0.1511974334716797, 15.755477905273438, 28.129470825195312, 8.551933288574219, 24.943336486816406, -1.2458515167236328, 19.85491943359375, 10.351058959960938, 8.827033996582031, -13.998176574707031, -0.6194000244140625, 24.292667388916016, 7.4022674560546875, 24.319610595703125, -5.068939208984375, -0.33089447021484375, 23.807723999023438, 2.1360855102539062, -17.599090576171875, 3.1323204040527344, 4.782146453857422, 31.329280853271484, -0.689910888671875, 7.483299255371094, 0.7687530517578125, 6.733249664306641, 5.455009460449219, 10.494392395019531, 17.09539794921875, -1.553466796875, 1.029266357421875, 27.34836196899414, 12.590923309326172, -0.3153495788574219, 4.156757354736328, 5.014698028564453, -5.764793395996094, 23.250886917114258, 16.357982635498047, -9.06464958190918, 6.059425354003906, -1.9478492736816406, 0.5902214050292969, -1.0868396759033203, -2.1266326904296875, -1.5674057006835938, 6.8193817138671875, 12.354606628417969, 3.4015426635742188, 1.4764022827148438, 21.03133773803711, 9.648727416992188, -0.46652984619140625, -1.7406425476074219, -0.7145919799804688, 19.492591857910156, 21.182361602783203, 6.795244216918945, 17.372772216796875, 5.699954986572266, -1.7417144775390625, -5.043098449707031, -5.398590087890625, -19.257476806640625, 2.1187610626220703, 1.8701934814453125, 0.22530174255371094, 2.5381202697753906, 5.294059753417969, 10.750900268554688, 22.587501525878906, 26.897216796875, -6.871604919433594, 14.020988464355469, 20.747100830078125, -1.0444793701171875, 1.465890884399414, -3.8210372924804688, 10.320541381835938, 5.864885330200195, 23.008987426757812, 19.754737854003906, -0.2727508544921875, 13.190742492675781, 0.9438209533691406, 28.865585327148438, 10.68719482421875, 12.004539489746094, 15.604320526123047, 8.220260620117188, 5.983818054199219, 26.649925231933594, -4.1436767578125, 13.747295379638672, 7.937128067016602, -13.226585388183594, 3.73193359375, -4.378345489501953, 20.058990478515625, -19.039016723632812, -1.4492607116699219, 22.405044555664062, 9.918426513671875, 18.947906494140625, 16.79549789428711, -0.022388458251953125, -6.915596008300781, -0.7758293151855469, -4.848625183105469, 7.5194549560546875, 20.830169677734375, -0.22826004028320312, 25.153274536132812, 7.4626922607421875, 3.7387561798095703, 23.77601432800293, -16.70941162109375, 11.223644256591797, 14.043243408203125, 1.8725624084472656, -0.2760753631591797, 20.28496551513672, -2.475341796875, 1.9045181274414062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000216.npy"}
{"epoch": 0.6545454545454545, "step": 217, "batch_size": 128, "mean": 8.224449157714844, "std": 12.369223594665527, "min": -27.09123992919922, "p10": -6.9499870300292965, "median": 8.119396209716797, "p90": 24.93671703338623, "max": 35.30635452270508, "pos_frac": 0.7265625, "sample": [5.083240509033203, 10.398439407348633, -16.540847778320312, 0.9663734436035156, -0.060546875, -0.846160888671875, -2.0814132690429688, 33.06813049316406, 5.094926834106445, 15.207456588745117, 12.184467315673828, -11.827007293701172, 16.462268829345703, 5.698722839355469, -5.8873138427734375, 18.628767013549805, -6.328910827636719, 2.5823440551757812, 11.949151992797852, -7.437234878540039, -10.195999145507812, -7.0896453857421875, 0.0170745849609375, 35.30635452270508, 0.05663299560546875, 24.993013381958008, -1.7674942016601562, 6.455955505371094, 15.559783935546875, -6.688346862792969, 3.7708740234375, 13.579610824584961, -2.217924118041992, -5.392425537109375, 2.9739532470703125, 10.221786499023438, 3.5964813232421875, -4.879051208496094, 27.44721221923828, -6.65570068359375, -19.40158462524414, 3.423738479614258, 10.856826782226562, 4.524875640869141, 24.042741775512695, 12.461158752441406, 0.4375438690185547, 13.062889099121094, 28.820037841796875, 24.369693756103516, 23.684249877929688, 8.6455078125, 19.015960693359375, -10.693534851074219, -1.3320541381835938, 13.16501235961914, 17.710145950317383, 8.485343933105469, 27.058650970458984, 19.952491760253906, 26.224021911621094, 2.065692901611328, 30.30766487121582, 2.7923583984375, -17.521041870117188, 11.27117919921875, 11.582359313964844, -6.7976837158203125, 16.514129638671875, 26.71164894104004, -7.348012924194336, 18.53564453125, 21.98474884033203, 10.514474868774414, 16.620941162109375, 23.919519424438477, 7.60955810546875, -1.178548812866211, 30.93963623046875, -0.6308212280273438, 16.279983520507812, 30.59888458251953, 29.64881134033203, 13.329910278320312, -2.726970672607422, 6.759880065917969, 23.442977905273438, -6.64204216003418, 11.44586181640625, 17.512983322143555, 2.6058692932128906, 26.78689193725586, -9.334388732910156, 5.0065765380859375, 4.81494140625, 17.19635772705078, -11.904800415039062, 10.863914489746094, -0.102569580078125, -6.948764801025391, -27.09123992919922, 10.250518798828125, 17.032669067382812, -2.0924835205078125, -6.952838897705078, 7.583984375, 24.91259002685547, 12.298088073730469, 2.4969940185546875, 14.869548797607422, 2.417194366455078, 12.498489379882812, 22.4345760345459, -4.14666748046875, 8.916717529296875, 12.587182998657227, 4.836006164550781, 7.753448486328125, 1.3472766876220703, 17.94995880126953, 21.06317901611328, 19.82988739013672, 6.5335845947265625, -0.38475990295410156, 9.279499053955078, 19.866928100585938, 15.158380508422852, 5.032228469848633], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000217.npy"}
{"epoch": 0.6575757575757576, "step": 218, "batch_size": 128, "mean": 6.00943660736084, "std": 11.02207088470459, "min": -19.833518981933594, "p10": -9.421382522583007, "median": 6.187505722045898, "p90": 20.62817687988281, "max": 33.134918212890625, "pos_frac": 0.7265625, "sample": [15.588756561279297, 11.064435958862305, -0.6950187683105469, 6.443397521972656, -5.833913803100586, 2.1670913696289062, 1.6392669677734375, 1.7370796203613281, 3.2627716064453125, -13.507888793945312, -6.461402893066406, 23.034957885742188, 11.345001220703125, -2.4654674530029297, 20.05232048034668, 23.112655639648438, -16.02611541748047, 10.542638778686523, 15.831008911132812, 13.420196533203125, -9.954154968261719, -4.5803680419921875, 11.005056381225586, 9.507675170898438, -8.290996551513672, 25.00848388671875, -5.504444122314453, 5.8955078125, -5.173101425170898, 8.764312744140625, 7.910011291503906, 0.18435287475585938, 13.205642700195312, 1.5263862609863281, 4.203804016113281, 17.042442321777344, -4.412078857421875, 7.8492584228515625, 18.545408248901367, 14.186927795410156, 10.883575439453125, 2.5363922119140625, -0.15731048583984375, -3.1983566284179688, 25.672821044921875, 33.134918212890625, 1.0784759521484375, 18.9315185546875, 10.703353881835938, 24.046279907226562, 25.841018676757812, 9.307525634765625, 1.0244712829589844, 9.970712661743164, 0.8030624389648438, -2.1198482513427734, 10.460845947265625, -12.780731201171875, -19.833518981933594, 6.1095733642578125, -3.6860904693603516, -9.28192138671875, 13.108718872070312, 8.180892944335938, 8.550495147705078, -0.6941413879394531, 16.532798767089844, 0.10330390930175781, 3.4980850219726562, 0.9645442962646484, 7.484235763549805, 16.99099349975586, 24.472900390625, 15.524940490722656, -13.798965454101562, 10.478538513183594, 7.451606750488281, 3.8741989135742188, -10.467109680175781, 4.473670959472656, -17.665252685546875, 7.446533203125, -4.028844833374023, 0.4725799560546875, 15.737136840820312, -9.74679183959961, -0.6414413452148438, -2.706268310546875, 10.623069763183594, 17.21331787109375, 6.265438079833984, 1.4709148406982422, 21.7744140625, 13.405914306640625, -2.3921546936035156, 5.55743408203125, -12.276321411132812, 6.544548034667969, 11.871295928955078, 24.177898406982422, 5.663444519042969, 19.613388061523438, 4.995553970336914, 26.89156723022461, 8.574810028076172, 0.9137039184570312, 20.136932373046875, 7.47462272644043, 13.68414306640625, 5.92631721496582, 8.281539916992188, -19.3773193359375, 17.74462890625, 5.373615264892578, 4.623115539550781, 4.944053649902344, -0.11027908325195312, 7.352409362792969, -15.333580017089844, 19.289566040039062, 22.011425018310547, -8.616279602050781, 9.421859741210938, 0.3610363006591797, -1.6065216064453125, 24.52240753173828, -10.116649627685547, 16.116600036621094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000218.npy"}
{"epoch": 0.6606060606060606, "step": 219, "batch_size": 128, "mean": 7.56790018081665, "std": 12.319698333740234, "min": -31.189010620117188, "p10": -5.712069702148438, "median": 6.738089561462402, "p90": 23.298120117187498, "max": 41.23682403564453, "pos_frac": 0.6953125, "sample": [30.464080810546875, 22.086570739746094, 14.587020874023438, 4.4394073486328125, 9.116462707519531, 0.5537643432617188, 22.88555908203125, 10.119842529296875, 14.534584045410156, 11.533332824707031, 14.256217956542969, -15.253562927246094, 27.219219207763672, 41.23682403564453, 21.893157958984375, -5.200477600097656, 1.413177490234375, -4.945457458496094, 4.251352310180664, 25.695337295532227, 11.274345397949219, -5.84039306640625, -1.1324996948242188, 2.1805572509765625, 14.079597473144531, -6.113555908203125, 5.914344787597656, 13.55272102355957, 9.649436950683594, 35.59454345703125, 7.935455322265625, 6.963993072509766, 36.44226837158203, 3.4092750549316406, 16.084381103515625, 4.1320648193359375, -31.189010620117188, 7.040199279785156, -9.670196533203125, -3.8048744201660156, -2.545818328857422, 16.375869750976562, 17.7608642578125, -16.737398147583008, -14.704141616821289, -12.075515747070312, -3.3804779052734375, -1.8487052917480469, 10.803565979003906, -1.3494091033935547, -2.0342540740966797, 16.17186737060547, 7.056709289550781, 25.367605209350586, -0.8063888549804688, 5.925403594970703, 15.21649169921875, 17.235122680664062, -6.569427490234375, 0.8626708984375, -2.5718765258789062, -4.015739440917969, -1.3800621032714844, 19.2806396484375, 29.2166748046875, -3.9487228393554688, 26.707794189453125, -3.2503299713134766, 1.3360786437988281, -1.8392410278320312, 2.1576061248779297, 14.85366439819336, 16.742382049560547, -16.972665786743164, 6.512186050415039, 16.796905517578125, 15.564971923828125, 12.359123229980469, -6.595802307128906, 8.746124267578125, 22.080833435058594, 0.8394565582275391, -2.5599212646484375, 13.519744873046875, 9.320137023925781, -2.6330642700195312, 9.000894546508789, 7.793157577514648, -5.657073974609375, 2.3144760131835938, -4.6684112548828125, -1.9894180297851562, 5.300504684448242, 23.597320556640625, 16.650962829589844, -14.671127319335938, 1.9396400451660156, 33.00083923339844, 4.666568756103516, 30.477882385253906, -3.445781707763672, -0.6538543701171875, 16.759565353393555, 1.6898307800292969, 8.377029418945312, 15.897228240966797, 11.350261688232422, 4.453727722167969, 18.05571937561035, 26.95159149169922, 10.590347290039062, -1.9745025634765625, 21.280719757080078, -2.1995391845703125, 4.930812835693359, 23.169891357421875, 14.388599395751953, 6.059173583984375, 11.21441650390625, -0.2620124816894531, 21.85772705078125, 6.0195465087890625, -8.188407897949219, 17.01732635498047, 11.578872680664062, 2.5938167572021484, 4.226299285888672, 10.816034317016602], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000219.npy"}
{"epoch": 0.6636363636363637, "step": 220, "batch_size": 128, "mean": 7.632498741149902, "std": 10.960469245910645, "min": -18.131988525390625, "p10": -4.768729400634766, "median": 5.989143371582031, "p90": 24.51206932067871, "max": 32.2559814453125, "pos_frac": 0.7265625, "sample": [5.809120178222656, -1.3070220947265625, 2.5070228576660156, -3.6043167114257812, 8.706024169921875, 7.276576995849609, 1.8371524810791016, 3.7445526123046875, 26.97296905517578, 2.5451812744140625, 22.677230834960938, -3.007568359375, 3.4933853149414062, 13.585918426513672, 4.1594390869140625, -5.201468467712402, -4.4414215087890625, -4.8643951416015625, 11.041589736938477, -0.24275970458984375, 0.10741806030273438, 8.72763442993164, -18.131988525390625, -0.8975563049316406, 6.9131317138671875, 0.4366283416748047, 9.450996398925781, 0.5799102783203125, 7.571624755859375, 19.596145629882812, 5.81541633605957, 25.814132690429688, -0.2342376708984375, 25.226913452148438, 25.22760009765625, 4.26909065246582, 20.907207489013672, 4.014656066894531, 14.583206176757812, 16.143173217773438, 12.70888900756836, 8.008430480957031, 15.589179992675781, -4.986541748046875, 3.725170135498047, 26.347309112548828, -6.494443893432617, 26.07079315185547, -15.154373168945312, 0.6929168701171875, 18.110794067382812, 13.687850952148438, 16.17865562438965, 19.738834381103516, 6.1133575439453125, 24.01136016845703, -8.5108642578125, -2.8143157958984375, -4.1002960205078125, -4.3770904541015625, 9.37841796875, 14.024734497070312, 1.4727191925048828, -4.727729797363281, -0.7230224609375, 5.7876739501953125, 19.624908447265625, 27.7506103515625, 27.87086296081543, 32.2559814453125, 23.736175537109375, 1.054412841796875, 20.00088882446289, 4.073036193847656, -16.59600067138672, 2.2567501068115234, 11.040733337402344, -0.23497581481933594, 1.4087448120117188, 13.570941925048828, 11.487289428710938, -0.1005859375, -0.45513916015625, -0.5240478515625, 2.8331565856933594, -14.670459747314453, 8.084121704101562, 18.312257766723633, 14.856658935546875, -0.3937835693359375, -0.6662254333496094, -8.283109664916992, 15.280933380126953, 0.3293914794921875, 6.8323516845703125, 6.053260803222656, 25.871719360351562, 8.1697998046875, -3.6882781982421875, 27.684722900390625, -9.043571472167969, 16.16497039794922, -0.09619903564453125, 5.925025939941406, 12.049980163574219, 14.209938049316406, 12.680526733398438, 15.572479248046875, 3.6305313110351562, 3.1753387451171875, 1.8906135559082031, 4.357505798339844, 8.708450317382812, 23.71149253845215, 8.008460998535156, 19.857364654541016, -6.461982727050781, -5.35791015625, 30.085479736328125, 6.807407379150391, -1.978515625, -3.724567413330078, 20.745155334472656, 6.2494049072265625, 3.482574462890625, 26.346817016601562, 24.205707550048828, 13.363456726074219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000220.npy"}
{"epoch": 0.6666666666666666, "step": 221, "batch_size": 128, "mean": 7.47598934173584, "std": 10.569465637207031, "min": -21.367225646972656, "p10": -5.782778930664062, "median": 6.650707244873047, "p90": 21.127059936523438, "max": 29.708187103271484, "pos_frac": 0.7734375, "sample": [17.231075286865234, 14.050655364990234, 22.29041290283203, 24.086402893066406, 4.878353118896484, 5.908384323120117, 18.845443725585938, 1.0830154418945312, 18.25882339477539, 4.713447570800781, -11.412590026855469, 15.056640625, 9.124954223632812, 15.189468383789062, 13.625457763671875, 17.995704650878906, 0.5207672119140625, 14.95456314086914, 6.8675689697265625, -2.9373626708984375, -2.7976531982421875, 19.530426025390625, 19.770729064941406, 5.419090270996094, 11.765716552734375, -8.681652069091797, 21.15216064453125, -4.903841018676758, 20.860153198242188, 25.712860107421875, -6.618120193481445, 11.824302673339844, 11.036773681640625, -3.6446189880371094, -21.367225646972656, 10.360115051269531, 0.4125213623046875, 24.74053192138672, 5.9656524658203125, -6.12152099609375, -1.5062141418457031, 26.52054214477539, 1.3617324829101562, 0.23212432861328125, 28.407630920410156, -5.637603759765625, 1.3631591796875, 6.032279968261719, 12.485214233398438, 20.722152709960938, 3.8333473205566406, 11.884666442871094, 13.13232421875, 3.4258880615234375, 11.273929595947266, -1.100046157836914, 3.6590805053710938, 7.117500305175781, 6.3945159912109375, 2.397369384765625, -1.5313606262207031, 8.977394104003906, 13.510051727294922, -0.3096466064453125, 1.2594985961914062, 12.858428955078125, 0.28924560546875, -6.883209228515625, 10.814579010009766, 21.116302490234375, 28.39862060546875, 23.756147384643555, -1.7427864074707031, 2.8574981689453125, 5.8399200439453125, -2.7455196380615234, -0.6883621215820312, 10.247053146362305, 13.187980651855469, 0.16854095458984375, 14.684577941894531, 25.3326416015625, 28.23870849609375, -2.9523487091064453, 2.5948867797851562, 1.6824913024902344, 29.708187103271484, 12.34079360961914, -14.5374755859375, 8.824859619140625, 8.7987060546875, 5.5382080078125, -8.813926696777344, 10.309959411621094, 13.994918823242188, -8.13909912109375, 6.29411506652832, 10.305438995361328, 5.22125244140625, 16.68871307373047, 20.35430145263672, -12.32794189453125, 7.719078063964844, 10.16169548034668, 15.223312377929688, 14.877483367919922, -18.12244415283203, 10.276884078979492, 6.433845520019531, 16.07874298095703, 0.6610145568847656, 4.0772705078125, -2.1219539642333984, -4.5806427001953125, 4.314586639404297, 2.7232627868652344, 0.5528488159179688, -14.244556427001953, -7.58837890625, 22.65894317626953, 12.838956832885742, 4.011962890625, -4.3586578369140625, 20.69327163696289, 14.1463623046875, 18.188934326171875, 4.619823455810547, 7.415464401245117], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000221.npy"}
{"epoch": 0.6696969696969697, "step": 222, "batch_size": 128, "mean": 6.207047462463379, "std": 12.246824264526367, "min": -28.244949340820312, "p10": -6.917403221130371, "median": 4.198724746704102, "p90": 20.912787628173827, "max": 43.819549560546875, "pos_frac": 0.6796875, "sample": [-1.1794548034667969, 17.595901489257812, 4.822662353515625, 24.88818359375, 3.5179977416992188, 21.507049560546875, 16.664268493652344, -8.558616638183594, 0.8580493927001953, 9.234352111816406, 7.857975006103516, 19.526302337646484, 43.819549560546875, -14.012998580932617, -3.2290573120117188, 17.58486557006836, 10.479141235351562, 16.492298126220703, 6.855194091796875, 17.33989715576172, 16.798898696899414, 20.10265350341797, 15.895051956176758, 27.18743133544922, 2.9007606506347656, 0.4080829620361328, 1.3970184326171875, -17.150169372558594, -0.15008544921875, 4.4087066650390625, -0.42002105712890625, -4.957618713378906, 10.288551330566406, 2.014789581298828, -2.4939422607421875, -4.294460296630859, 9.228866577148438, 13.470155715942383, 2.471118927001953, 20.658103942871094, 1.324289321899414, 34.670265197753906, 26.845779418945312, 4.017528533935547, 30.002456665039062, 24.6720027923584, 11.61428451538086, 13.209293365478516, 11.447654724121094, 13.851436614990234, -2.072652816772461, 9.817115783691406, 18.30590057373047, 19.896493911743164, -1.6798858642578125, 3.2547149658203125, -18.88934326171875, -9.920326232910156, 30.317367553710938, 4.570985794067383, -0.2824668884277344, -6.912467956542969, 11.060234069824219, 28.068389892578125, 19.063142776489258, -1.8875160217285156, -7.041587829589844, -2.637796401977539, -4.38873291015625, 7.7310028076171875, 9.620361328125, -5.5686798095703125, 8.216276168823242, 3.2071075439453125, 3.125904083251953, -6.1320953369140625, 28.033416748046875, 14.478172302246094, 3.267547607421875, -2.363922119140625, 7.286811828613281, -2.6740341186523438, 3.8123703002929688, 23.357677459716797, 4.298377990722656, 18.88055419921875, -0.7320480346679688, -22.201885223388672, 20.050296783447266, 2.0194549560546875, 4.908843994140625, 10.286956787109375, 1.3791656494140625, 5.090660095214844, 10.991874694824219, -1.8242950439453125, -5.923374176025391, 15.348630905151367, 9.322750091552734, 9.995796203613281, -4.645729064941406, -19.072479248046875, 1.485260009765625, 7.057098388671875, -7.523002624511719, 0.8898468017578125, 0.6865062713623047, -4.377046585083008, -0.7874374389648438, -3.20867919921875, 19.5025634765625, 2.8855152130126953, 4.727783203125, 30.2164306640625, -28.244949340820312, -9.980140686035156, 1.28472900390625, 19.005550384521484, 6.34039306640625, 20.113739013671875, -6.183998107910156, 4.099071502685547, 2.591949462890625, -6.928918838500977, -14.946090698242188, 14.999862670898438, -1.7415657043457031, -1.1579036712646484], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000222.npy"}
{"epoch": 0.6727272727272727, "step": 223, "batch_size": 128, "mean": 9.515880584716797, "std": 10.732564926147461, "min": -12.095230102539062, "p10": -3.7631633758544916, "median": 9.913297653198242, "p90": 23.40355396270752, "max": 32.6734619140625, "pos_frac": 0.796875, "sample": [8.85824203491211, 28.465709686279297, 6.199554443359375, -2.0031280517578125, -12.095230102539062, 17.857139587402344, 21.967178344726562, 9.052688598632812, -10.435012817382812, 1.8829193115234375, 0.653564453125, 11.489490509033203, 0.7111988067626953, 27.327957153320312, -11.961570739746094, 9.562667846679688, 11.121818542480469, 25.63446807861328, 31.80529022216797, 10.954109191894531, 20.881370544433594, 20.792009353637695, 7.668485641479492, 11.629981994628906, 23.431732177734375, -1.2389507293701172, 5.356113433837891, 5.965606689453125, 1.8563385009765625, 2.2971267700195312, 11.476119995117188, 9.373825073242188, 12.19927978515625, 11.177635192871094, -3.6343040466308594, -11.555486679077148, 6.357366561889648, 4.59259033203125, -8.920516967773438, 19.49771499633789, 0.8939743041992188, 20.081581115722656, 10.071380615234375, 17.597064971923828, 2.9844818115234375, -4.063835144042969, -7.37005615234375, -6.223932266235352, 17.806011199951172, 17.942909240722656, 10.632301330566406, 25.8076171875, 10.575454711914062, 19.790170669555664, 30.14397430419922, 1.8616600036621094, 23.370527267456055, 20.473989486694336, 2.951519012451172, 7.246406555175781, 9.913686752319336, 10.297122955322266, 25.236629486083984, -0.18565750122070312, -0.5125789642333984, -4.6899871826171875, -10.100076675415039, 16.066864013671875, 5.9195709228515625, 11.546890258789062, 4.365303039550781, -3.252655029296875, 4.927825927734375, 8.88786506652832, 17.659942626953125, 8.989402770996094, 1.7036914825439453, 22.125173568725586, -2.1400299072265625, 14.409934997558594, -0.9872665405273438, 0.876312255859375, -5.57750129699707, -7.533090591430664, 7.826225280761719, -2.444965362548828, 9.969261169433594, 11.532806396484375, -0.22640037536621094, 3.095033645629883, 17.034912109375, 1.9974822998046875, 10.28277587890625, 13.280094146728516, 16.85660743713379, 21.311214447021484, 26.641571044921875, 22.42481231689453, 11.08035659790039, 12.505706787109375, 4.4835968017578125, 19.560346603393555, 12.727304458618164, 19.20153045654297, 23.391477584838867, 9.912908554077148, 0.09703826904296875, 24.123340606689453, 21.598915100097656, -7.3182220458984375, 20.2235164642334, 1.0813007354736328, 6.042839050292969, 32.6734619140625, 21.591419219970703, -3.423715591430664, 30.608749389648438, 0.2786426544189453, 28.331222534179688, 22.705198287963867, 8.900505065917969, 11.168920516967773, -1.9975318908691406, 14.947853088378906, -2.838033676147461, 2.0884437561035156, 12.554084777832031, 15.37639045715332], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000223.npy"}
{"epoch": 0.6757575757575758, "step": 224, "batch_size": 128, "mean": 8.142193794250488, "std": 11.977149963378906, "min": -30.461776733398438, "p10": -7.393431282043456, "median": 7.052921295166016, "p90": 23.977857971191405, "max": 35.386146545410156, "pos_frac": 0.8046875, "sample": [24.475418090820312, 22.452842712402344, 11.864278793334961, 2.5518264770507812, -9.429466247558594, -7.163810729980469, 23.764617919921875, -0.570953369140625, 3.159038543701172, 21.327425003051758, 5.447589874267578, 15.40814208984375, 4.083610534667969, -8.287406921386719, 2.101165771484375, -7.039787292480469, 8.456300735473633, 27.438201904296875, 14.428688049316406, 13.717086791992188, 28.916946411132812, 9.584564208984375, 21.230796813964844, 1.0000648498535156, 11.304512023925781, 18.842639923095703, 2.920337677001953, 12.605987548828125, -11.74801254272461, 25.32141876220703, 8.135574340820312, 15.681076049804688, -10.035018920898438, 0.863525390625, 6.519157409667969, 12.329963684082031, 2.8272705078125, 6.028350830078125, 17.955093383789062, 25.284595489501953, 2.8538131713867188, 23.129608154296875, 14.761123657226562, 16.60906982421875, 13.3193359375, 2.1628570556640625, 15.013664245605469, 11.090339660644531, 12.562437057495117, 5.2329559326171875, 22.471633911132812, -9.208480834960938, 0.7138347625732422, -10.6727294921875, 1.045318603515625, 26.250301361083984, 12.37176513671875, -30.461776733398438, 6.231639862060547, 35.386146545410156, 27.003829956054688, -1.44757080078125, 15.2271728515625, 9.620920181274414, 11.7213134765625, -14.106010437011719, 23.122770309448242, 5.2246551513671875, 6.565391540527344, 4.409965515136719, 2.1494598388671875, 12.327205657958984, -4.870426177978516, 23.28321075439453, 12.833316802978516, -2.5244712829589844, -5.338539123535156, 24.512929916381836, -2.2780532836914062, 10.490020751953125, 2.4267539978027344, 5.3394775390625, 1.2447052001953125, 4.913900375366211, -13.608192443847656, 9.785146713256836, 31.555206298828125, -7.92921257019043, 12.771259307861328, -6.249696731567383, 1.0411262512207031, 7.972103118896484, 5.990409851074219, 18.488998413085938, -2.5186004638671875, 25.463531494140625, 7.5404510498046875, 4.0104217529296875, 17.06414794921875, 14.673912048339844, 5.4161834716796875, 0.415069580078125, 13.018753051757812, 25.943466186523438, 11.092954635620117, 1.6782398223876953, 18.466014862060547, 5.878047943115234, 4.743690490722656, -11.29693603515625, -4.146350860595703, 2.4038238525390625, 2.9832706451416016, -22.24683952331543, 15.772109985351562, 0.97283935546875, 5.53155517578125, 18.574745178222656, 11.753692626953125, 21.520835876464844, -0.8614959716796875, -21.547378540039062, 23.507946014404297, 16.988937377929688, 16.445541381835938, 1.6914100646972656, 2.3880615234375, 32.59309005737305], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000224.npy"}
{"epoch": 0.6787878787878788, "step": 225, "batch_size": 128, "mean": 4.502458095550537, "std": 10.561274528503418, "min": -21.16603660583496, "p10": -7.857348632812498, "median": 4.021116256713867, "p90": 19.131435775756835, "max": 29.975799560546875, "pos_frac": 0.671875, "sample": [-2.3513641357421875, 2.0171051025390625, -10.772315979003906, 23.886993408203125, 4.5868072509765625, -4.669429779052734, 3.1606903076171875, 8.920318603515625, 28.779953002929688, 5.313085556030273, 1.269195556640625, 4.88067626953125, 4.990211486816406, 10.609991073608398, 17.631690979003906, 26.163890838623047, -2.3178863525390625, -21.09481430053711, 11.564956665039062, 3.9320297241210938, 23.321426391601562, -3.776275634765625, 0.7216110229492188, 5.146522521972656, 1.9394302368164062, 11.11090087890625, 7.0367279052734375, -1.6290359497070312, 6.422264099121094, 3.6167373657226562, -8.940460205078125, 11.8135986328125, 0.7572402954101562, 19.312820434570312, 14.054515838623047, 13.765995025634766, 1.4760894775390625, -2.3809661865234375, -3.234344482421875, 12.922874450683594, -13.125112533569336, 6.1707305908203125, 16.03891372680664, -5.623748779296875, 5.4990386962890625, 3.607633590698242, 5.236785888671875, -13.597724914550781, -3.230947494506836, -9.200729370117188, -7.393157958984375, 0.2050018310546875, -6.665218353271484, 6.451896667480469, -7.337169647216797, 13.487335205078125, 12.260700225830078, -16.766036987304688, 11.569526672363281, -1.7599830627441406, 5.5276641845703125, -13.781768798828125, -3.464641571044922, -5.537933349609375, -0.32303619384765625, -6.880962371826172, 2.980377197265625, -0.8767318725585938, 10.67254638671875, 19.053699493408203, 1.7126007080078125, 1.886627197265625, 3.7145004272460938, 10.261611938476562, 12.556983947753906, 29.975799560546875, -6.9555511474609375, 12.692359924316406, 1.78814697265625, -6.195648193359375, 2.107269287109375, 9.722625732421875, -21.16603660583496, 21.18126678466797, 11.960792541503906, -1.4642372131347656, 13.531890869140625, 7.642023086547852, 8.205612182617188, -9.326934814453125, 3.9591331481933594, -6.513561248779297, 4.231285095214844, 9.099851608276367, 28.910903930664062, 4.083099365234375, 15.613937377929688, 24.272933959960938, 18.8399658203125, 4.269655227661133, 9.817737579345703, 2.4260826110839844, 1.0087203979492188, 19.834991455078125, -1.7865142822265625, -3.2143936157226562, -1.1769638061523438, 19.595062255859375, 17.290771484375, -10.119588851928711, 9.413219451904297, 22.23779296875, 9.191543579101562, 3.0894241333007812, 14.422679901123047, -20.792747497558594, 13.90859603881836, 3.219928741455078, 8.998855590820312, 5.9656982421875, -13.099868774414062, 10.239187240600586, -1.5350341796875, 19.35921859741211, -0.1774749755859375, 4.8281707763671875, -5.1421661376953125, -5.243572235107422], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000225.npy"}
{"epoch": 0.6818181818181818, "step": 226, "batch_size": 128, "mean": 5.97282075881958, "std": 10.59555435180664, "min": -21.254135131835938, "p10": -6.332225799560546, "median": 5.721456527709961, "p90": 19.80321159362793, "max": 34.73011779785156, "pos_frac": 0.6953125, "sample": [15.437149047851562, -0.181976318359375, 7.029319763183594, 3.0621337890625, 20.567489624023438, 0.0773468017578125, -20.516143798828125, 14.36883544921875, 8.596359252929688, -1.4483680725097656, 2.2106876373291016, 6.631261825561523, 0.7744617462158203, -8.924636840820312, -6.697837829589844, 34.73011779785156, -3.724241256713867, 1.6977214813232422, 12.61024284362793, -0.2524261474609375, 20.82143211364746, 1.9759140014648438, -1.8923835754394531, 5.7530517578125, 21.339920043945312, 8.272125244140625, 12.14068603515625, 5.122505187988281, 6.351768493652344, -3.560352325439453, -5.287528991699219, -6.637306213378906, 2.4991703033447266, 12.6953125, 13.596633911132812, 8.444435119628906, 12.009742736816406, 2.3698883056640625, 13.616249084472656, 9.246795654296875, 17.68157958984375, -10.614456176757812, 10.195037841796875, -16.435348510742188, -5.170234680175781, -0.7787761688232422, 9.579246520996094, 16.01531982421875, 0.22510337829589844, 3.5365219116210938, 0.9929580688476562, 6.7191314697265625, 10.570001602172852, 2.4141082763671875, 3.1740798950195312, 19.672637939453125, 19.140222549438477, -0.8716812133789062, -3.4162368774414062, 8.789443969726562, 24.57798194885254, -1.6968650817871094, -1.8743133544921875, -5.640449523925781, 7.268890380859375, -2.0754241943359375, -0.24126625061035156, 14.673236846923828, -6.696250915527344, 13.529029846191406, 5.689861297607422, 24.606552124023438, 30.161773681640625, 0.94970703125, 11.10760498046875, 11.853759765625, 5.334720611572266, -0.4624176025390625, 17.931060791015625, 3.019439697265625, -5.306186676025391, 25.481689453125, -3.1996231079101562, 16.173667907714844, 10.951480865478516, 6.0435943603515625, -6.20147705078125, 4.786527633666992, 2.8512496948242188, -3.870330810546875, 6.739370346069336, -0.4996299743652344, 9.830528259277344, 12.826919555664062, -5.213508605957031, -14.555007934570312, 15.483867645263672, 22.99871826171875, 4.962505340576172, 23.787742614746094, -16.361114501953125, 18.815536499023438, 16.31140899658203, 10.888259887695312, 1.2054481506347656, -3.0152225494384766, 8.120643615722656, 6.8881683349609375, 8.428810119628906, 1.529336929321289, -2.1056976318359375, 29.806228637695312, 16.62535858154297, -6.9026641845703125, 12.020462036132812, 5.9034881591796875, 20.10788345336914, -12.811588287353516, 25.975074768066406, 9.168624877929688, 3.2479400634765625, 13.100078582763672, 15.209383010864258, -1.6218795776367188, -8.412956237792969, 18.061370849609375, -21.254135131835938, 3.159881591796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000226.npy"}
{"epoch": 0.6848484848484848, "step": 227, "batch_size": 128, "mean": 7.3904523849487305, "std": 11.427128791809082, "min": -28.34947967529297, "p10": -6.071503639221191, "median": 6.87681770324707, "p90": 21.100276947021484, "max": 37.20216369628906, "pos_frac": 0.765625, "sample": [14.732803344726562, 13.522645950317383, -0.10652923583984375, 1.446685791015625, 22.36962890625, 7.899269104003906, 0.12494659423828125, 11.478073120117188, 14.232681274414062, 8.906768798828125, 26.16455078125, 0.6971206665039062, 0.13291549682617188, -7.650382995605469, 1.7591094970703125, 6.794746398925781, 5.6781158447265625, -13.573455810546875, -6.120883941650391, 1.1957836151123047, 16.139585494995117, 4.98040771484375, -18.604904174804688, 16.440330505371094, 11.862159729003906, 5.917137145996094, 7.605690002441406, 1.4351119995117188, 6.958889007568359, 9.140924453735352, 7.99609375, -8.344749450683594, 17.872779846191406, 0.2331695556640625, -20.515335083007812, 2.6592254638671875, 17.60406494140625, -0.15190505981445312, 17.059890747070312, 20.5435791015625, -0.19361495971679688, -18.208900451660156, -11.249736785888672, 21.020814895629883, 0.37870025634765625, 18.462039947509766, 13.96490478515625, 3.611055374145508, 3.9738006591796875, 28.872703552246094, -7.747550964355469, 2.3215103149414062, -1.528350830078125, 7.011543273925781, -4.184337615966797, 4.7536468505859375, 1.1190052032470703, 25.990798950195312, 23.749465942382812, 4.9200439453125, 18.22107696533203, 20.054183959960938, 24.051666259765625, 8.148529052734375, 21.285688400268555, -9.141162872314453, 1.7876472473144531, 13.526237487792969, -28.34947967529297, -1.7429275512695312, 15.741065979003906, 5.1122283935546875, 19.446474075317383, 19.44865608215332, 14.409744262695312, 12.717430114746094, -4.427448272705078, -11.741325378417969, 0.7879066467285156, 20.890827178955078, 29.758224487304688, 4.234809875488281, 15.444343566894531, 2.908536911010742, 37.20216369628906, -8.131698608398438, 26.040786743164062, 9.795001983642578, -1.5327033996582031, 10.248847961425781, 17.276931762695312, 7.847438812255859, 16.49195098876953, 18.26068878173828, 8.067314147949219, 8.808860778808594, 3.230297088623047, -2.5654678344726562, 12.441539764404297, 15.192537307739258, 7.039237976074219, -5.121063232421875, 19.398361206054688, 7.2434844970703125, 26.43543243408203, 37.078887939453125, -3.383108139038086, 11.552841186523438, 2.7967071533203125, 5.346534729003906, -0.743865966796875, 7.952392578125, 2.565509796142578, -0.06640625, -1.0554656982421875, 0.6751194000244141, -0.13529396057128906, 6.367462158203125, 20.450088500976562, 9.513168334960938, 0.20354652404785156, 17.237075805664062, 3.2154006958007812, 22.550750732421875, 14.62081527709961, 6.407554626464844, -0.9186325073242188, -6.05034065246582], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000227.npy"}
{"epoch": 0.6878787878787879, "step": 228, "batch_size": 128, "mean": 7.773309707641602, "std": 10.972126007080078, "min": -18.020187377929688, "p10": -3.2537303924560543, "median": 5.788609504699707, "p90": 22.700299835205076, "max": 38.20994186401367, "pos_frac": 0.7578125, "sample": [21.302032470703125, 4.0739898681640625, 10.893295288085938, 22.150421142578125, 4.295982360839844, 2.3008365631103516, 17.229476928710938, 32.26166915893555, 3.9613113403320312, -6.076385498046875, 3.4456710815429688, -3.5811538696289062, 22.995147705078125, 2.591920852661133, 20.1005859375, -2.2875518798828125, 22.618854522705078, 3.34619140625, 7.20673942565918, 4.487140655517578, -15.331951141357422, 18.70337677001953, 22.890338897705078, 23.591339111328125, 3.8697357177734375, -1.6421051025390625, 38.20994186401367, 5.0460052490234375, 3.9191360473632812, 18.0064640045166, -0.83636474609375, 0.732330322265625, -1.3397064208984375, 1.2680740356445312, 10.78701400756836, 0.5393524169921875, 7.084053039550781, 13.313339233398438, 14.910648345947266, 35.8863525390625, -1.1952190399169922, 20.240936279296875, 5.011787414550781, 17.815452575683594, 22.468841552734375, 3.7865447998046875, 5.583118438720703, 6.2852630615234375, -0.9951629638671875, 1.0083236694335938, 11.263236999511719, -18.020187377929688, 14.234235763549805, 22.052154541015625, 11.164230346679688, 8.468833923339844, 5.682464599609375, -1.905975341796875, -2.0867156982421875, 7.666374206542969, 4.653190612792969, 8.180980682373047, 9.274389266967773, 34.0396728515625, 17.230850219726562, -5.0717315673828125, -0.44756507873535156, 14.7440185546875, 27.111724853515625, 0.4747161865234375, -3.4650917053222656, 12.35247802734375, -1.7125778198242188, 12.689567565917969, -1.773733139038086, 9.998725891113281, -14.953376770019531, 8.446952819824219, 10.482536315917969, -5.961677551269531, 5.768320083618164, -1.4473800659179688, 5.993804931640625, 12.928544998168945, 3.5934791564941406, -9.668136596679688, 12.183372497558594, 3.588052749633789, -0.21008682250976562, 1.2993888854980469, 26.699783325195312, 24.108139038085938, 1.6599674224853516, 23.17169189453125, 12.908233642578125, 12.437103271484375, -1.6995658874511719, 22.959943771362305, 4.3270111083984375, 11.710205078125, 20.41539764404297, 10.732749938964844, 5.410484313964844, 6.587165832519531, 12.942350387573242, 2.6783199310302734, -14.382652282714844, -17.134735107421875, 5.042835235595703, -5.747154235839844, -0.6354293823242188, 5.80889892578125, -3.16314697265625, 8.109580993652344, 4.007352828979492, 14.294357299804688, -2.2293472290039062, -14.705207824707031, 22.215347290039062, 11.6204833984375, 23.445053100585938, 1.9065399169921875, 8.916191101074219, 21.540000915527344, 8.95018196105957, -2.8416748046875, 1.0568313598632812, 20.08477020263672], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000228.npy"}
{"epoch": 0.6909090909090909, "step": 229, "batch_size": 128, "mean": 6.661562919616699, "std": 9.642495155334473, "min": -19.31280517578125, "p10": -4.430708312988281, "median": 6.38908576965332, "p90": 21.885221481323242, "max": 30.066993713378906, "pos_frac": 0.7734375, "sample": [1.7812652587890625, -3.4017715454101562, -0.2511405944824219, 9.350349426269531, 8.424179077148438, -13.710113525390625, 4.97096061706543, 4.274394989013672, 20.669830322265625, 1.3086624145507812, 1.4647750854492188, 6.64886474609375, 23.48809051513672, 2.1498889923095703, 30.066993713378906, -0.0041351318359375, 0.550689697265625, 6.671173095703125, 9.809791564941406, -7.713348388671875, -6.9662933349609375, 12.870376586914062, 10.043067932128906, 1.4137535095214844, 6.615386962890625, 20.504053115844727, 11.109504699707031, 2.6269969940185547, 22.358245849609375, 4.802152633666992, 10.027189254760742, 21.983821868896484, -2.8900909423828125, 3.5166168212890625, 0.9853267669677734, 18.354827880859375, 7.995025634765625, 8.422073364257812, -7.732282638549805, -3.6299362182617188, -0.19295310974121094, 3.326984405517578, -1.9721183776855469, 12.557519912719727, -4.718757629394531, -2.3102149963378906, -4.257957458496094, -8.158958435058594, 14.052413940429688, -7.226844787597656, 26.728918075561523, 11.243011474609375, 1.1201362609863281, 3.3915252685546875, 11.177520751953125, 5.813287734985352, 9.554328918457031, 3.9486541748046875, 1.8013877868652344, 4.6807098388671875, 5.72674560546875, 8.983558654785156, 10.736360549926758, 0.5017204284667969, 6.666255950927734, 8.936592102050781, 4.616268157958984, 11.0487060546875, 4.9675140380859375, 16.733078002929688, 0.8935165405273438, -19.31280517578125, 9.477874755859375, 12.947454452514648, 6.604084014892578, 11.837539672851562, 12.190511703491211, 7.946384429931641, 6.517307281494141, -3.2134628295898438, 6.596103668212891, -9.696586608886719, 13.796836853027344, -1.0359115600585938, 5.9849395751953125, 9.615959167480469, 13.715988159179688, 22.92034149169922, 23.15442657470703, 25.141387939453125, -9.438333511352539, 19.6102294921875, 15.577529907226562, 11.079212188720703, -10.828590393066406, 28.330280303955078, -0.3480491638183594, 1.2352256774902344, 3.054912567138672, -3.6234130859375, -4.307258605957031, 11.234268188476562, 24.113792419433594, 1.4773845672607422, 15.60162353515625, 7.167259216308594, 28.7591552734375, 8.470634460449219, 6.2608642578125, 15.038108825683594, 23.58343505859375, 5.858650207519531, 11.745073318481445, 4.318511962890625, -13.422744750976562, 0.8633861541748047, 13.5736083984375, 2.219696044921875, 8.9451904296875, -0.8673534393310547, 6.917633056640625, 26.152297973632812, -6.6038970947265625, -2.82061767578125, 2.157573699951172, 9.594154357910156, 21.84296417236328, 5.64111328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000229.npy"}
{"epoch": 0.693939393939394, "step": 230, "batch_size": 128, "mean": 8.491394996643066, "std": 11.349309921264648, "min": -19.466651916503906, "p10": -4.975559616088867, "median": 7.632338523864746, "p90": 24.702071952819825, "max": 35.42552185058594, "pos_frac": 0.765625, "sample": [4.6883087158203125, 11.138429641723633, -19.466651916503906, 14.152961730957031, -2.0508460998535156, -12.485488891601562, 3.3894214630126953, 8.424346923828125, 1.8480224609375, 4.05169677734375, 10.99774169921875, 14.722757339477539, 10.692817687988281, 9.744277954101562, 5.641033172607422, 9.79632568359375, -2.871438980102539, 5.086877822875977, -5.737678527832031, 13.272428512573242, 5.16845703125, -9.793746948242188, 13.43342399597168, -2.6897201538085938, -12.507484436035156, 19.896743774414062, -2.484088897705078, -2.909473419189453, -8.773435592651367, 17.63733673095703, 10.830726623535156, -16.832107543945312, 7.547271728515625, 12.61109733581543, 19.34222412109375, -1.6911449432373047, 22.960189819335938, -12.499649047851562, 24.702089309692383, 28.269515991210938, 7.717405319213867, 15.620834350585938, 0.46396636962890625, 15.337043762207031, 9.251327514648438, 7.509513854980469, 27.56903076171875, 2.216156005859375, 4.698945999145508, 9.749557495117188, 15.689105987548828, 1.0170650482177734, 17.111358642578125, -2.0126876831054688, 11.518081665039062, 4.8500518798828125, 14.39309310913086, 5.3978424072265625, 30.252342224121094, 22.89971923828125, 33.69031524658203, 10.707847595214844, 24.293495178222656, 5.4823455810546875, 14.768890380859375, 17.19598388671875, 35.42552185058594, -4.9085235595703125, 5.414882659912109, 18.329917907714844, 14.442703247070312, 10.157127380371094, 7.417898178100586, 16.69390296936035, -5.131977081298828, 27.422582626342773, -0.845123291015625, 31.610145568847656, 7.232933044433594, 19.71831512451172, 5.407739639282227, 3.6396942138671875, -1.1950874328613281, 10.790878295898438, 3.7451705932617188, 0.7231521606445312, 7.1834564208984375, -2.3816490173339844, 18.758407592773438, -11.416366577148438, -0.34874725341796875, 4.036895751953125, 18.228546142578125, 8.82905387878418, -0.9417724609375, 25.388916015625, 30.016021728515625, 0.7105865478515625, -6.117622375488281, 9.680587768554688, 1.2787094116210938, 11.820388793945312, 0.0159454345703125, 7.219886779785156, 21.371761322021484, 16.938125610351562, 8.133247375488281, 2.5850830078125, 22.113399505615234, 4.041025161743164, 10.338844299316406, 24.702064514160156, 29.476242065429688, 17.364044189453125, -0.7559661865234375, 1.5720767974853516, 11.955772399902344, 26.445655822753906, -4.6308135986328125, -5.878486633300781, -3.0485153198242188, 28.53985595703125, -11.445083618164062, -3.688629150390625, 9.294586181640625, 5.635833740234375, 2.928253173828125, 20.21486473083496], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000230.npy"}
{"epoch": 0.696969696969697, "step": 231, "batch_size": 128, "mean": 8.621421813964844, "std": 11.205891609191895, "min": -21.00548553466797, "p10": -4.933973693847656, "median": 7.3018388748168945, "p90": 22.89862899780273, "max": 43.112060546875, "pos_frac": 0.7734375, "sample": [20.479900360107422, 12.308231353759766, 2.714385986328125, 6.0717620849609375, 23.54351806640625, 18.01355743408203, 12.078971862792969, 13.472726821899414, -3.4261322021484375, 19.59811019897461, 3.7814807891845703, 4.417211532592773, 7.801219940185547, 15.90469741821289, 16.593795776367188, 3.2102737426757812, -1.6754989624023438, 14.591365814208984, 15.674776077270508, 19.891845703125, 28.63262939453125, 5.4355621337890625, 6.043060302734375, 37.54075622558594, 13.67325210571289, 10.990997314453125, 20.235260009765625, 17.04128074645996, 3.7628211975097656, 1.8154792785644531, 23.902572631835938, -5.097801208496094, 11.560943603515625, -6.021650314331055, -0.0655975341796875, 3.503021240234375, 19.336029052734375, 3.479795455932617, 7.084329605102539, -2.8539505004882812, 1.9767265319824219, 18.879379272460938, -21.00548553466797, -6.213104248046875, -4.863761901855469, 6.008880615234375, 13.30141830444336, -2.0735092163085938, 11.060302734375, 21.83304214477539, 13.59365463256836, 6.609645843505859, 2.459484100341797, 2.737884521484375, 8.759010314941406, -4.104007720947266, 23.248207092285156, -5.137332916259766, -6.8338623046875, -5.392751693725586, 20.885841369628906, -3.170948028564453, 30.90133285522461, 1.2074737548828125, 26.352169036865234, 29.113967895507812, 19.87009620666504, 19.039031982421875, -2.1068649291992188, 5.755279541015625, 18.854476928710938, 0.6141223907470703, -2.2107620239257812, 7.7023162841796875, 10.982574462890625, 23.620941162109375, 10.05295181274414, 4.897972106933594, 11.97247314453125, 11.198204040527344, 3.6382293701171875, 17.856224060058594, 25.8211669921875, -0.1301116943359375, -1.2138328552246094, 15.442008972167969, 10.523977279663086, 5.768310546875, 14.867584228515625, 13.275449752807617, -0.0321197509765625, -19.450462341308594, 27.344505310058594, 22.281858444213867, 7.988166809082031, -0.5008430480957031, -6.0742340087890625, 2.4632225036621094, 1.88916015625, 22.618148803710938, 14.762886047363281, 4.48199462890625, 20.676956176757812, 23.260848999023438, 5.7933349609375, 1.5152397155761719, 13.633440017700195, 22.748809814453125, -2.0173492431640625, 12.713481903076172, 4.923004150390625, -7.481239318847656, 11.614456176757812, 7.51934814453125, 11.163345336914062, 0.48976898193359375, 2.8477325439453125, 20.38642120361328, -10.701435089111328, 8.337398529052734, 3.6883277893066406, 43.112060546875, 3.3365859985351562, 0.5161933898925781, -18.48904800415039, -7.609745025634766, 4.264551162719727, -1.743246078491211], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000231.npy"}
{"epoch": 0.7, "step": 232, "batch_size": 128, "mean": 6.731924057006836, "std": 11.349102973937988, "min": -24.904754638671875, "p10": -6.863164329528808, "median": 5.302066802978516, "p90": 21.443961524963377, "max": 33.7518424987793, "pos_frac": 0.703125, "sample": [-0.5707321166992188, 13.370048522949219, 11.342266082763672, -21.347381591796875, 15.947399139404297, 12.651165008544922, -2.3193817138671875, 25.03227996826172, 5.2728729248046875, 3.248626708984375, -1.1652240753173828, -2.4332351684570312, -2.602558135986328, 5.331260681152344, -10.831558227539062, 12.708152770996094, 23.348052978515625, 22.673372268676758, -0.08154869079589844, 19.70049285888672, 15.893211364746094, -0.1617279052734375, 6.6371307373046875, 18.673797607421875, 18.926315307617188, 17.451187133789062, 3.1114425659179688, -0.7736434936523438, -0.7163162231445312, 8.497888565063477, 13.125335693359375, 14.698104858398438, 13.753562927246094, 17.95462417602539, -1.06524658203125, -1.44830322265625, 3.726408004760742, 17.454078674316406, 18.4993896484375, -5.02239990234375, 10.780710220336914, 5.1304931640625, 7.492603302001953, -0.6971893310546875, -3.2826461791992188, -4.879112243652344, 2.088592529296875, 2.6195640563964844, 3.0880355834960938, 3.7283706665039062, 15.1058349609375, -6.6907196044921875, -11.813608169555664, -9.92401123046875, 26.3555908203125, -3.7378082275390625, 10.87672233581543, 2.0056915283203125, 16.96717071533203, 12.736564636230469, -4.863391876220703, 2.4601974487304688, 0.1949462890625, 19.02043914794922, -5.154121398925781, 4.6564178466796875, 15.343050003051758, -10.235599517822266, 24.30323600769043, -0.42404937744140625, 0.6490192413330078, 21.120197296142578, 23.22052001953125, 2.0739669799804688, 8.812179565429688, 18.293224334716797, 4.977958679199219, 26.435333251953125, -8.027172088623047, 9.833364486694336, 17.035526275634766, 31.149673461914062, -24.904754638671875, 8.587905883789062, 22.199411392211914, 6.936237335205078, -19.982284545898438, -7.567012786865234, -9.710014343261719, 13.981857299804688, 10.441654205322266, 24.61901092529297, -20.81542205810547, -3.039030075073242, 8.190521240234375, -7.265535354614258, 5.429496765136719, 11.8131103515625, 18.869319915771484, -7.8993988037109375, 16.964561462402344, 10.487892150878906, 5.00396728515625, 1.702545166015625, 2.397502899169922, 16.175689697265625, 4.942211151123047, 33.7518424987793, 6.939002990722656, 9.212718963623047, 0.9794769287109375, 0.2984619140625, 18.342838287353516, 2.4864730834960938, 7.660181045532227, 31.032577514648438, 2.0943603515625, -2.149219512939453, -3.759845733642578, -2.4639968872070312, -0.0602569580078125, 17.59342384338379, 11.173088073730469, 29.10989761352539, 2.746858596801758, 11.360246658325195, 7.23870849609375, 3.2250595092773438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000232.npy"}
{"epoch": 0.703030303030303, "step": 233, "batch_size": 128, "mean": 5.612156867980957, "std": 10.413337707519531, "min": -17.750381469726562, "p10": -7.490789604187011, "median": 5.3242950439453125, "p90": 21.173274993896484, "max": 29.509109497070312, "pos_frac": 0.71875, "sample": [-8.405906677246094, 19.268402099609375, 9.969108581542969, -5.5328216552734375, 13.797470092773438, 12.339897155761719, -6.764152526855469, 25.758594512939453, 1.4304428100585938, 21.08489227294922, 19.04754638671875, 0.455291748046875, 7.3777313232421875, -7.993598937988281, 12.782814025878906, 24.810218811035156, 6.403045654296875, 13.075836181640625, 13.840545654296875, 1.7838668823242188, 0.6658477783203125, -6.7976531982421875, -1.5206279754638672, 14.04883861541748, 13.063560485839844, 23.78168487548828, 4.287555694580078, 23.289505004882812, 1.2957382202148438, 8.90900993347168, -4.797569274902344, 2.615427017211914, 2.038543701171875, 18.62457275390625, -2.097270965576172, 21.57369613647461, 15.259069442749023, 29.509109497070312, -10.649114608764648, 15.082130432128906, 21.379501342773438, 24.711349487304688, -11.34184455871582, -11.226280212402344, 1.2675399780273438, 0.2808971405029297, 0.9782180786132812, -6.1551666259765625, -12.425033569335938, 2.927734375, 2.0606861114501953, 3.1213150024414062, -0.896240234375, -17.750381469726562, 4.702980041503906, 1.9138965606689453, 2.755474090576172, 8.269977569580078, 4.515342712402344, 7.146598815917969, 25.59711456298828, 16.4293270111084, 10.240779876708984, 6.092559814453125, -2.5364913940429688, 13.42926025390625, -14.745262145996094, -16.36993408203125, 11.841924667358398, 1.3788375854492188, 6.55804443359375, -0.5316734313964844, 8.108673095703125, 23.706695556640625, 5.8047943115234375, 19.011756896972656, 11.774375915527344, 3.7095890045166016, 15.552845001220703, -1.4079608917236328, -4.337322235107422, 6.360847473144531, 5.27642822265625, 6.29243278503418, 6.8441009521484375, 9.368766784667969, -7.955497741699219, 6.345661163330078, 28.446609497070312, 1.0866317749023438, -14.375774383544922, 7.112213134765625, 0.35147857666015625, 10.097854614257812, -1.2417449951171875, -0.7663516998291016, -4.3478851318359375, -7.76947021484375, 14.26055908203125, 8.751983642578125, 15.239555358886719, -11.475746154785156, 12.570259094238281, 1.4984893798828125, 4.00103759765625, 9.26123046875, 5.372161865234375, -7.135555267333984, 28.917007446289062, 4.11248779296875, 1.440664291381836, -3.1599578857421875, -2.90679931640625, -3.71844482421875, -3.4979686737060547, -7.371355056762695, 14.5054931640625, 0.3369560241699219, -3.0679168701171875, 12.463661193847656, -0.8565101623535156, 5.797231674194336, 8.324567794799805, 14.937934875488281, 22.825883865356445, 5.648902893066406, 5.730567932128906, 6.16766357421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000233.npy"}
{"epoch": 0.706060606060606, "step": 234, "batch_size": 128, "mean": 8.070612907409668, "std": 10.93929386138916, "min": -19.207290649414062, "p10": -5.636127471923827, "median": 6.427845001220703, "p90": 24.293233108520504, "max": 39.617645263671875, "pos_frac": 0.78125, "sample": [2.392284393310547, 13.817825317382812, 3.3608551025390625, 12.624412536621094, -0.6145248413085938, -1.2461185455322266, -1.3936767578125, -4.5736083984375, 7.615116119384766, 9.997734069824219, -6.899566650390625, 3.2705421447753906, 12.918212890625, -6.069007873535156, 0.27466773986816406, 19.779251098632812, 4.794097900390625, -0.8287200927734375, 0.5671234130859375, 3.462738037109375, 16.441131591796875, 15.645950317382812, 8.758438110351562, 11.371139526367188, 17.959827423095703, 19.887662887573242, 39.617645263671875, 7.216342926025391, 2.5027923583984375, 6.330484390258789, 1.882720947265625, 1.1490707397460938, -1.48614501953125, 5.297389984130859, 5.122428894042969, -9.24943733215332, 15.953868865966797, -2.699014663696289, 5.7117919921875, 17.786901473999023, 26.146865844726562, 7.087650299072266, 10.961795806884766, 4.311634063720703, -11.35936164855957, 15.82037353515625, -3.028768539428711, 18.863731384277344, 0.9447441101074219, -0.13970375061035156, -0.19300079345703125, 3.3716354370117188, 2.4439563751220703, -6.527984619140625, 9.622196197509766, 0.5896148681640625, -5.4506072998046875, 27.313159942626953, 9.237689971923828, 27.387710571289062, 6.3509521484375, 16.904029846191406, 13.203399658203125, 6.5064849853515625, 11.825922012329102, 1.2580642700195312, 11.175910949707031, 28.908294677734375, 7.895574569702148, -6.1500701904296875, 3.7082462310791016, 10.664447784423828, -1.4148635864257812, -11.340286254882812, -6.340179443359375, 9.04913330078125, 33.426361083984375, 4.7462310791015625, 10.072830200195312, 27.031082153320312, 0.9841461181640625, -4.758790969848633, 2.5571651458740234, -0.9637126922607422, 14.647865295410156, 18.5291748046875, -7.276458740234375, 18.433162689208984, 6.504737854003906, 5.8444976806640625, 23.941967010498047, 9.064796447753906, 14.734870910644531, 1.167327880859375, 31.86273193359375, 7.9596099853515625, 11.321952819824219, 3.6729812622070312, 19.730100631713867, 19.93671417236328, 15.089447021484375, -9.29177474975586, 1.7297897338867188, 10.312873840332031, 12.059911727905273, 7.1291656494140625, 21.282495498657227, 3.1225013732910156, -7.106922149658203, -19.207290649414062, 26.035640716552734, -10.919830322265625, 4.6224365234375, 0.0757293701171875, 20.45462417602539, 25.11285400390625, 13.196029663085938, 2.512500762939453, 1.8823471069335938, 10.145950317382812, 1.6348495483398438, 36.579002380371094, 20.677223205566406, 19.207948684692383, 26.164020538330078, 25.17205810546875, 4.545204162597656, -0.41068267822265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000234.npy"}
{"epoch": 0.7090909090909091, "step": 235, "batch_size": 128, "mean": 7.350369453430176, "std": 10.629400253295898, "min": -22.2994384765625, "p10": -5.434689331054687, "median": 5.685142517089844, "p90": 22.81206169128418, "max": 34.30665588378906, "pos_frac": 0.703125, "sample": [-0.8233184814453125, 15.474414825439453, 8.9862060546875, 10.283855438232422, 5.52032470703125, -1.400014877319336, -5.36199951171875, 16.473201751708984, 4.851249694824219, 2.017589569091797, 10.756206512451172, 24.893272399902344, 22.393043518066406, -1.728973388671875, 6.148529052734375, -4.392578125, 21.7152099609375, 23.01128387451172, 16.97357749938965, 6.258415222167969, -0.1474609375, -3.3241424560546875, 21.910123825073242, 24.453344345092773, 34.30665588378906, 6.7044830322265625, -8.584037780761719, -3.4136009216308594, 23.881195068359375, 2.0732059478759766, 19.28936767578125, -6.882587432861328, 5.567775726318359, -5.746864318847656, 22.726680755615234, 15.99822998046875, 9.562332153320312, -9.963943481445312, 7.176605224609375, 29.00522232055664, 23.717132568359375, -8.341533660888672, 7.2443389892578125, 2.7655601501464844, 3.3344802856445312, 7.731527328491211, -2.219593048095703, -0.3348884582519531, 15.287429809570312, 5.77105712890625, 3.3792495727539062, 7.59625244140625, -5.982612609863281, 2.7048873901367188, -6.958648681640625, -0.5713005065917969, 10.784927368164062, 3.8360366821289062, 9.221145629882812, -9.006111145019531, 14.379230499267578, 1.4332962036132812, -5.38604736328125, 16.62647247314453, 25.522918701171875, 3.016143798828125, 33.68171691894531, -22.2994384765625, 7.122459411621094, 12.724990844726562, 12.653411865234375, 14.248340606689453, 17.83993148803711, -4.796154022216797, -3.2882080078125, 14.2452392578125, -8.318134307861328, 14.22283935546875, 5.5992279052734375, -3.5919647216796875, 14.560718536376953, 7.0732574462890625, -7.348896026611328, 5.469623565673828, 9.79098129272461, -0.22354507446289062, -0.5354080200195312, -4.434484481811523, 13.166122436523438, 23.513015747070312, -5.094646453857422, 20.283042907714844, -2.799365997314453, 2.2808914184570312, 1.7709617614746094, 15.237323760986328, 20.553354263305664, 13.751846313476562, 14.915916442871094, -0.6607513427734375, 21.096332550048828, 11.968097686767578, 18.71991729736328, 2.93310546875, 18.671051025390625, 2.6233444213867188, 24.404205322265625, 3.324737548828125, 1.1681537628173828, -1.0060844421386719, 8.914949417114258, -3.1607513427734375, 25.0653076171875, 15.787384033203125, 23.032466888427734, 1.4272842407226562, -6.641899108886719, -3.1670608520507812, 4.41899299621582, 12.792984008789062, 0.6494998931884766, 11.928932189941406, 1.5913162231445312, 3.842866897583008, 18.36620330810547, -0.6909122467041016, -5.548187255859375, 4.82745361328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000235.npy"}
{"epoch": 0.7121212121212122, "step": 236, "batch_size": 128, "mean": 8.080364227294922, "std": 12.586831092834473, "min": -28.894454956054688, "p10": -5.948332214355468, "median": 6.612844467163086, "p90": 23.574389839172362, "max": 40.436370849609375, "pos_frac": 0.7265625, "sample": [-0.18407821655273438, 17.38394546508789, 16.1069393157959, 9.426082611083984, 1.77325439453125, -0.2985382080078125, 11.8363037109375, 10.025550842285156, 8.366085052490234, 23.998497009277344, 19.27461814880371, -6.29400634765625, 3.4985485076904297, 15.108085632324219, 19.528038024902344, 12.887947082519531, -0.33231353759765625, 2.49365234375, 6.699710845947266, 7.0118408203125, 25.070219039916992, -1.719635009765625, 9.935844421386719, 22.893516540527344, -6.388622283935547, 14.881553649902344, -9.771453857421875, 24.28240966796875, 9.013904571533203, 2.2147064208984375, 13.667137145996094, -1.7872333526611328, 20.909408569335938, 23.063934326171875, 10.189998626708984, 14.5655517578125, 26.481430053710938, 5.6368255615234375, -15.189201354980469, -0.08107185363769531, 23.291969299316406, -28.894454956054688, 12.456428527832031, -6.868621826171875, 26.13524055480957, -0.3939323425292969, 18.75030517578125, 4.695320129394531, 4.249076843261719, 1.915863037109375, 22.93276023864746, 16.806381225585938, 0.2656364440917969, -2.1793975830078125, 10.748001098632812, 6.525978088378906, 10.756439208984375, 15.054981231689453, -3.558990478515625, 30.428939819335938, 1.8069839477539062, 8.885826110839844, 3.79296875, 27.233367919921875, 4.776527404785156, 3.6738739013671875, 6.219318389892578, 3.983642578125, 22.058372497558594, 15.735748291015625, -0.6214599609375, 22.11865234375, -0.8737640380859375, -1.3696365356445312, 34.33036804199219, 9.09307861328125, -9.333730697631836, -0.7056503295898438, -1.6273078918457031, 30.59241485595703, 4.887691497802734, -21.749359130859375, -7.989936828613281, 6.448272705078125, 6.3207855224609375, 40.436370849609375, 0.1363372802734375, 0.22595596313476562, -13.042343139648438, 23.38568878173828, 2.2615718841552734, 7.246118545532227, 2.531158447265625, -0.5329971313476562, 3.2765655517578125, 6.243339538574219, 22.8001708984375, 22.361312866210938, -0.760009765625, 35.53582763671875, 11.172538757324219, -2.1524200439453125, -2.825164794921875, 23.392629623413086, -5.8001861572265625, 11.452674865722656, 4.858184814453125, 27.441516876220703, 28.199310302734375, -5.512754440307617, 16.2559814453125, 1.7762985229492188, 17.963943481445312, 20.175247192382812, -23.639404296875, 20.03797149658203, -25.16485595703125, -10.805255889892578, 15.280706405639648, 0.22515869140625, 3.5323028564453125, 9.777557373046875, 16.032764434814453, -0.5572853088378906, 12.868501663208008, 7.714693069458008, -4.434751510620117, 17.931364059448242], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000236.npy"}
{"epoch": 0.7151515151515152, "step": 237, "batch_size": 128, "mean": 7.324731826782227, "std": 12.33658218383789, "min": -22.01519775390625, "p10": -6.810734176635742, "median": 4.762012481689453, "p90": 24.689810943603515, "max": 35.54789733886719, "pos_frac": 0.703125, "sample": [31.67412567138672, 25.059341430664062, 3.300588607788086, -12.831357955932617, -7.699680328369141, 26.609786987304688, 6.081573486328125, 8.073875427246094, 31.882186889648438, 27.652359008789062, 4.848876953125, 19.937942504882812, -11.196430206298828, -1.6368637084960938, 12.436492919921875, -18.120410919189453, -5.482210159301758, -0.9725418090820312, -8.4315185546875, 16.14059829711914, 29.348358154296875, 35.54789733886719, 1.9790630340576172, 4.623937606811523, 15.169265747070312, 18.30271339416504, 2.8970413208007812, 4.567604064941406, 18.79769515991211, 10.546539306640625, -1.5684185028076172, 16.903745651245117, -0.2402496337890625, 21.90192413330078, 18.541641235351562, 2.814380645751953, -1.1691417694091797, 9.616912841796875, 6.623271942138672, -13.560771942138672, -1.9470977783203125, 14.7193603515625, 15.031047821044922, 14.79888916015625, -0.8534641265869141, 4.675148010253906, 0.5398101806640625, -0.8842315673828125, 26.304962158203125, 15.60335922241211, 21.9696044921875, 19.515552520751953, 3.75543212890625, 5.859233856201172, 1.8495292663574219, 5.0184783935546875, 32.28894805908203, -6.712181091308594, 6.0877838134765625, -6.164379119873047, 11.626594543457031, 19.130695343017578, 8.808757781982422, -10.732189178466797, 23.608062744140625, 3.2538986206054688, 2.274463653564453, -8.28162956237793, -5.126033782958984, -1.7491836547851562, 27.605224609375, 26.774333953857422, -5.298377990722656, 2.3720779418945312, -4.092184066772461, 17.105819702148438, -1.0858592987060547, -8.737873077392578, 23.60238265991211, 2.020946502685547, -7.040691375732422, 9.347137451171875, -5.224937438964844, 17.561744689941406, 0.763763427734375, 25.25255584716797, 19.413047790527344, -4.232452392578125, 9.392921447753906, 1.6343822479248047, 20.98575782775879, 23.342926025390625, 6.9085235595703125, 8.857719421386719, 0.7181320190429688, -22.01519775390625, 11.189001083374023, -6.333850860595703, 3.9109573364257812, 6.977470397949219, -18.6053466796875, 3.1527175903320312, 1.7742843627929688, 26.129318237304688, 16.84954071044922, 0.56402587890625, -0.41929054260253906, -4.004425048828125, -0.5155296325683594, 6.936759948730469, 20.506553649902344, 2.6235275268554688, 2.0623817443847656, 7.156646728515625, 18.832366943359375, 2.5369796752929688, -2.304473876953125, 24.53144073486328, 18.258026123046875, 7.740821838378906, 19.05316162109375, -3.6023330688476562, -2.268993377685547, 21.778335571289062, 18.73224639892578, -20.307998657226562, 2.1919631958007812, 3.2681922912597656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000237.npy"}
{"epoch": 0.7181818181818181, "step": 238, "batch_size": 128, "mean": 7.9082722663879395, "std": 11.036674499511719, "min": -18.566822052001953, "p10": -6.191732215881347, "median": 7.235034942626953, "p90": 23.311081886291504, "max": 34.46514892578125, "pos_frac": 0.765625, "sample": [3.238363265991211, 2.9486846923828125, 10.610595703125, 10.517402648925781, 15.0294189453125, 8.760295867919922, 20.719863891601562, 24.80754852294922, 13.564437866210938, 9.891677856445312, 3.102621078491211, 16.565460205078125, 3.6508331298828125, -1.8116455078125, 9.685728073120117, -0.615966796875, 20.364227294921875, 31.860275268554688, 7.009189605712891, -11.653778076171875, 10.302297592163086, -3.685131072998047, 2.983673095703125, 2.7467041015625, 28.43682861328125, 1.4746246337890625, 23.6549015045166, 33.773956298828125, 20.546417236328125, 15.335800170898438, 12.699600219726562, -5.878265380859375, 3.738780975341797, -7.4154815673828125, 21.251487731933594, 1.7749176025390625, 8.751190185546875, 27.822418212890625, 5.7113494873046875, 8.397415161132812, 10.636457443237305, -0.7518768310546875, 1.6374053955078125, 28.124603271484375, 15.503036499023438, -2.2184181213378906, 33.89893341064453, 23.16373062133789, 15.015167236328125, 19.37579345703125, 8.027568817138672, 2.6163177490234375, 13.31279182434082, -2.2450599670410156, -6.923154830932617, 7.169685363769531, -0.487701416015625, 12.442535400390625, 7.894989013671875, 18.38543701171875, 7.451328277587891, -2.237823486328125, -3.0025634765625, 3.799631118774414, 25.172775268554688, 8.62057113647461, -3.8057212829589844, 2.951061248779297, -5.015533447265625, 0.1953887939453125, -7.408897399902344, 25.780569076538086, 6.634796142578125, 9.768623352050781, 10.505729675292969, 4.1823577880859375, -9.350584030151367, 4.73126220703125, 18.97831153869629, 18.288692474365234, 12.499931335449219, -9.008703231811523, 11.068347930908203, -0.489959716796875, -9.95068359375, -10.29931640625, 11.651565551757812, 3.3974533081054688, -3.75518798828125, 17.938156127929688, 7.300384521484375, -16.7996826171875, 14.711891174316406, -11.821563720703125, 12.321945190429688, 34.46514892578125, 24.5684814453125, 0.7722396850585938, 20.716217041015625, -0.8099594116210938, 24.35669708251953, 16.618101119995117, 4.407176971435547, 4.922821044921875, 5.5592803955078125, -18.566822052001953, -1.0300750732421875, 3.4323196411132812, 0.40583038330078125, 2.4223690032958984, 22.711719512939453, -7.7270660400390625, -3.4333648681640625, 14.158645629882812, 0.9842987060546875, 2.1215667724609375, 13.721687316894531, 2.540802001953125, 8.565658569335938, 11.539962768554688, 2.9112281799316406, 7.121910095214844, 20.82694435119629, 10.461395263671875, -7.793756484985352, 17.192413330078125, 13.551033020019531, 0.31241607666015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000238.npy"}
{"epoch": 0.7212121212121212, "step": 239, "batch_size": 128, "mean": 5.910390853881836, "std": 12.035028457641602, "min": -24.82275390625, "p10": -8.14095230102539, "median": 5.469066619873047, "p90": 21.498569107055662, "max": 31.766525268554688, "pos_frac": 0.65625, "sample": [-6.145042419433594, -2.286712646484375, 17.1795654296875, 5.104278564453125, -1.328155517578125, 10.306831359863281, 4.243721008300781, 1.8172569274902344, -0.2259674072265625, 17.076194763183594, 6.2793426513671875, -4.756448745727539, 13.847564697265625, 23.382556915283203, 2.1368179321289062, 4.944530487060547, 4.57017707824707, -2.309253692626953, -24.14480972290039, -7.492164611816406, -4.4449615478515625, 11.163333892822266, 10.899604797363281, 5.309379577636719, 17.931922912597656, -1.4877090454101562, 16.070068359375, 7.025297164916992, 13.738197326660156, 27.020919799804688, 12.601364135742188, 17.983123779296875, -2.82110595703125, -1.3887214660644531, 16.160354614257812, 11.771818161010742, -16.44751739501953, 1.0789928436279297, -0.5992774963378906, 1.167959213256836, 11.437427520751953, -5.537269592285156, -17.649139404296875, 8.637809753417969, 21.27790069580078, 4.953941345214844, -9.925872802734375, -11.299667358398438, 22.967498779296875, -0.713531494140625, 14.24740219116211, 22.01346206665039, 2.4265708923339844, 25.885276794433594, -13.411918640136719, 11.90911865234375, -24.82275390625, 2.553009033203125, 15.970161437988281, 30.4947509765625, 5.771209716796875, 16.800315856933594, -2.057403564453125, 14.761394500732422, -3.7418251037597656, 29.28486442565918, 5.120880126953125, 18.643756866455078, 13.93032455444336, -8.336967468261719, 24.21533203125, 9.590744018554688, -3.1086883544921875, 2.7631072998046875, -1.4414024353027344, 14.11083984375, -6.904363632202148, -3.4048099517822266, 20.400177001953125, 13.775947570800781, -22.645309448242188, -17.902618408203125, 6.446372985839844, 19.368667602539062, 23.30853271484375, -3.947235107421875, 10.164566040039062, 8.027046203613281, 10.273208618164062, -2.074970245361328, 12.493358612060547, 16.55933380126953, 0.4023895263671875, 10.116432189941406, 6.400718688964844, 31.766525268554688, -3.038738250732422, 13.005035400390625, -3.448556900024414, 4.501716613769531, 5.628753662109375, 27.32086181640625, -2.9764404296875, 6.297328948974609, 17.03600311279297, -8.05694580078125, 7.834342956542969, 12.873008728027344, 15.851051330566406, -17.499774932861328, 2.0527267456054688, -0.6705646514892578, 31.325347900390625, -3.627269744873047, -13.04605484008789, 5.242170333862305, 20.52405548095703, -1.5986061096191406, 2.0789642333984375, 27.317466735839844, 19.40727996826172, -2.0434036254882812, 0.9447078704833984, 6.9798431396484375, -8.44350814819336, -4.775188446044922, 6.934577941894531, 7.321876525878906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000239.npy"}
{"epoch": 0.7242424242424242, "step": 240, "batch_size": 128, "mean": 6.218180179595947, "std": 11.670511245727539, "min": -30.899314880371094, "p10": -7.026245880126953, "median": 5.180566787719727, "p90": 20.65512008666992, "max": 30.82098388671875, "pos_frac": 0.734375, "sample": [6.677467346191406, 23.217140197753906, -2.890533447265625, -11.466766357421875, -3.9201278686523438, 9.060285568237305, 29.233903884887695, 16.743083953857422, 22.842666625976562, 16.617786407470703, 0.6822624206542969, 1.2893924713134766, -9.540668487548828, 4.5357513427734375, 9.633514404296875, 22.769832611083984, -3.579357147216797, 18.803184509277344, 3.865142822265625, 2.048501968383789, -12.367435455322266, -2.5836563110351562, 1.0818023681640625, 19.069103240966797, -0.8807029724121094, 4.247406005859375, 6.040008544921875, 12.087974548339844, -3.0451297760009766, 5.9752197265625, -4.261869430541992, -0.32257843017578125, 20.83527374267578, -5.7606964111328125, 3.970335006713867, -14.108711242675781, 0.8627395629882812, -4.762041091918945, 3.6106033325195312, 11.742660522460938, -5.261295318603516, -12.873085021972656, 7.631275177001953, 7.661102294921875, -9.138473510742188, 0.6852951049804688, 9.369773864746094, 7.361598014831543, 18.986976623535156, -30.899314880371094, 5.0621795654296875, 1.4645576477050781, 19.468725204467773, 28.12306785583496, 14.205314636230469, -17.6741943359375, 15.140205383300781, 8.757766723632812, 8.618003845214844, 3.7046260833740234, -0.5643100738525391, 0.6988754272460938, 15.189231872558594, 14.552602767944336, 10.845479965209961, -2.9808349609375, 14.38766098022461, -16.69959259033203, 5.298954010009766, 1.0548076629638672, 8.855792999267578, -23.747032165527344, 0.0924835205078125, 12.059555053710938, -8.706581115722656, 4.229434967041016, -6.887321472167969, 25.15355682373047, 9.939472198486328, -5.3532257080078125, -0.0529937744140625, 9.441341400146484, 4.939422607421875, 10.60771369934082, 17.083999633789062, 18.390762329101562, -24.605003356933594, 10.154296875, 8.164382934570312, 5.723913192749023, 3.845735549926758, -1.0945587158203125, 23.825950622558594, 20.577911376953125, 13.520687103271484, 8.669069290161133, 4.027767181396484, 30.00445556640625, -2.147947311401367, 6.309211730957031, 19.561843872070312, 2.837554931640625, 18.434188842773438, 0.0137176513671875, 17.47705841064453, 18.97429847717285, 29.45184326171875, 10.132072448730469, 30.82098388671875, 1.136322021484375, 12.941940307617188, 0.11322975158691406, 22.061351776123047, 20.11261749267578, 4.169464111328125, 9.930644989013672, -7.35040283203125, 19.119787216186523, 2.5490951538085938, 0.6059112548828125, 23.916536331176758, 19.75349235534668, 3.2059326171875, -3.7072105407714844, -3.32440185546875, 17.716400146484375, 4.172889709472656, -2.1520767211914062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000240.npy"}
{"epoch": 0.7272727272727273, "step": 241, "batch_size": 128, "mean": 7.850641250610352, "std": 12.313504219055176, "min": -21.268041610717773, "p10": -7.234942817687989, "median": 7.381656646728516, "p90": 22.59841823577881, "max": 38.48553466796875, "pos_frac": 0.7421875, "sample": [12.849529266357422, -0.22211456298828125, 29.4644775390625, 1.29608154296875, -7.587934494018555, 13.862506866455078, 20.88941192626953, 5.83856201171875, 9.603822708129883, 5.439872741699219, 13.260566711425781, 7.469879150390625, -21.268041610717773, 1.5522232055664062, 19.022605895996094, 2.1283226013183594, 4.530052185058594, 1.32952880859375, 12.25125503540039, 11.074478149414062, 10.375152587890625, 7.627107620239258, 1.4111347198486328, 4.356529235839844, 27.784584045410156, 22.585329055786133, 18.8963623046875, 5.563972473144531, 5.44256591796875, -4.834075927734375, -7.215024948120117, 34.068450927734375, -2.4735488891601562, -8.784931182861328, 21.917984008789062, 9.741172790527344, 5.852745056152344, 16.937213897705078, -0.6522216796875, 20.82793426513672, 0.7103767395019531, -3.649324417114258, -4.320556640625, 21.51063346862793, 2.585176467895508, 11.07623291015625, 1.3800468444824219, 16.1956844329834, 7.16302490234375, 14.299087524414062, 14.332538604736328, 32.99492645263672, 1.384866714477539, 30.828338623046875, -1.3599491119384766, 4.011177062988281, -19.40028190612793, 8.636322021484375, 15.348718643188477, 34.22174072265625, 9.916561126708984, 8.578699111938477, -7.2814178466796875, 5.722587585449219, 1.9328765869140625, -20.973251342773438, -3.8683090209960938, 8.940311431884766, -2.7710800170898438, -10.096023559570312, 15.097442626953125, -14.590255737304688, 20.438507080078125, 6.275157928466797, -17.582908630371094, 9.6837158203125, 24.084447860717773, -0.84051513671875, 38.48553466796875, -3.4360809326171875, -5.474208831787109, 9.949783325195312, 28.41045379638672, 22.00403594970703, 15.40899658203125, 4.141754150390625, 7.293434143066406, -7.8819732666015625, 35.36822509765625, -13.7489013671875, 11.394874572753906, 7.8856048583984375, 4.762992858886719, 1.9312286376953125, 15.092351913452148, -0.4779930114746094, 1.2639503479003906, 33.474212646484375, 10.573343276977539, 21.31304931640625, 21.847354888916016, 9.018409729003906, 12.279373168945312, 18.86621856689453, 0.36751747131347656, 10.30862808227539, 22.62895965576172, 12.908819198608398, -3.9086380004882812, 8.735380172729492, -1.8496246337890625, 11.212736129760742, 30.259368896484375, 10.756698608398438, 7.087944030761719, 3.6694412231445312, 4.279003143310547, 21.041841506958008, 9.209953308105469, 3.8491268157958984, -9.0555419921875, 18.531906127929688, -2.9467506408691406, -12.955581665039062, 16.34698486328125, -1.1052932739257812, -4.332916259765625, -2.7328414916992188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000241.npy"}
{"epoch": 0.7303030303030303, "step": 242, "batch_size": 128, "mean": 7.5787811279296875, "std": 11.006756782531738, "min": -27.12458038330078, "p10": -5.705210113525391, "median": 5.885780334472656, "p90": 23.547305297851562, "max": 39.72138214111328, "pos_frac": 0.7890625, "sample": [5.9270477294921875, 20.808326721191406, 8.864850997924805, 7.184816360473633, 16.52298355102539, 22.938674926757812, 20.259077072143555, 5.339508056640625, 7.818035125732422, -11.916618347167969, 1.6065406799316406, 19.745811462402344, 15.01153564453125, 24.62877655029297, 25.568222045898438, 0.29238128662109375, 24.333213806152344, 1.3675155639648438, 6.73419189453125, 16.98858642578125, 5.70281982421875, -27.12458038330078, 5.218963623046875, -5.721771240234375, 3.560821533203125, 8.957990646362305, 7.867645263671875, 6.8826446533203125, 5.999153137207031, 31.34101104736328, -6.4608001708984375, -5.636159896850586, 2.6712493896484375, -3.2834548950195312, -6.198543548583984, 11.267669677734375, 19.141815185546875, 32.9755859375, 3.5807037353515625, 15.561212539672852, 23.67669677734375, 10.361942291259766, 18.90752410888672, 0.2311553955078125, 0.2135467529296875, 11.819976806640625, 8.721389770507812, -19.019824981689453, -1.6252288818359375, 3.724000930786133, 12.469947814941406, 4.8480987548828125, -5.698112487792969, 10.42313003540039, 39.72138214111328, 26.400588989257812, 2.435850143432617, 5.317338943481445, 21.37384033203125, 9.982170104980469, -4.367958068847656, -1.6086254119873047, -2.3691635131835938, 18.610118865966797, 7.5186614990234375, 4.91168212890625, -6.770271301269531, 4.0963134765625, 27.892662048339844, 0.9961128234863281, 8.64190673828125, -5.155788421630859, -5.309059143066406, 0.9329376220703125, 5.818550109863281, 16.23638916015625, -5.392240524291992, 10.596755981445312, 29.294227600097656, 8.443254470825195, -0.3195037841796875, -9.426597595214844, 23.491851806640625, 0.4573783874511719, 3.732666015625, 5.170162200927734, 3.431610107421875, 1.675079345703125, 16.584144592285156, 20.084808349609375, 27.56462860107422, 6.542604446411133, 17.747482299804688, -0.8083343505859375, -6.905891418457031, -10.064682006835938, 20.250686645507812, 4.1241912841796875, 5.8334503173828125, -7.473724365234375, 3.9183521270751953, -6.3765411376953125, 5.844512939453125, 7.312538146972656, 5.200946807861328, 13.149200439453125, 23.977081298828125, 0.62744140625, 14.66878890991211, 1.4516067504882812, 7.527162551879883, 7.135414123535156, 10.212797164916992, 3.1035003662109375, 25.702056884765625, -7.166015625, 20.136322021484375, 0.4715595245361328, 13.883707046508789, -1.9464645385742188, 10.677757263183594, 5.23016357421875, 3.7852554321289062, 8.93634033203125, 14.145439147949219, 9.904853820800781, 5.47130012512207, -2.222381591796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000242.npy"}
{"epoch": 0.7333333333333333, "step": 243, "batch_size": 128, "mean": 7.028728485107422, "std": 11.38825798034668, "min": -19.454795837402344, "p10": -5.499757385253906, "median": 4.606166839599609, "p90": 22.897247314453125, "max": 39.84898376464844, "pos_frac": 0.75, "sample": [3.5709800720214844, 0.2939186096191406, 10.598838806152344, 11.768707275390625, 12.463478088378906, -1.3128547668457031, 4.20311164855957, 7.491373062133789, 2.8902339935302734, 22.73239517211914, 21.998458862304688, 2.0429458618164062, 3.0969409942626953, 3.55816650390625, -7.71734619140625, 24.84710693359375, 21.096664428710938, 0.03645133972167969, 3.669189453125, -10.067184448242188, 33.53511047363281, 26.13271713256836, -5.4171905517578125, 21.490066528320312, 10.138065338134766, 1.1702423095703125, 2.5613975524902344, 1.1914596557617188, 10.120025634765625, 29.69141387939453, 11.52020263671875, 17.518234252929688, 16.02222442626953, -5.212074279785156, -18.222068786621094, 12.050233840942383, 31.249670028686523, 24.63402557373047, 3.5139999389648438, 20.232025146484375, -0.05273628234863281, 0.2553367614746094, -19.454795837402344, 6.337890625, -2.9770355224609375, 8.857221603393555, -5.692413330078125, 5.0012359619140625, 3.707042694091797, -3.1465301513671875, 6.498329162597656, 3.096515655517578, 1.6686630249023438, -2.7267990112304688, -2.9452362060546875, 15.205467224121094, 8.788003921508789, 23.715070724487305, -3.6054000854492188, -3.1132583618164062, 8.887222290039062, 7.7259368896484375, 3.5596389770507812, 7.5463714599609375, 2.6189422607421875, 2.149494171142578, 16.04705810546875, 2.015451431274414, 5.539710998535156, -4.447166442871094, 0.0854949951171875, 32.565277099609375, -3.176788330078125, 25.835487365722656, -2.4353103637695312, 17.921676635742188, 10.122213363647461, 15.875473022460938, -5.3526153564453125, 3.6294021606445312, 10.148597717285156, -10.134378433227539, 2.816417694091797, 14.014480590820312, 0.3719291687011719, 12.316459655761719, -10.488845825195312, 6.19329833984375, 20.49945068359375, 22.5008544921875, 9.910675048828125, -0.7465152740478516, 7.84088134765625, 8.997406005859375, -0.7179450988769531, 4.149261474609375, 4.585685729980469, -6.503028869628906, -6.312433242797852, 14.385208129882812, 15.181880950927734, 5.827913284301758, 12.326290130615234, 0.4358406066894531, 31.815807342529297, -10.673995971679688, 13.533065795898438, -4.901086807250977, 13.48033332824707, -13.336219787597656, 18.82684326171875, 19.677757263183594, 1.560842514038086, 23.281902313232422, 2.9925003051757812, 24.330398559570312, 20.91347885131836, 1.2116165161132812, 4.62664794921875, 9.631658554077148, 0.4949226379394531, 10.652645111083984, 39.84898376464844, -5.997259140014648, 14.442237854003906, -17.358440399169922, -2.0064125061035156, -2.2812271118164062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000243.npy"}
{"epoch": 0.7363636363636363, "step": 244, "batch_size": 128, "mean": 6.548891067504883, "std": 11.77430248260498, "min": -19.356672286987305, "p10": -7.496511077880859, "median": 5.237907409667969, "p90": 22.726638412475584, "max": 39.916046142578125, "pos_frac": 0.703125, "sample": [4.45526123046875, -12.202720642089844, 2.2234954833984375, 15.080757141113281, 18.047698974609375, -2.2847728729248047, -4.275299072265625, 10.412055969238281, 7.888893127441406, 0.16178131103515625, 27.55645751953125, 11.673137664794922, -8.441810607910156, -4.551239013671875, 2.853513717651367, 23.61013412475586, 9.29095458984375, -3.609437942504883, 26.638092041015625, 3.7241897583007812, 3.624418258666992, 3.5509490966796875, 10.64276123046875, 11.80965805053711, -0.1134033203125, -19.356672286987305, -0.7549495697021484, 18.12407875061035, 3.4411582946777344, -2.6980514526367188, 8.165750503540039, 16.550262451171875, 17.78314971923828, 15.228652954101562, 7.480682373046875, -7.928314208984375, 1.2114715576171875, -12.599571228027344, -12.520896911621094, 27.55754852294922, 10.462915420532227, -0.6062221527099609, -6.2430419921875, -14.703155517578125, 10.144065856933594, 1.383331298828125, 21.84856414794922, 11.197883605957031, 8.408935546875, -3.0428543090820312, -7.341217041015625, 16.020355224609375, -6.334224700927734, 13.921916961669922, 8.069084167480469, 0.05298614501953125, 5.271488189697266, 7.197711944580078, 1.820098876953125, 13.771728515625, 22.50481414794922, 6.114830017089844, 39.916046142578125, 4.803760528564453, 0.30939483642578125, -4.954803466796875, -2.618967056274414, -9.271434783935547, 17.794212341308594, -4.355018615722656, -11.052967071533203, 17.282909393310547, 2.2787017822265625, 13.106674194335938, 1.7920188903808594, -7.370903015136719, 0.18950653076171875, 7.2687530517578125, 5.251655578613281, 1.1302337646484375, -2.9387645721435547, 29.25652313232422, 13.880767822265625, -2.0232410430908203, 19.179306030273438, -1.7458629608154297, 26.313552856445312, 27.573593139648438, -18.749404907226562, 19.61292266845703, 0.71649169921875, 7.634777069091797, -16.17918586730957, 5.734161376953125, 5.598457336425781, 23.24422836303711, 23.907638549804688, 15.339424133300781, 6.670112609863281, 5.0867767333984375, 25.554412841796875, 5.2464447021484375, -0.32028961181640625, 38.897552490234375, -3.47381591796875, 6.687187194824219, -9.152061462402344, 18.235321044921875, -7.7895965576171875, 22.428619384765625, 3.4257278442382812, -4.43267822265625, 19.515640258789062, 15.919754028320312, 19.384235382080078, -6.681083679199219, 24.831863403320312, -2.559661865234375, 1.5429611206054688, 3.5929107666015625, 5.2293701171875, 17.778873443603516, 14.081592559814453, 6.787654876708984, 3.7934494018554688, -0.8451461791992188, 1.6099853515625, 21.98700714111328], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000244.npy"}
{"epoch": 0.7393939393939394, "step": 245, "batch_size": 128, "mean": 5.002694129943848, "std": 12.517181396484375, "min": -22.666404724121094, "p10": -10.797199249267578, "median": 4.596444129943848, "p90": 21.16248016357422, "max": 34.11610412597656, "pos_frac": 0.640625, "sample": [-14.425432205200195, 7.008701324462891, 6.699317932128906, 16.50147247314453, 15.726142883300781, 18.81049919128418, -3.1463623046875, 8.071830749511719, -5.5060882568359375, -0.5955924987792969, 17.588157653808594, 4.515468597412109, 24.40308380126953, 7.0030670166015625, 10.120126724243164, -0.33956146240234375, 25.197044372558594, 5.380472183227539, -5.201416015625, -6.076656341552734, -8.843429565429688, 0.7157135009765625, 15.109573364257812, 5.989143371582031, 8.113048553466797, 4.308868408203125, -10.78741455078125, -0.86773681640625, 3.276744842529297, 0.99713134765625, -4.518999099731445, -19.739990234375, 6.246370315551758, 26.044540405273438, 11.411033630371094, -3.9351043701171875, 5.67327880859375, -16.164443969726562, -1.7049636840820312, -5.19024658203125, 9.989501953125, 18.76183319091797, -4.249870300292969, -4.6311187744140625, 5.0084381103515625, 1.3543815612792969, -11.146720886230469, -10.690254211425781, 9.073186874389648, 5.1090545654296875, 1.8095474243164062, 10.580108642578125, 7.290596008300781, -16.080062866210938, 16.729095458984375, 1.1900558471679688, 13.942031860351562, -6.066383361816406, -8.82345199584961, -0.049774169921875, -9.23277473449707, 22.296001434326172, -4.244508743286133, 6.269927978515625, 3.2974815368652344, -18.643356323242188, -22.666404724121094, 16.267059326171875, -3.5937957763671875, 9.140703201293945, -0.1375274658203125, 13.558746337890625, 30.210922241210938, 16.448760986328125, 19.0760498046875, 2.0990447998046875, -0.7983856201171875, 15.934158325195312, 11.904825210571289, 21.06824493408203, 4.874958038330078, 16.481746673583984, -1.5338859558105469, -5.7249603271484375, -4.003826141357422, 28.87918472290039, -9.982751846313477, -4.832302093505859, -13.222225189208984, -3.5700759887695312, 19.3011474609375, 8.121084213256836, 2.999164581298828, 8.576675415039062, 34.11610412597656, 1.1407432556152344, 32.52861022949219, 27.88433837890625, -16.96446990966797, 28.110763549804688, -21.256385803222656, 4.677419662475586, 20.06063461303711, 13.218299865722656, -10.820030212402344, -4.3414459228515625, 28.276002883911133, 0.22005271911621094, 3.1175765991210938, 0.7417984008789062, -17.452407836914062, 8.572174072265625, 21.382362365722656, 3.4468307495117188, 7.962375640869141, 19.64972496032715, 0.8226127624511719, 17.181489944458008, 0.8918304443359375, -3.750926971435547, 20.069665908813477, 12.837898254394531, 11.844158172607422, 22.424026489257812, -17.641590118408203, -0.743896484375, 16.80987548828125, 13.74190902709961], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000245.npy"}
{"epoch": 0.7424242424242424, "step": 246, "batch_size": 128, "mean": 6.162531852722168, "std": 11.08243465423584, "min": -21.53624725341797, "p10": -6.719084930419922, "median": 5.562625885009766, "p90": 20.78843479156494, "max": 38.95283508300781, "pos_frac": 0.7265625, "sample": [26.04352378845215, -5.740928649902344, 11.579578399658203, 11.069656372070312, 9.132095336914062, 12.973140716552734, 1.0561084747314453, 12.672103881835938, 0.1258544921875, 7.230712890625, 7.627666473388672, 8.52691650390625, 2.5958099365234375, -2.755218505859375, 17.562545776367188, 1.296966552734375, 0.0547943115234375, 11.836685180664062, 17.51318359375, 4.963794708251953, -3.2167816162109375, 17.743240356445312, 4.956413269042969, -6.71575927734375, 20.378082275390625, 10.359603881835938, 11.44775390625, 8.69443130493164, 24.407562255859375, 23.79793930053711, 12.34457778930664, 1.2794036865234375, 10.946014404296875, -1.4097633361816406, 9.604385375976562, -10.211669921875, 24.249530792236328, -0.4203643798828125, 16.500022888183594, -5.1697235107421875, 1.8229694366455078, 17.471046447753906, 2.428863525390625, -2.13238525390625, -4.706897735595703, 6.662479400634766, 20.57897186279297, 11.866958618164062, 23.156604766845703, 0.786529541015625, 3.0576515197753906, 0.6135578155517578, 3.8094482421875, 13.796836853027344, 2.22216796875, -3.7611541748046875, 3.4565162658691406, 6.105625152587891, 9.89329719543457, -19.613914489746094, -8.777807235717773, 3.0406227111816406, -5.2528533935546875, 26.49169921875, 31.581069946289062, 20.114959716796875, -5.021665573120117, 3.075807571411133, 19.679855346679688, -9.898113250732422, 23.74609375, 8.287994384765625, -3.395477294921875, 6.1282196044921875, 3.059385299682617, 9.05316162109375, 7.386920928955078, -9.919143676757812, 1.1897811889648438, -2.5149383544921875, 1.55242919921875, -0.4915275573730469, -7.210691452026367, 0.0028076171875, -8.009124755859375, 27.71630859375, -8.507476806640625, -1.9562301635742188, 5.914642333984375, 15.323772430419922, -21.53624725341797, -18.657550811767578, -0.6313629150390625, 19.85657501220703, 19.870365142822266, 5.210609436035156, 7.868988037109375, -2.112377166748047, 7.6952667236328125, 11.256145477294922, 9.198585510253906, -4.0298919677734375, 9.105979919433594, 12.71563720703125, 7.250621795654297, 10.329151153564453, 7.145870208740234, 38.95283508300781, -4.4774627685546875, -21.515872955322266, 21.27718162536621, 11.90643310546875, 19.834877014160156, 5.147333145141602, 2.5315017700195312, 1.6778087615966797, -2.053070068359375, 8.767234802246094, 13.931638717651367, 2.5015430450439453, 27.261436462402344, 1.347076416015625, 11.611557006835938, -8.904464721679688, 1.7405834197998047, -6.4493408203125, -6.726844787597656, 25.0701904296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000246.npy"}
{"epoch": 0.7454545454545455, "step": 247, "batch_size": 128, "mean": 6.669732093811035, "std": 12.224453926086426, "min": -27.95508575439453, "p10": -6.434207153320312, "median": 5.65289306640625, "p90": 21.480728912353513, "max": 41.308990478515625, "pos_frac": 0.7265625, "sample": [10.326988220214844, 5.2681884765625, -6.238311767578125, 9.470109939575195, 9.786666870117188, -0.29772186279296875, -23.22418212890625, 7.9698333740234375, 21.016830444335938, -1.582529067993164, 15.953231811523438, 18.180683135986328, -0.9923114776611328, 2.5259857177734375, -6.044731140136719, 9.895576477050781, -1.84906005859375, 3.6969757080078125, -6.89129638671875, 7.866020202636719, 26.233789443969727, -7.856254577636719, 18.709152221679688, 1.837179183959961, 17.090667724609375, 0.1826171875, -2.12005615234375, 20.900775909423828, 1.918792724609375, 3.63433837890625, 28.3233642578125, 14.509063720703125, 4.091056823730469, 4.145904541015625, 4.0672607421875, 18.036117553710938, 10.858871459960938, 23.687088012695312, 4.988185882568359, -5.687992095947266, -3.30889892578125, -12.214981079101562, 13.727645874023438, 0.810882568359375, 34.069786071777344, 8.376258850097656, 6.7256622314453125, 17.854846954345703, 7.282318115234375, 7.9976806640625, 9.48592758178711, 8.600433349609375, 4.7886962890625, 30.635265350341797, -11.917205810546875, 21.316238403320312, 8.523178100585938, 1.4088935852050781, 3.2299346923828125, 23.676136016845703, 6.42010498046875, 8.19888687133789, 10.796165466308594, -21.576351165771484, 12.689613342285156, -2.145172119140625, 2.5910873413085938, 1.4132232666015625, 21.042495727539062, 1.9671096801757812, -0.9639701843261719, 14.054466247558594, 19.505624771118164, 3.0397415161132812, 4.3368988037109375, -5.138275146484375, 1.6072158813476562, 41.308990478515625, 18.64244842529297, -6.1629791259765625, -15.648666381835938, 21.864540100097656, -3.071338653564453, 2.8852386474609375, -10.809654235839844, 15.84234619140625, 4.852485656738281, 15.553703308105469, -11.122230529785156, 6.714057922363281, 31.501632690429688, 3.386932373046875, 8.883026123046875, 33.521934509277344, 6.733489990234375, 1.483795166015625, 15.343572616577148, 6.03759765625, 2.0493087768554688, -3.695770263671875, -0.3960723876953125, 17.37049102783203, 5.23347282409668, 5.251945495605469, 10.9776611328125, 31.78936004638672, 14.698997497558594, -6.9966278076171875, 13.87762451171875, 18.171979904174805, 6.5510406494140625, -27.95508575439453, 12.638801574707031, -3.4484214782714844, -26.002777099609375, -3.0150833129882812, 12.01565170288086, 6.151073455810547, 24.9388427734375, 1.9020652770996094, -3.1313018798828125, -3.333385467529297, 10.812416076660156, -11.71685791015625, 17.17620849609375, -0.7251129150390625, -2.711029052734375, 26.210994720458984], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000247.npy"}
{"epoch": 0.7484848484848485, "step": 248, "batch_size": 128, "mean": 6.615468978881836, "std": 11.803380966186523, "min": -23.189842224121094, "p10": -5.883970260620117, "median": 4.571053504943848, "p90": 20.56169929504394, "max": 48.2413330078125, "pos_frac": 0.6953125, "sample": [-13.502349853515625, -6.7697906494140625, -5.872016906738281, -17.677167892456055, 26.008960723876953, -0.4911022186279297, -5.911861419677734, 1.4945526123046875, -2.0944290161132812, 10.012657165527344, 15.981719970703125, 3.2437591552734375, 0.3813018798828125, 9.085996627807617, 12.523971557617188, -1.42462158203125, 2.0737648010253906, -7.132200241088867, 7.968660354614258, -3.6024551391601562, 1.0952415466308594, 10.41607666015625, 19.491485595703125, 11.894451141357422, -6.215553283691406, 28.551712036132812, -3.649444580078125, 19.820741653442383, 3.0772552490234375, -0.4802970886230469, 3.1994705200195312, 29.149932861328125, 10.290351867675781, 5.938346862792969, -2.034069061279297, -23.189842224121094, 4.112312316894531, 3.850269317626953, 11.420822143554688, 26.231327056884766, 15.355518341064453, 19.614356994628906, 0.3473548889160156, 9.824615478515625, 1.0014896392822266, -1.3754310607910156, 35.51739501953125, 9.70168685913086, -7.690986633300781, 6.06964111328125, 14.124755859375, 7.734405517578125, 15.853199005126953, 4.509006500244141, 9.187545776367188, -4.2522735595703125, 5.3555908203125, -5.818450927734375, -3.1626930236816406, 1.9176177978515625, 5.319469451904297, 8.185331344604492, 22.95599365234375, -0.5286979675292969, 10.02273178100586, 4.542449951171875, 36.2620849609375, 15.584159851074219, 1.0677261352539062, 6.430820465087891, 0.13085556030273438, 23.94384765625, 4.079689025878906, 5.2086334228515625, 10.929443359375, 19.27850341796875, -13.4169921875, 48.2413330078125, -2.447355270385742, 16.28237533569336, 19.11509132385254, 35.84358215332031, 3.2572784423828125, 4.59965705871582, -17.79234504699707, -1.7023372650146484, 3.755767822265625, 0.763214111328125, -3.6633434295654297, 1.7043991088867188, 30.339157104492188, -10.834381103515625, -5.66119384765625, 2.096586227416992, 16.0263671875, 18.061004638671875, 8.390518188476562, 17.334579467773438, -8.661518096923828, -1.2112579345703125, 21.68569564819336, 5.542793273925781, 4.27717399597168, 2.4815902709960938, 14.775184631347656, -1.58087158203125, 34.601318359375, 16.174758911132812, -2.281147003173828, 13.675247192382812, -4.91961669921875, 2.9156341552734375, 6.4104766845703125, 11.929107666015625, 20.079986572265625, 8.132436752319336, 10.374202728271484, 6.214410781860352, -0.3304901123046875, 10.86086654663086, -0.6209945678710938, 16.757568359375, -3.7875804901123047, 8.487152099609375, -2.3155746459960938, 9.871198654174805, -6.322723388671875, -1.2232742309570312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000248.npy"}
{"epoch": 0.7515151515151515, "step": 249, "batch_size": 128, "mean": 8.530600547790527, "std": 11.642529487609863, "min": -22.1243896484375, "p10": -4.247438049316406, "median": 6.334256172180176, "p90": 25.49532203674316, "max": 35.49517059326172, "pos_frac": 0.7578125, "sample": [33.297935485839844, 3.899250030517578, 32.83184814453125, 17.35599136352539, 15.53021240234375, 2.4756336212158203, 5.8832855224609375, -2.950328826904297, 11.483444213867188, -22.1243896484375, 20.831140518188477, 20.002059936523438, -1.3292102813720703, 13.01976203918457, 3.2791519165039062, -3.9265365600585938, 18.6356201171875, 19.589515686035156, -10.017173767089844, 15.081384658813477, 26.174400329589844, 25.204288482666016, 11.151802062988281, -8.766069412231445, -0.10358428955078125, 5.603736877441406, 6.485574722290039, 7.894012451171875, 10.476144790649414, 12.575702667236328, 6.707611083984375, 35.42887878417969, 6.564659118652344, 5.56671142578125, 15.202592849731445, 6.0346527099609375, 21.549440383911133, 1.9212875366210938, 6.176109313964844, -0.21722412109375, 2.8409881591796875, -2.7524337768554688, -10.64105224609375, 14.061454772949219, 7.404266357421875, 22.373233795166016, -2.789766311645508, 15.560523986816406, 22.176803588867188, 2.400775909423828, -2.7963638305664062, 0.32132720947265625, 13.411605834960938, -5.3215789794921875, 35.49517059326172, 27.272201538085938, -8.586395263671875, 4.023433685302734, 0.6816635131835938, 18.58698272705078, 11.95794677734375, 9.517730712890625, -0.26898956298828125, 15.4649658203125, -0.413177490234375, 16.191757202148438, 33.53474426269531, 8.567024230957031, -6.781280517578125, -4.17115592956543, 11.073684692382812, 22.854568481445312, 1.7292823791503906, -2.5419273376464844, 1.1857833862304688, 10.21673583984375, 32.81438446044922, 7.226497650146484, 17.330886840820312, 1.1865272521972656, 1.5718326568603516, 6.1829376220703125, 1.1721954345703125, 7.6466064453125, -5.552478790283203, 13.608358383178711, 0.17838096618652344, -5.808977127075195, 6.833564758300781, 8.779729843139648, 19.802513122558594, 28.72182846069336, 14.229316711425781, -1.2672882080078125, 19.725914001464844, 1.3918800354003906, -4.806665420532227, -2.508148193359375, 18.49109649658203, 2.915914535522461, 4.562797546386719, 0.806640625, 27.050315856933594, 8.255615234375, 4.8131256103515625, 2.0686492919921875, -9.825973510742188, 21.010570526123047, 16.533607482910156, 1.3143844604492188, 18.646026611328125, 22.047103881835938, 21.832332611083984, 26.248239517211914, -14.457916259765625, 14.068376541137695, -0.7059326171875, 1.2388420104980469, 3.01959228515625, 2.8075294494628906, 26.79638671875, 2.31121826171875, -0.8706874847412109, -4.4087982177734375, -4.17828369140625, -2.7460479736328125, 4.804107666015625, 32.68836212158203], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000249.npy"}
{"epoch": 0.7545454545454545, "step": 250, "batch_size": 128, "mean": 7.068459510803223, "std": 11.936080932617188, "min": -19.83966827392578, "p10": -8.627187538146973, "median": 5.8219757080078125, "p90": 24.334062004089354, "max": 32.62605285644531, "pos_frac": 0.7109375, "sample": [21.931529998779297, 24.181373596191406, -4.807228088378906, 8.822509765625, 11.606979370117188, 2.691722869873047, 5.268230438232422, -15.386177062988281, 31.363677978515625, -15.07492446899414, 24.302614212036133, 16.271329879760742, 11.960281372070312, -8.587921142578125, -7.005292892456055, -7.374982833862305, 7.633251190185547, -1.8477325439453125, 10.42038345336914, -1.6557693481445312, -2.35009765625, 1.7549705505371094, 2.0547256469726562, 1.6546249389648438, 25.99371337890625, 32.62605285644531, 16.095481872558594, 1.7864837646484375, 4.396722793579102, -7.82025146484375, -19.83966827392578, 11.501922607421875, -0.3258209228515625, 11.130165100097656, 15.189615249633789, 18.13030242919922, 19.080467224121094, -4.245643615722656, 22.623275756835938, -1.2567367553710938, -8.718809127807617, 14.62091064453125, 13.435806274414062, 11.944587707519531, -10.745565414428711, 1.4047164916992188, -2.90875244140625, -0.9697494506835938, 30.946792602539062, -2.043790817260742, 25.04132843017578, 2.1387939453125, -13.1575927734375, 0.76177978515625, 5.73541259765625, 17.803754806518555, 17.991994857788086, 13.20859146118164, -3.1849822998046875, 24.407440185546875, 7.3854522705078125, 24.577003479003906, 11.20965576171875, -2.549945831298828, 0.757720947265625, 3.5792922973632812, 4.044441223144531, 0.35071754455566406, 10.416610717773438, 1.9354476928710938, 7.569587707519531, 15.902267456054688, 29.283451080322266, -1.2234268188476562, 4.782497406005859, -13.884040832519531, 29.307228088378906, 15.629035949707031, 7.322826385498047, 13.51605224609375, 2.206888198852539, 5.1437835693359375, -14.252153396606445, 0.5267715454101562, 6.1323089599609375, 14.513320922851562, 16.63463592529297, -4.386016845703125, 16.365699768066406, -9.854225158691406, 24.773597717285156, -2.0125732421875, 2.121980667114258, 6.414031982421875, 20.259220123291016, -1.4125518798828125, 6.299598693847656, 30.243450164794922, 11.871429443359375, 21.222614288330078, 14.271156311035156, 0.48734283447265625, 18.12262725830078, 11.56655502319336, -5.171051025390625, 24.994178771972656, 19.77920913696289, 2.877054214477539, -1.663787841796875, -13.951614379882812, -10.944557189941406, 5.908538818359375, 15.873924255371094, -1.0923423767089844, 12.599205017089844, 5.250984191894531, -4.105827331542969, 16.427553176879883, 1.3827667236328125, -9.970703125, 2.0301742553710938, 16.432022094726562, 6.160926818847656, 0.8759307861328125, -12.303871154785156, 13.792259216308594, 22.822830200195312, 28.980819702148438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000250.npy"}
{"epoch": 0.7575757575757576, "step": 251, "batch_size": 128, "mean": 5.714435577392578, "std": 9.502237319946289, "min": -18.200759887695312, "p10": -5.120404052734375, "median": 3.969143867492676, "p90": 16.964094734191892, "max": 37.14037322998047, "pos_frac": 0.7578125, "sample": [1.7455711364746094, 20.953590393066406, 5.9701995849609375, 9.822704315185547, 16.90408706665039, 17.10411262512207, 1.1674957275390625, 15.65936279296875, 8.921501159667969, 14.916091918945312, -6.540130615234375, -4.270116806030273, 4.4672393798828125, -1.2628231048583984, -0.010168075561523438, 7.195243835449219, -4.129655838012695, 3.5313949584960938, 19.483261108398438, 3.2272109985351562, 9.219711303710938, -2.9695663452148438, 0.4026985168457031, 4.8426666259765625, -6.115322113037109, 3.4292221069335938, 3.3713302612304688, 28.666107177734375, 1.8285331726074219, 1.9496192932128906, 1.4291458129882812, 0.8452587127685547, 11.872230529785156, 27.89493179321289, 28.454986572265625, -1.2711563110351562, 14.58803939819336, 5.0051727294921875, -4.657281875610352, -2.1683216094970703, 27.011756896972656, 20.749534606933594, 6.575935363769531, 28.955780029296875, -1.8987655639648438, 0.24497222900390625, 1.5431671142578125, 10.142959594726562, 4.523555755615234, 21.94243621826172, 1.277688980102539, 2.2340774536132812, -0.3728370666503906, -7.6532745361328125, 4.180839538574219, 13.469573974609375, 1.318695068359375, 1.5068073272705078, 13.069110870361328, 13.11253547668457, 16.053348541259766, 3.8886661529541016, 11.106584548950195, -3.6849288940429688, -5.630420684814453, -1.80218505859375, 6.146991729736328, -2.5950469970703125, 10.541160583496094, 6.935014724731445, 4.8449859619140625, 5.210533142089844, 2.30413818359375, 1.230377197265625, 1.3878173828125, 4.04962158203125, 6.534736633300781, -5.2283477783203125, 8.25337028503418, 5.597949981689453, 5.310905456542969, -0.6830101013183594, 1.1386833190917969, -5.273902893066406, 26.07805824279785, 37.14037322998047, -7.23388671875, -1.8858070373535156, 9.33243179321289, -12.61346435546875, 4.257364273071289, 6.7371978759765625, 15.598979949951172, 8.89190673828125, 3.06597900390625, -5.0741424560546875, -7.872467041015625, 0.718475341796875, 4.469451904296875, 13.853317260742188, -5.88983154296875, -18.200759887695312, 8.452301025390625, 32.17271423339844, 2.2798843383789062, 2.0397872924804688, 12.584575653076172, -10.732879638671875, -0.7497482299804688, 8.064010620117188, 3.306976318359375, 1.8064422607421875, 0.9402008056640625, 3.05413818359375, 7.644218444824219, 2.7922325134277344, 0.1401691436767578, -4.5137481689453125, 1.1494293212890625, 12.490150451660156, 9.03009033203125, 15.671531677246094, -5.513206481933594, 11.868888854980469, 5.007823944091797, 14.243904113769531, 12.550018310546875, 15.248924255371094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000251.npy"}
{"epoch": 0.7606060606060606, "step": 252, "batch_size": 128, "mean": 8.197932243347168, "std": 10.372557640075684, "min": -15.041587829589844, "p10": -4.0025497436523425, "median": 6.102252960205078, "p90": 23.40229034423828, "max": 32.22340393066406, "pos_frac": 0.828125, "sample": [13.845298767089844, 2.4277114868164062, 5.2395782470703125, 5.349983215332031, 17.4757080078125, 12.609611511230469, 16.99773406982422, -10.973464965820312, 1.0529956817626953, -5.5290374755859375, 29.62591552734375, -5.075965881347656, 3.7947921752929688, 5.4027099609375, 22.81354522705078, 4.304267883300781, 24.064876556396484, 18.835933685302734, 22.554513931274414, -3.5935211181640625, 10.79166030883789, -6.9495391845703125, -10.186851501464844, 0.8126029968261719, 26.098134994506836, 2.844940185546875, 10.177135467529297, 13.286846160888672, -13.914737701416016, 1.938589096069336, 19.82141876220703, 13.112876892089844, 1.7438201904296875, 19.22478485107422, 23.318130493164062, 2.1257553100585938, 5.290313720703125, 25.096874237060547, -1.3055858612060547, 9.741859436035156, 13.083938598632812, 8.101402282714844, 2.2715606689453125, 25.402957916259766, -0.2660255432128906, 17.62781524658203, -5.558895111083984, -0.9012298583984375, 0.05735588073730469, 4.129547119140625, 0.08362007141113281, 15.520164489746094, 3.0848217010498047, 5.686561584472656, 5.3442230224609375, 26.93145751953125, 5.1640472412109375, -9.170028686523438, 9.098503112792969, 15.98251724243164, 2.8725318908691406, 12.000015258789062, 12.587238311767578, -15.041587829589844, -8.83380126953125, 21.642309188842773, 3.6207504272460938, 7.797893524169922, 9.109046936035156, 1.5324172973632812, 21.092369079589844, 6.5179443359375, 22.797428131103516, -3.759307861328125, 17.323753356933594, 25.26537322998047, 2.727184295654297, 3.9783096313476562, 0.4334545135498047, 23.598663330078125, 1.0123863220214844, 16.759342193603516, 7.84906005859375, 1.4568252563476562, 3.251373291015625, -13.266456604003906, 15.38604736328125, 2.2574615478515625, 16.8046875, 11.401626586914062, 5.3436737060546875, -2.1521129608154297, -9.46713638305664, 8.523895263671875, 7.757293701171875, 4.642097473144531, 28.040245056152344, 0.1656627655029297, -4.5701141357421875, 7.606231689453125, 1.6284408569335938, 24.93358612060547, 1.3146553039550781, 21.596723556518555, 19.523860931396484, -2.18359375, 3.7697505950927734, 26.742900848388672, -2.026906967163086, 32.22340393066406, 16.869293212890625, 18.624099731445312, 8.031036376953125, -2.9672775268554688, 1.0103759765625, 13.279159545898438, 7.578376770019531, 16.683074951171875, 15.20705795288086, 8.779043197631836, 1.5386123657226562, 6.667327880859375, 11.765129089355469, 3.3822898864746094, 1.7848701477050781, 24.5968017578125, 12.247570037841797, 4.705051422119141], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000252.npy"}
{"epoch": 0.7636363636363637, "step": 253, "batch_size": 128, "mean": 6.040040493011475, "std": 12.525606155395508, "min": -24.019943237304688, "p10": -9.431410980224609, "median": 6.2873687744140625, "p90": 20.352216720581055, "max": 40.56024169921875, "pos_frac": 0.6875, "sample": [14.561195373535156, 2.5452117919921875, 3.7312088012695312, 5.175014495849609, 0.21528244018554688, 17.459381103515625, -6.965667724609375, 35.52006530761719, -5.72015380859375, -0.2021484375, 8.821199417114258, 5.1829376220703125, -7.025421142578125, 20.573944091796875, 11.1318359375, 9.125282287597656, 20.107288360595703, -13.531776428222656, 30.022342681884766, 21.128257751464844, 13.48797607421875, 14.759841918945312, -3.020944595336914, -0.5937957763671875, 24.094757080078125, -22.472808837890625, -2.24407958984375, 11.604503631591797, 25.886022567749023, 11.4681396484375, 22.206235885620117, -1.403472900390625, 10.392865180969238, 16.993511199951172, 18.559005737304688, -5.208763122558594, 3.6139984130859375, 5.001579284667969, 16.879074096679688, 8.204605102539062, 17.176536560058594, 40.56024169921875, 10.359649658203125, -5.6551513671875, 4.2286834716796875, -0.2709980010986328, 0.0482940673828125, -11.928047180175781, 7.6258544921875, 4.6755523681640625, 1.5212440490722656, -14.397384643554688, 7.3586578369140625, 1.8977508544921875, 1.4172210693359375, -7.207939147949219, 13.979354858398438, -0.24416351318359375, 2.6063919067382812, 26.719669342041016, 3.4642505645751953, -1.4978370666503906, -7.6302490234375, -18.592864990234375, 6.798921585083008, 19.176719665527344, -1.2676124572753906, -9.235973358154297, 15.2767333984375, 8.645126342773438, 7.620292663574219, -6.723289489746094, -20.520517349243164, 31.085601806640625, 38.43632507324219, 4.814079284667969, 11.948638916015625, -14.713699340820312, -0.93408203125, -20.962631225585938, 18.70538330078125, -5.319969177246094, 19.90785026550293, 3.4214630126953125, 16.973690032958984, 7.359214782714844, 6.7684326171875, -12.768356323242188, 10.10466194152832, 9.91015625, 16.074039459228516, 0.0335235595703125, -0.8575248718261719, 16.28314208984375, 17.23479461669922, -4.615753173828125, 13.406827926635742, 8.326812744140625, 0.12926101684570312, 11.735023498535156, 10.397132873535156, 2.3425521850585938, 9.002464294433594, 2.8000717163085938, -18.6220703125, -24.019943237304688, 5.7191619873046875, 21.026992797851562, 19.704498291015625, 13.017364501953125, 20.257190704345703, -0.44184112548828125, -11.560577392578125, -2.5837783813476562, 5.8277587890625, 11.046943664550781, -4.9244537353515625, 13.623382568359375, -3.8198318481445312, 10.834426879882812, 0.5033340454101562, 11.136192321777344, -9.241661071777344, -9.874160766601562, 6.746978759765625, 12.829498291015625, 16.4522705078125, 26.409759521484375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000253.npy"}
{"epoch": 0.7666666666666667, "step": 254, "batch_size": 128, "mean": 7.139259338378906, "std": 12.328140258789062, "min": -19.37401580810547, "p10": -7.618170166015624, "median": 6.1510162353515625, "p90": 22.951599884033204, "max": 41.09954833984375, "pos_frac": 0.71875, "sample": [21.307968139648438, 8.317398071289062, -4.640830993652344, -1.23736572265625, 32.7894287109375, -1.0151252746582031, 13.9747314453125, -14.38812255859375, 0.3548583984375, -7.30474853515625, 18.380104064941406, 5.622123718261719, 6.7079315185546875, 8.606369018554688, 11.539344787597656, -1.2082061767578125, 31.658004760742188, -12.137161254882812, 12.265182495117188, 10.924434661865234, -4.13494873046875, 5.407936096191406, -17.56985855102539, -0.939483642578125, 33.342559814453125, 39.0125732421875, -17.738121032714844, 24.338714599609375, -4.616138458251953, 4.335590362548828, 16.411056518554688, 0.04416656494140625, -3.5045242309570312, 11.436695098876953, 4.718645095825195, 4.0017242431640625, -5.450046539306641, 8.344970703125, -4.966739654541016, 6.9667816162109375, 21.252593994140625, -0.9973106384277344, 1.5633201599121094, 5.5929718017578125, 5.112092971801758, 4.985500335693359, 0.9664459228515625, 0.2612457275390625, 0.42977142333984375, 5.8083343505859375, 2.173328399658203, 20.861400604248047, 12.968036651611328, 6.0322418212890625, 10.338592529296875, 6.2697906494140625, 7.9867401123046875, -2.887115478515625, 4.628810882568359, -13.398490905761719, 7.721416473388672, 26.307815551757812, 7.832450866699219, 21.86389923095703, 13.106868743896484, -1.3366165161132812, 33.37754821777344, 5.27618408203125, 7.279014587402344, -12.993309020996094, 34.04530334472656, -0.8564662933349609, -11.42818832397461, 6.50823974609375, 7.014181137084961, -9.79000473022461, 8.942611694335938, -12.443923950195312, 15.899351119995117, -1.0965385437011719, 7.4338226318359375, 10.909156799316406, 22.926467895507812, 22.955429077148438, 0.6723976135253906, 12.613143920898438, 22.94995880126953, -2.7581253051757812, 18.90721893310547, 6.469770431518555, 20.960739135742188, 0.6650676727294922, 3.209066390991211, -4.953216552734375, -5.0279693603515625, -19.37401580810547, 15.63270378112793, 19.819046020507812, 41.09954833984375, 19.253353118896484, -5.560268402099609, 4.502277374267578, -8.3494873046875, 19.206520080566406, -0.0838623046875, -14.402153015136719, 7.712736129760742, 2.424623489379883, 22.4912109375, 14.557647705078125, 1.1356048583984375, 5.3962860107421875, -5.002363204956055, -8.7672119140625, 24.37085723876953, 18.231849670410156, 0.45331573486328125, -3.99627685546875, 11.709762573242188, 18.993560791015625, 1.9279861450195312, 26.151405334472656, 14.300254821777344, 10.4659423828125, 10.379770278930664, 11.047927856445312, 11.35653305053711, 23.671142578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000254.npy"}
{"epoch": 0.7696969696969697, "step": 255, "batch_size": 128, "mean": 8.180608749389648, "std": 11.117136001586914, "min": -17.38384246826172, "p10": -6.360686492919922, "median": 8.578108787536621, "p90": 21.783860778808595, "max": 35.615997314453125, "pos_frac": 0.7421875, "sample": [10.996173858642578, -2.159708023071289, -0.7500705718994141, 21.747024536132812, 3.9838409423828125, 1.340005874633789, 7.073116302490234, -2.0189380645751953, 28.80828857421875, 19.467636108398438, -1.2243194580078125, 2.8462600708007812, 9.447395324707031, -7.5441741943359375, 15.32342529296875, -2.2150611877441406, 14.756301879882812, 11.53268051147461, -5.609649658203125, -1.0165786743164062, 17.33490753173828, 6.683990478515625, 15.995590209960938, 19.4406795501709, 30.47760772705078, 21.86981201171875, 20.2252197265625, -0.21982574462890625, -1.6858901977539062, -10.750701904296875, -4.6642303466796875, 11.202590942382812, -6.7439117431640625, 1.449209213256836, 13.823150634765625, 20.7723388671875, 9.273590087890625, 8.593597412109375, 19.37194061279297, -17.38384246826172, 23.283767700195312, 4.850456237792969, 1.4150238037109375, -1.2366523742675781, 13.721153259277344, 5.547206878662109, 19.626937866210938, 15.471633911132812, 6.705772399902344, 10.85336685180664, 21.415512084960938, -5.316967010498047, 15.17026138305664, 14.345420837402344, 4.1690216064453125, 13.174957275390625, 11.010427474975586, -7.155841827392578, -6.107597351074219, 0.8087387084960938, 7.539251327514648, 12.88067626953125, 23.350982666015625, 8.705322265625, -3.47576904296875, 20.70969009399414, 16.85577392578125, 6.842521667480469, -6.7843017578125, 12.891128540039062, 5.854511260986328, 31.93054962158203, -2.076122283935547, 13.804573059082031, 0.7835235595703125, 10.562992095947266, 5.899894714355469, 21.420013427734375, 20.79693603515625, -7.379341125488281, 25.436798095703125, 3.782255172729492, 0.47777366638183594, 10.346099853515625, 17.882888793945312, 35.615997314453125, 9.205596923828125, -8.009429931640625, 9.936981201171875, 0.7855987548828125, 12.537801742553711, 8.562620162963867, 25.466087341308594, 11.470342636108398, -13.651351928710938, 2.1880722045898438, 4.9773101806640625, 6.38824462890625, -0.2969818115234375, 1.2321929931640625, 0.6411361694335938, 4.720804214477539, -6.481426239013672, -3.6518993377685547, 7.771631240844727, 20.333702087402344, 13.641864776611328, -5.7047119140625, -6.308940887451172, 17.56716537475586, 11.5562744140625, 12.235984802246094, 4.6533966064453125, -14.414299011230469, 12.732933044433594, 15.38134765625, 25.003875732421875, 7.247352600097656, 10.720207214355469, 31.317138671875, 16.466033935546875, 27.17486572265625, -0.8441219329833984, 16.032520294189453, 24.816619873046875, 7.909688949584961, -14.145713806152344, -16.305313110351562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000255.npy"}
{"epoch": 0.7727272727272727, "step": 256, "batch_size": 128, "mean": 8.131439208984375, "std": 11.643978118896484, "min": -15.472343444824219, "p10": -6.1045082092285154, "median": 5.936737060546875, "p90": 24.757603645324707, "max": 33.652008056640625, "pos_frac": 0.75, "sample": [33.652008056640625, 20.58271026611328, 12.138587951660156, 26.376375198364258, 6.372425079345703, -6.124839782714844, 6.797615051269531, -1.3634719848632812, 15.913198471069336, 0.5204391479492188, 22.359134674072266, 17.51645278930664, 21.82286834716797, -14.836334228515625, 3.5815887451171875, -7.25897216796875, -6.095794677734375, 5.2738037109375, 15.788396835327148, 13.67236328125, 18.99394989013672, -3.7062225341796875, -2.724292755126953, 2.8048629760742188, 0.37175750732421875, 24.798826217651367, 3.5757598876953125, 3.1009674072265625, 17.52043914794922, 9.246780395507812, 0.9737205505371094, 8.488264083862305, 16.606124877929688, 9.732463836669922, 17.928237915039062, -6.93853759765625, 8.178543090820312, 2.7368698120117188, 16.619510650634766, 18.5814208984375, -2.498149871826172, 5.64068603515625, 28.308650970458984, 12.259521484375, 16.88818359375, 11.075424194335938, 5.225652694702148, 6.2327880859375, 10.728118896484375, 6.522193908691406, 24.984161376953125, 27.57001495361328, 27.238784790039062, 14.998283386230469, -6.796245574951172, -1.1248779296875, -10.541183471679688, 32.29753112792969, 14.854362487792969, 17.167129516601562, 30.693777084350586, -1.5168590545654297, -15.472343444824219, -4.783077239990234, 12.698925018310547, 2.9735260009765625, 5.3724517822265625, -12.904172897338867, 3.5566444396972656, 31.721221923828125, 5.63886833190918, -4.5209808349609375, 3.2890167236328125, -0.322296142578125, 19.940109252929688, 2.2697696685791016, 24.61569595336914, 15.038856506347656, -4.3750152587890625, 15.29168701171875, 23.227149963378906, 0.07810020446777344, 2.65081787109375, 1.796630859375, 1.1303482055664062, 24.321388244628906, -2.630250930786133, 24.73993682861328, -2.1768798828125, 0.5384502410888672, 21.981708526611328, 2.5894927978515625, 3.8693084716796875, -3.0503921508789062, 2.7153472900390625, 5.3608856201171875, 6.498981475830078, 4.8463287353515625, 12.3250732421875, 23.14136505126953, -11.6788330078125, -5.531841278076172, 10.77719497680664, 7.054740905761719, -0.32464599609375, 19.2432861328125, -12.394950866699219, 24.963150024414062, 0.35605621337890625, 18.668014526367188, -0.9714927673339844, 3.759185791015625, 4.3264617919921875, -10.659235000610352, -4.0093841552734375, 7.2530517578125, 13.881179809570312, 25.077621459960938, -10.276687622070312, 1.978597640991211, 18.292728424072266, 10.114692687988281, 31.986572265625, 19.872406005859375, 5.011421203613281, -10.928363800048828, 11.894012451171875, -4.6794281005859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000256.npy"}
{"epoch": 0.7757575757575758, "step": 257, "batch_size": 128, "mean": 8.17287540435791, "std": 11.600485801696777, "min": -21.040157318115234, "p10": -4.4464897155761705, "median": 6.521946907043457, "p90": 23.103749847412107, "max": 45.06695556640625, "pos_frac": 0.765625, "sample": [45.06695556640625, 28.893478393554688, 28.920066833496094, 7.6235809326171875, 13.934745788574219, 9.587970733642578, 13.564188003540039, 1.2040824890136719, 1.1484451293945312, 5.351463317871094, 31.054988861083984, 4.751411437988281, 4.182317733764648, 27.61223602294922, 9.868171691894531, 16.37408447265625, 13.946250915527344, -21.040157318115234, 10.063148498535156, 7.45695686340332, -0.9832324981689453, 12.877410888671875, 30.777908325195312, 1.7599964141845703, 3.0539093017578125, 6.863372802734375, 8.25509262084961, -2.1719913482666016, 14.37164306640625, 11.315460205078125, 6.1298065185546875, -7.177295684814453, 1.0495223999023438, 4.881275177001953, 5.131172180175781, -9.0736083984375, 19.27989959716797, 9.096122741699219, 4.1042327880859375, 14.67376708984375, 19.22653579711914, -6.343364715576172, 2.3465042114257812, 1.3184738159179688, 5.479909896850586, 1.3125324249267578, 6.180521011352539, 8.788211822509766, -2.392986297607422, -9.557296752929688, 35.988059997558594, -0.39118194580078125, 15.244102478027344, 25.692054748535156, 30.98822021484375, -13.692161560058594, -0.5085296630859375, 7.221282958984375, -2.3108367919921875, 1.600433349609375, 8.486572265625, -1.97723388671875, -2.7593536376953125, 5.505252838134766, 5.37542724609375, 10.81494140625, 13.32008171081543, 14.335716247558594, 4.406890869140625, 5.098932266235352, 17.170211791992188, -1.0868759155273438, -10.553237915039062, 11.369064331054688, 2.837291717529297, 0.1257171630859375, -19.78459930419922, 0.9496383666992188, 17.904369354248047, 16.96966552734375, 18.61573028564453, 10.331275939941406, 5.097900390625, 8.289939880371094, 1.382843017578125, 19.95014190673828, 5.630157470703125, 9.03555679321289, 17.15918731689453, 18.303749084472656, -3.66015625, -2.904743194580078, 14.58848762512207, 22.12640380859375, -9.769210815429688, 18.39410400390625, 10.503402709960938, -2.566333770751953, 9.991813659667969, 21.80925750732422, 13.936176300048828, 23.381134033203125, -2.931852340698242, 0.7649917602539062, 33.326934814453125, 3.767059326171875, -5.315727233886719, 12.179756164550781, 7.7765045166015625, 17.03614044189453, -2.31689453125, -11.249614715576172, 4.1810760498046875, -4.0739593505859375, -7.311622619628906, 5.955375671386719, 3.45947265625, 13.834922790527344, 3.5647430419921875, -2.6731643676757812, 22.98487091064453, 14.052658081054688, 20.797286987304688, -6.127424240112305, 34.301605224609375, 23.94229507446289, -2.6666107177734375, 20.764617919921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000257.npy"}
{"epoch": 0.7787878787878788, "step": 258, "batch_size": 128, "mean": 7.213096618652344, "std": 11.493427276611328, "min": -26.240489959716797, "p10": -5.890494537353516, "median": 7.833487510681152, "p90": 22.89836559295654, "max": 33.01280975341797, "pos_frac": 0.7578125, "sample": [3.8818931579589844, 10.169857025146484, 0.16455078125, 7.557636260986328, 31.386184692382812, 20.358152389526367, -0.7254791259765625, 23.54573631286621, -9.585151672363281, 26.430160522460938, -8.589942932128906, -5.8609619140625, 6.11175537109375, -1.1219406127929688, 4.51043701171875, 1.73114013671875, -3.699066162109375, 3.7418060302734375, 12.698661804199219, 23.11431312561035, -8.978546142578125, 17.753440856933594, 1.95526123046875, 9.178085327148438, 7.745391845703125, 16.414743423461914, 7.015384674072266, 5.442138671875, 11.527420043945312, 25.34930419921875, 23.328323364257812, -13.75461196899414, 4.58551025390625, 22.682037353515625, 11.01165771484375, 7.7308349609375, 13.506488800048828, -2.2912254333496094, 14.778038024902344, -2.123046875, -15.168411254882812, 13.710855484008789, 27.571979522705078, 5.701740264892578, 10.350349426269531, -4.718986511230469, 8.716270446777344, 13.382389068603516, -2.4459266662597656, 5.552556991577148, 18.362762451171875, -18.480682373046875, 5.9137725830078125, 30.099212646484375, 1.2319555282592773, -0.14426422119140625, 14.740812301635742, 2.603984832763672, 26.24530792236328, -5.959403991699219, 12.493301391601562, 10.3123779296875, -1.6660327911376953, -6.730628967285156, 22.805816650390625, 33.01280975341797, 10.168216705322266, -5.8426513671875, -8.409042358398438, 10.531028747558594, -5.6226043701171875, 10.555732727050781, 19.339828491210938, 14.17074203491211, 17.042770385742188, 12.744979858398438, -2.8109817504882812, -26.240489959716797, 10.964157104492188, 17.567703247070312, 1.4336109161376953, -11.791885375976562, 3.406890869140625, 10.571640014648438, 14.059158325195312, 0.38297271728515625, 2.7399120330810547, 4.767997741699219, 9.171371459960938, 9.053009033203125, 8.258651733398438, 10.58770751953125, 8.774696350097656, 6.262914657592773, 2.9521636962890625, 7.558570861816406, 13.388492584228516, 13.569913864135742, 5.5648345947265625, 7.92158317565918, 7.077239990234375, 28.45342254638672, 1.5294818878173828, 0.5330963134765625, 26.5491943359375, 11.942049026489258, 15.493080139160156, 0.09514236450195312, 17.579265594482422, 12.060100555419922, -24.749801635742188, -5.84759521484375, 8.374008178710938, -25.80194854736328, 10.35919189453125, 18.35283088684082, 11.402252197265625, -2.0327911376953125, 20.539161682128906, 15.891098022460938, 4.752765655517578, 12.317943572998047, -5.141975402832031, -1.9878406524658203, 14.321678161621094, 0.32090187072753906, -1.70379638671875, 23.634315490722656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000258.npy"}
{"epoch": 0.7818181818181819, "step": 259, "batch_size": 128, "mean": 6.168429374694824, "std": 11.482932090759277, "min": -16.937307357788086, "p10": -6.890820693969726, "median": 4.632347106933594, "p90": 21.726690292358395, "max": 38.145965576171875, "pos_frac": 0.703125, "sample": [8.342575073242188, -5.104438781738281, -6.595058441162109, 2.565753936767578, -0.5316429138183594, 9.147146224975586, 4.704658508300781, 4.239681243896484, 0.028141021728515625, 10.057884216308594, 10.833919525146484, -1.0020980834960938, 11.816734313964844, -3.0526199340820312, 8.897369384765625, 29.503759384155273, 12.008428573608398, 1.3710479736328125, -9.65625, -16.937307357788086, 4.5812530517578125, 23.790260314941406, -6.790409088134766, -9.2103271484375, -6.205862045288086, -7.125114440917969, -4.858734130859375, 16.29560661315918, 8.725982666015625, 32.830360412597656, 37.189788818359375, 16.84808349609375, 22.339080810546875, 8.671722412109375, 8.818214416503906, -4.38922119140625, -2.1339569091796875, 1.8330459594726562, 0.2408294677734375, -1.7135543823242188, -2.76226806640625, 1.242483139038086, 0.43833160400390625, 6.280303955078125, -0.860504150390625, 13.695932388305664, 15.254501342773438, 21.274078369140625, 14.711463928222656, 20.342662811279297, 37.50604248046875, 2.0618858337402344, 2.258148193359375, 22.717369079589844, 9.239097595214844, 9.969444274902344, 5.32220458984375, -1.8795089721679688, 2.420682907104492, 4.363487243652344, 17.09202003479004, -2.4235382080078125, 13.186502456665039, -4.131172180175781, 9.29010009765625, -8.833053588867188, 0.4026508331298828, 11.592792510986328, -2.6544036865234375, -1.1195297241210938, -2.8833694458007812, 21.464237213134766, 8.994041442871094, 17.519262313842773, -8.831218719482422, 18.77911376953125, 30.54254150390625, -12.354278564453125, -1.0905914306640625, 8.607162475585938, 2.030546188354492, 2.7665767669677734, 12.108848571777344, -1.4231109619140625, 6.678241729736328, -4.349449157714844, 9.226364135742188, 38.145965576171875, 12.672225952148438, -13.553600311279297, -5.710187911987305, 1.2189559936523438, 7.0189208984375, 1.0529499053955078, 5.435935974121094, 18.741920471191406, -15.441505432128906, 9.960723876953125, 8.55539321899414, 5.265827178955078, 4.595069885253906, 19.262794494628906, 4.669624328613281, -11.508163452148438, 15.613151550292969, 13.73855209350586, -10.678947448730469, 13.138259887695312, -10.991844177246094, 7.943939208984375, 1.9457874298095703, -3.9400978088378906, 1.478851318359375, 8.540176391601562, 0.35263633728027344, 31.04132080078125, 7.4703826904296875, 24.288238525390625, -3.15753173828125, 27.06718635559082, 4.797431945800781, 1.1462593078613281, 2.5101776123046875, -8.948675155639648, 3.501922607421875, 0.275604248046875, 31.490936279296875, 6.394477844238281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000259.npy"}
{"epoch": 0.7848484848484848, "step": 260, "batch_size": 128, "mean": 5.3159894943237305, "std": 11.230116844177246, "min": -29.428863525390625, "p10": -7.993267059326172, "median": 4.604427337646484, "p90": 21.816572952270505, "max": 29.012306213378906, "pos_frac": 0.671875, "sample": [-18.814123153686523, 14.392814636230469, -4.1280670166015625, -27.53227996826172, -3.4829654693603516, 10.291519165039062, -2.044403076171875, 24.69390869140625, 7.825233459472656, 4.745084762573242, -8.686700820922852, -6.1876068115234375, 21.681316375732422, 17.9947509765625, -8.084297180175781, 10.746139526367188, 3.8811798095703125, -4.291351318359375, 10.123579025268555, -3.906951904296875, -4.175079345703125, 4.633262634277344, 26.21337127685547, -4.517662048339844, 6.157745361328125, -14.621374130249023, 24.97174072265625, -6.831153869628906, 0.9169197082519531, 29.012306213378906, 6.985443115234375, 4.120697021484375, -1.1359672546386719, 14.962783813476562, -0.930877685546875, 9.384082794189453, 18.611053466796875, 2.5508956909179688, 4.892127990722656, 6.036905288696289, 14.902618408203125, 18.33164405822754, -16.474273681640625, 2.0582923889160156, 13.472297668457031, 6.207244873046875, -5.861154556274414, 19.70555877685547, 0.08885574340820312, 3.4112510681152344, 26.0067138671875, 7.4573974609375, 9.473541259765625, -29.428863525390625, -0.7556686401367188, -9.655258178710938, 21.564834594726562, 23.643863677978516, 3.585987091064453, -10.939125061035156, -0.27930450439453125, -6.349067687988281, 22.784454345703125, -10.023946762084961, 15.915645599365234, -1.6147174835205078, -2.7682151794433594, 1.8904075622558594, 15.092994689941406, 0.709259033203125, 11.64683723449707, 2.973735809326172, 6.67950439453125, -1.5609130859375, 1.0795440673828125, 25.720273971557617, 19.763763427734375, 13.625640869140625, 3.9974117279052734, 14.957733154296875, -0.06314659118652344, 4.7156524658203125, -1.2314071655273438, 1.5169258117675781, 22.743188858032227, 22.132171630859375, -1.1422195434570312, -1.0302886962890625, 7.152788162231445, 15.352996826171875, 0.34026336669921875, 28.422836303710938, 7.254310607910156, 3.605682373046875, 27.019149780273438, 10.288841247558594, -0.39340972900390625, 3.270050048828125, 15.146316528320312, 5.35357666015625, -2.3152923583984375, 8.276832580566406, 9.493484497070312, 4.324737548828125, 4.575592041015625, 6.538154602050781, 24.222854614257812, -12.780776977539062, -5.943523406982422, 20.909683227539062, 8.235015869140625, 3.4312820434570312, -8.664264678955078, 11.133712768554688, 6.170310974121094, -7.954254150390625, 1.2065353393554688, 5.690431594848633, 0.51971435546875, 7.950927734375, 5.740447998046875, 5.007656097412109, -0.39795875549316406, -2.077059745788574, -0.04392242431640625, 9.241844177246094, 18.555044174194336, -8.545669555664062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000260.npy"}
{"epoch": 0.7878787878787878, "step": 261, "batch_size": 128, "mean": 6.908292770385742, "std": 12.063395500183105, "min": -33.47303009033203, "p10": -7.007081604003906, "median": 6.730364799499512, "p90": 24.210276985168456, "max": 35.2340087890625, "pos_frac": 0.6953125, "sample": [15.354400634765625, 1.747100830078125, 17.473007202148438, 8.347030639648438, 22.05279541015625, 0.9059219360351562, 13.750816345214844, -1.1062393188476562, 6.178985595703125, 8.860916137695312, -6.8128204345703125, 24.80599021911621, 4.0832672119140625, 19.392139434814453, 7.406219482421875, 4.846721649169922, -9.105911254882812, 4.14280891418457, 4.102935791015625, -2.936859130859375, 4.7513427734375, 4.064399719238281, -1.0242843627929688, 29.975814819335938, -3.9244842529296875, 22.356487274169922, -2.8321990966796875, 10.962043762207031, -20.86785125732422, 27.388412475585938, -12.992950439453125, 8.805032730102539, -4.8832855224609375, 14.098831176757812, 13.103187561035156, -4.070911407470703, 19.066070556640625, 3.5211029052734375, 17.999168395996094, 8.623023986816406, 1.2231006622314453, 13.899208068847656, -6.638496398925781, 19.67974090576172, 7.7411956787109375, 21.934898376464844, 27.022994995117188, 31.824310302734375, 18.33984375, -0.9842262268066406, -7.907548904418945, 3.1439056396484375, -1.2757930755615234, 3.569070816040039, 11.588325500488281, 18.43407440185547, 11.699804306030273, -2.49371337890625, 12.774330139160156, -0.6836624145507812, 9.492507934570312, 12.308795928955078, 10.475341796875, -7.460357666015625, 2.7693405151367188, 8.914913177490234, 7.244185447692871, 5.188499450683594, 11.180778503417969, 19.289039611816406, 6.502281188964844, 25.3740234375, -9.465950012207031, 7.798946380615234, 11.740955352783203, 6.691305160522461, 16.91326904296875, -4.808040618896484, 6.7694244384765625, -33.47303009033203, -4.4513092041015625, 24.843212127685547, 20.801342010498047, 1.2142562866210938, -1.3137054443359375, 11.686256408691406, -3.6092758178710938, 2.0226974487304688, 28.91019058227539, -2.2628841400146484, 5.287300109863281, -16.325592041015625, 9.884628295898438, 25.266815185546875, 2.0030441284179688, 2.348785400390625, 23.954971313476562, -1.3529930114746094, -18.38751983642578, -4.85151481628418, 8.016059875488281, 28.77393341064453, 3.1161117553710938, 13.845962524414062, 28.03887939453125, -1.054311752319336, -1.1664695739746094, -5.0639190673828125, -9.127517700195312, 7.397605895996094, -0.48203277587890625, 27.354724884033203, 7.857421875, 2.7515411376953125, 7.833099365234375, -3.2916717529296875, 12.770833969116211, -1.3870201110839844, 14.756828308105469, -17.39448356628418, -8.083847045898438, 35.2340087890625, 14.73061752319336, 23.49779510498047, 6.685291290283203, -12.692817687988281, 9.115671157836914, 12.614669799804688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000261.npy"}
{"epoch": 0.7909090909090909, "step": 262, "batch_size": 128, "mean": 6.211188316345215, "std": 11.293362617492676, "min": -25.326194763183594, "p10": -9.670481872558593, "median": 6.507232666015625, "p90": 20.83637943267822, "max": 33.469764709472656, "pos_frac": 0.7109375, "sample": [-2.05499267578125, 23.942134857177734, 0.570037841796875, 6.0509185791015625, 29.335586547851562, 21.900890350341797, -3.2043685913085938, 20.138145446777344, 11.690208435058594, -1.9166069030761719, 18.997154235839844, -14.477973937988281, 2.9029464721679688, 14.285919189453125, -4.263652801513672, 6.5221099853515625, 16.8955078125, 11.643142700195312, 12.525489807128906, 29.239017486572266, 25.059097290039062, -2.6051177978515625, 14.964675903320312, -11.62921142578125, 11.166015625, -15.954277038574219, 27.173843383789062, 5.722204208374023, 1.3857879638671875, -5.668769836425781, 22.374191284179688, 12.635200500488281, 6.411937713623047, 2.9039306640625, 1.6914634704589844, 12.754264831542969, 30.04357147216797, -10.446762084960938, 20.210472106933594, 16.25933837890625, -10.930068969726562, 11.92901611328125, -4.603569030761719, -4.336921691894531, 10.462730407714844, 4.510890960693359, 4.670890808105469, 17.575279235839844, 6.6022186279296875, -14.186241149902344, 33.469764709472656, 1.627899169921875, 13.822418212890625, -7.1589508056640625, -7.987970352172852, 0.7640838623046875, 8.685216903686523, 7.1617431640625, 5.6192474365234375, -25.326194763183594, -10.247734069824219, 6.595104217529297, -12.201583862304688, 9.542129516601562, 19.225982666015625, 2.43243408203125, 12.735015869140625, 0.8659324645996094, 3.4491729736328125, -1.9647178649902344, -9.423088073730469, 7.732757568359375, -6.157951354980469, -13.650970458984375, 8.144218444824219, 10.11508560180664, 13.425849914550781, -12.87841796875, 18.913558959960938, -0.390899658203125, -0.8642425537109375, 6.026142120361328, 6.795623779296875, 9.343194961547852, 7.574748992919922, 9.8668212890625, 7.10028076171875, -3.8987464904785156, 15.469070434570312, 1.678497314453125, 8.30313491821289, 5.034454345703125, 6.4923553466796875, 21.512802124023438, 1.6890792846679688, 17.412822723388672, 10.621536254882812, 9.087909698486328, 18.616683959960938, -2.5630245208740234, -11.993095397949219, 10.847434997558594, -7.374214172363281, 23.07697296142578, 4.377033233642578, 3.148256301879883, 13.192062377929688, 4.1021270751953125, 16.472637176513672, 1.768402099609375, -2.753082275390625, 10.875602722167969, -0.21685791015625, -2.746368408203125, 3.7810287475585938, 21.742095947265625, 7.763774871826172, 12.452651977539062, 18.200855255126953, -4.680389404296875, 20.546483993530273, 12.20500373840332, -14.326629638671875, -4.593494415283203, 21.560375213623047, -4.700614929199219, 19.991710662841797, 3.2064247131347656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000262.npy"}
{"epoch": 0.793939393939394, "step": 263, "batch_size": 128, "mean": 5.1545538902282715, "std": 11.374492645263672, "min": -28.33592987060547, "p10": -7.576250457763671, "median": 5.117717742919922, "p90": 20.366868591308595, "max": 34.25794982910156, "pos_frac": 0.6640625, "sample": [11.088134765625, -19.493865966796875, -2.6725692749023438, -6.753196716308594, -6.330699920654297, 9.545394897460938, 2.877775192260742, 3.3433761596679688, 11.143253326416016, -28.33592987060547, 31.907318115234375, 1.1939849853515625, 14.035015106201172, -4.441734313964844, 20.441818237304688, 18.923114776611328, 2.6031532287597656, 12.132266998291016, 8.748149871826172, -2.4749908447265625, 6.017677307128906, 0.08312606811523438, 29.798629760742188, 1.758584976196289, 12.710548400878906, 23.47467041015625, 6.410209655761719, 14.104537963867188, 1.8866195678710938, -2.920612335205078, 11.192031860351562, 10.641510009765625, 21.964500427246094, -4.8320465087890625, 5.870290756225586, -2.987396240234375, 9.278055191040039, 3.3348007202148438, 3.9779319763183594, 20.334747314453125, -3.7270355224609375, 14.712030410766602, 4.3948822021484375, -2.7810935974121094, 8.448768615722656, 7.339351654052734, 1.4014739990234375, 15.948837280273438, -14.660545349121094, 6.443935394287109, 6.758039474487305, -16.90081787109375, -1.44793701171875, 28.800384521484375, 5.353527069091797, -4.535469055175781, -9.371307373046875, 7.372776031494141, 4.610958099365234, 6.706733703613281, -8.427391052246094, 27.425806045532227, -1.888936996459961, -8.184600830078125, 25.08453369140625, 3.0318546295166016, -6.654264450073242, 1.4445247650146484, -4.497014999389648, 11.537351608276367, 5.34831428527832, 25.550872802734375, 13.246866226196289, 34.25794982910156, 5.000328063964844, 9.053276062011719, 5.235107421875, 17.332252502441406, 12.88031005859375, 20.094223022460938, 3.8906326293945312, -4.844936370849609, 13.868690490722656, -0.4644432067871094, 1.8557758331298828, -2.5305252075195312, 21.739898681640625, -8.224296569824219, 11.108055114746094, -8.805328369140625, 13.014190673828125, -2.9041519165039062, 32.392852783203125, 5.6130828857421875, 26.796504974365234, 10.217933654785156, -16.02159881591797, -1.74969482421875, 14.224395751953125, 10.051179885864258, -6.421628952026367, 0.723876953125, 8.5313720703125, -1.886505126953125, 14.150306701660156, 12.797740936279297, 15.137344360351562, 4.5462799072265625, -6.097461700439453, 6.254852294921875, -3.020784378051758, -0.5228157043457031, 8.312980651855469, 5.846317291259766, 0.15824127197265625, 2.4730682373046875, -2.5069808959960938, -14.21942138671875, -6.254217147827148, -2.1111221313476562, -8.811910629272461, 14.327953338623047, 11.307380676269531, 6.798652648925781, -2.544231414794922, -7.315528869628906, 9.169174194335938, -19.585304260253906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000263.npy"}
{"epoch": 0.796969696969697, "step": 264, "batch_size": 128, "mean": 4.60404634475708, "std": 11.129776000976562, "min": -24.60516357421875, "p10": -9.006432914733887, "median": 3.9012651443481445, "p90": 18.5840347290039, "max": 32.84801483154297, "pos_frac": 0.6875, "sample": [-0.8281803131103516, 2.129222869873047, 9.12237548828125, 14.460830688476562, 1.994049072265625, 2.7679004669189453, 27.257484436035156, 12.572120666503906, 16.57349395751953, 3.6005115509033203, 3.9375038146972656, -5.020845413208008, -0.997314453125, -17.365745544433594, -13.459152221679688, -5.866127014160156, -5.856964111328125, 3.905862808227539, -2.1190719604492188, 32.84801483154297, 9.086959838867188, 1.60186767578125, -4.060882568359375, 23.858203887939453, -7.527099609375, -2.370687484741211, -5.2065887451171875, 3.89666748046875, -4.755340576171875, 10.162696838378906, -5.237892150878906, -1.4286518096923828, 23.248193740844727, 16.0189208984375, 15.402963638305664, -14.996337890625, 12.413360595703125, 5.216156005859375, 9.210472106933594, -8.95724105834961, 8.772430419921875, 5.307563781738281, 31.686660766601562, 2.2629623413085938, -0.950225830078125, 4.605621337890625, 11.473602294921875, 4.591175079345703, 7.571506500244141, -2.68951416015625, 15.126480102539062, -1.5044021606445312, 2.3553848266601562, 0.27437400817871094, -9.121213912963867, 20.135318756103516, 5.333595275878906, 6.277618408203125, 9.944316864013672, 20.81884765625, -17.886890411376953, -19.4007568359375, 11.706207275390625, 3.383983612060547, 9.751411437988281, -24.60516357421875, 27.95439910888672, 0.6115970611572266, 9.361019134521484, 6.184806823730469, 12.878162384033203, -5.745491027832031, 18.0299072265625, 3.6686573028564453, 2.231679916381836, -4.724906921386719, -2.5043487548828125, 24.068832397460938, 0.7126941680908203, -11.234481811523438, 11.508331298828125, 29.379684448242188, 7.471534729003906, -2.4897823333740234, 31.057022094726562, 2.813018798828125, 13.327133178710938, 3.643524169921875, 13.58375358581543, 1.6846427917480469, 9.402748107910156, -16.269271850585938, -19.28903579711914, 15.57550048828125, 9.26971435546875, 11.525264739990234, 3.6646690368652344, 4.168415069580078, -1.2813968658447266, -9.922805786132812, 6.641136169433594, -15.120109558105469, 1.1366310119628906, -2.132915496826172, 0.2692909240722656, 4.151325225830078, 2.6249771118164062, -3.8631668090820312, 4.365711212158203, 19.876998901367188, 0.43470191955566406, 7.579315185546875, 16.569854736328125, 20.500293731689453, 1.5916481018066406, 9.369407653808594, -14.004093170166016, 9.873321533203125, 4.722831726074219, 2.2853012084960938, -4.235847473144531, -0.560150146484375, 14.255180358886719, -2.361186981201172, 8.032812118530273, 14.321929931640625, 16.959144592285156, 5.26580810546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000264.npy"}
{"epoch": 0.8, "step": 265, "batch_size": 128, "mean": 9.215797424316406, "std": 10.881912231445312, "min": -17.903465270996094, "p10": -3.2580652236938468, "median": 7.50189208984375, "p90": 23.393073463439936, "max": 37.385467529296875, "pos_frac": 0.78125, "sample": [16.004119873046875, 1.255615234375, 10.552780151367188, 1.5457305908203125, 19.091114044189453, 7.210350036621094, 37.385467529296875, 26.58248519897461, -4.728538513183594, 26.43800163269043, -1.6715927124023438, 22.65003204345703, 21.782642364501953, 2.562591552734375, 8.49847412109375, 12.338020324707031, 19.171710968017578, 26.884498596191406, 17.553035736083984, 21.07390594482422, 22.425046920776367, 17.048818588256836, 0.8271656036376953, 19.342010498046875, 12.270545959472656, 12.71368408203125, 3.8505706787109375, 5.0297393798828125, 7.873813629150391, 22.855125427246094, -3.938129425048828, 32.8226318359375, 6.9938201904296875, 4.293979644775391, -1.4776153564453125, -2.8219070434570312, 8.78021240234375, 3.0202407836914062, 16.85699462890625, 5.189661026000977, 20.84557342529297, 28.10561752319336, -1.226715087890625, 7.345367431640625, 8.185266494750977, 4.989953994750977, 15.88385009765625, 19.199321746826172, 7.9850311279296875, 12.2626953125, 31.917888641357422, 0.02878570556640625, 14.98455810546875, 22.912439346313477, 2.3547592163085938, -2.298065185546875, 20.67443084716797, 13.438793182373047, -2.1458816528320312, 12.688140869140625, -17.903465270996094, 3.8132476806640625, -2.5590858459472656, 28.571380615234375, 27.943878173828125, 7.256050109863281, 4.844745635986328, 0.8059959411621094, 13.215629577636719, 2.2971572875976562, 28.860610961914062, -3.06048583984375, 5.918117523193359, 12.502159118652344, 24.51455307006836, 5.544929504394531, -0.5700740814208984, -6.048160552978516, 0.572021484375, 14.5430908203125, 17.38665008544922, 21.76202392578125, 4.314292907714844, -1.7629337310791016, 20.349925994873047, 16.555273056030273, -7.797454833984375, 6.8136138916015625, -9.290666580200195, -5.427886962890625, -3.8942737579345703, -0.7762794494628906, 15.571739196777344, -11.745513916015625, 18.26978302001953, 3.618499755859375, 21.264434814453125, 10.804340362548828, 4.469596862792969, 29.422210693359375, -3.719083786010742, 9.962808609008789, 14.56422233581543, 11.785356521606445, 10.26385498046875, -0.6671543121337891, 8.577774047851562, 6.71397590637207, 7.027116775512695, 26.486515045166016, 3.358936309814453, 0.5315322875976562, -1.3490371704101562, 7.658416748046875, -2.5470199584960938, 10.666305541992188, 0.7133064270019531, -5.50225830078125, 21.295984268188477, -10.778755187988281, 9.977867126464844, -5.075691223144531, 3.914989471435547, -0.9478988647460938, 22.319252014160156, 3.396453857421875, 1.7200889587402344, 4.035858154296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000265.npy"}
{"epoch": 0.803030303030303, "step": 266, "batch_size": 128, "mean": 6.936187744140625, "std": 11.382173538208008, "min": -21.841827392578125, "p10": -6.301082992553711, "median": 5.427425384521484, "p90": 22.452100944519042, "max": 31.348388671875, "pos_frac": 0.7265625, "sample": [-7.48272705078125, 6.884662628173828, 7.906150817871094, 8.66446304321289, 5.381294250488281, 9.459274291992188, -3.524036407470703, 15.885528564453125, -2.8183670043945312, 0.1575775146484375, -0.234161376953125, 30.5711669921875, 6.0316619873046875, 24.41766357421875, -6.31231689453125, -2.8329620361328125, -4.179540634155273, -0.025890350341796875, -21.841827392578125, -1.8946170806884766, 12.117782592773438, 9.208297729492188, 17.37500762939453, -18.883750915527344, 5.8494415283203125, 1.680633544921875, 2.1046600341796875, 6.623584747314453, -1.0053558349609375, 15.409072875976562, 5.4735565185546875, 1.1411552429199219, 15.233062744140625, 2.369220733642578, 3.4607620239257812, 1.1748809814453125, 21.56878662109375, -2.8556747436523438, 21.549041748046875, 17.35570526123047, 0.9455604553222656, 7.95501708984375, 1.40625, 9.614761352539062, 6.302085876464844, 1.7222061157226562, 6.283069610595703, -3.557119369506836, -5.1249542236328125, 2.0215072631835938, 22.057201385498047, 22.12646484375, -7.8451995849609375, -3.8626251220703125, 24.459671020507812, 25.480201721191406, 6.16229248046875, -2.5363426208496094, 17.24839210510254, 31.348388671875, 4.425039291381836, 3.8580474853515625, 25.380584716796875, -1.3427925109863281, 18.05309295654297, 18.302268981933594, 20.976715087890625, 27.04400634765625, 5.1358184814453125, 0.891021728515625, 5.3519287109375, 27.582271575927734, 7.2325439453125, 16.800487518310547, -0.08691024780273438, -13.56298828125, -0.8940868377685547, 1.292572021484375, 17.271249771118164, 12.401323318481445, -8.100860595703125, 28.486328125, 2.3405609130859375, 17.649063110351562, 16.004457473754883, -12.071319580078125, 3.607757568359375, 2.5849609375, 9.522689819335938, 13.398963928222656, 12.13515853881836, 12.205162048339844, -1.3350639343261719, 27.33971405029297, -16.62161636352539, 10.862663269042969, 3.9972877502441406, 7.5336456298828125, -6.296268463134766, 19.811599731445312, 22.000564575195312, 8.066543579101562, 22.270404815673828, 1.8862380981445312, 4.0951385498046875, 14.805656433105469, 5.233940124511719, 23.91100311279297, 16.10715675354004, 20.284408569335938, 8.078826904296875, -1.7202033996582031, -9.381629943847656, -17.278640747070312, 3.306285858154297, -2.5506324768066406, 21.459556579589844, 0.064727783203125, 24.61907196044922, -4.603748321533203, -11.598684310913086, 2.254241943359375, 22.87605857849121, -12.079093933105469, 3.362194061279297, -3.427410125732422, 12.693283081054688, 10.559967041015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000266.npy"}
{"epoch": 0.806060606060606, "step": 267, "batch_size": 128, "mean": 7.157028675079346, "std": 11.423766136169434, "min": -18.616209030151367, "p10": -8.186695098876953, "median": 6.521247863769531, "p90": 22.86457176208496, "max": 39.075103759765625, "pos_frac": 0.734375, "sample": [10.443954467773438, 20.368703842163086, -1.3977508544921875, 15.755401611328125, 6.993549346923828, 8.459747314453125, -0.5901336669921875, -8.628725051879883, 3.2973899841308594, 17.116348266601562, 4.5948944091796875, -8.4376220703125, 1.322164535522461, 3.5637435913085938, 13.769096374511719, 4.518390655517578, 24.051607131958008, 21.568740844726562, -4.847667694091797, 21.53232192993164, -13.697647094726562, 29.494461059570312, 3.114023208618164, 3.594623565673828, 1.0797576904296875, -2.198190689086914, 0.9321708679199219, 25.242408752441406, 8.761272430419922, -0.09259033203125, 0.9952621459960938, -2.6658554077148438, 18.82776641845703, 11.424821853637695, -9.720458984375, -7.56341552734375, 4.870964050292969, -1.1146202087402344, 15.873903274536133, 22.935577392578125, -12.930374145507812, 5.45263671875, 12.896289825439453, -4.784210205078125, 1.9892158508300781, -11.029067993164062, -6.2417755126953125, 5.4461669921875, 3.39605712890625, 11.369796752929688, 11.651786804199219, 14.699043273925781, 25.315895080566406, 25.3951473236084, 2.571380615234375, -3.702068328857422, 4.210849761962891, 14.656570434570312, 7.205963134765625, 28.061756134033203, 28.972869873046875, 19.207977294921875, -18.616209030151367, 11.797908782958984, 13.597953796386719, -4.599449157714844, 11.535919189453125, 14.037294387817383, 0.2799091339111328, 24.849380493164062, -2.53546142578125, -12.206642150878906, 9.833297729492188, 8.287473678588867, 16.895057678222656, -7.4206695556640625, 22.415542602539062, 24.73016357421875, 11.961051940917969, 9.172416687011719, 11.415611267089844, 27.82247543334961, 21.819766998291016, 11.329803466796875, -10.770679473876953, 4.164787292480469, 39.075103759765625, 20.87239646911621, 7.3995819091796875, 3.7146072387695312, 6.646144866943359, 19.466434478759766, 6.396350860595703, -3.3908557891845703, 1.8632965087890625, 13.422348022460938, 16.78784942626953, -9.748199462890625, -2.587818145751953, 6.7563934326171875, 5.61590576171875, 14.056198120117188, -6.829078674316406, 29.2239990234375, -4.364692687988281, 5.0052490234375, -12.746726989746094, 6.332944869995117, 17.666336059570312, 7.125783920288086, 9.170501708984375, -5.171424865722656, 9.509063720703125, 4.356292724609375, -6.7904052734375, 0.5193328857421875, 17.391498565673828, 9.912960052490234, 0.5259513854980469, 8.638526916503906, 15.43941879272461, -11.522872924804688, 5.6644439697265625, -11.12115478515625, 14.123001098632812, 5.782997131347656, 22.83414077758789, -8.079154968261719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000267.npy"}
{"epoch": 0.8090909090909091, "step": 268, "batch_size": 128, "mean": 7.952608585357666, "std": 11.250640869140625, "min": -26.753501892089844, "p10": -3.535246086120605, "median": 5.974323272705078, "p90": 23.870670509338378, "max": 36.74376678466797, "pos_frac": 0.71875, "sample": [0.36992835998535156, -4.816608428955078, 31.22264862060547, 12.281429290771484, 6.632476806640625, 15.575098037719727, -15.671821594238281, -3.5110721588134766, -3.6730270385742188, 18.513954162597656, 0.2694072723388672, -4.8975067138671875, 11.45074462890625, 12.285018920898438, 12.033422470092773, -3.38726806640625, 13.536893844604492, 0.0883331298828125, 8.035003662109375, 10.66714859008789, 1.6781692504882812, 21.2841796875, -6.992332458496094, 7.743381500244141, -0.9408016204833984, 24.275161743164062, 19.659942626953125, -3.2396278381347656, 1.0353374481201172, 28.629791259765625, 10.548225402832031, 4.514915466308594, -7.749950408935547, 18.331153869628906, 2.4149169921875, -3.1708908081054688, 4.984098434448242, 14.019622802734375, 0.014678955078125, 3.2767715454101562, 17.80675506591797, -3.5916519165039062, 21.39562225341797, -13.380386352539062, -6.6169281005859375, 3.7899322509765625, 18.7635498046875, 36.74376678466797, 5.3752288818359375, -0.8207340240478516, 30.565460205078125, 5.7672119140625, -0.5462532043457031, -26.753501892089844, -9.405838012695312, 15.521781921386719, 2.977153778076172, 9.208480834960938, 23.59648895263672, -1.9250030517578125, -3.07269287109375, 5.415672302246094, 25.952415466308594, 13.202590942382812, 8.00344467163086, -2.0548553466796875, -2.06561279296875, 25.272802352905273, 21.425460815429688, 10.977546691894531, -4.16778564453125, -5.1029052734375, 11.44464111328125, 24.422819137573242, 35.62837219238281, 8.548681259155273, 6.3126373291015625, -3.1027088165283203, 14.211372375488281, 17.771141052246094, -0.9409637451171875, 5.965953826904297, 2.618988037109375, -1.4332733154296875, 24.557781219482422, -1.7509841918945312, 14.69561767578125, 1.845001220703125, 2.780132293701172, 28.783843994140625, -1.9799175262451172, 13.28497314453125, 3.0577621459960938, 19.641616821289062, 15.339752197265625, 2.4342479705810547, 4.711906433105469, 2.1194610595703125, 4.511322021484375, 13.892066955566406, 11.119132995605469, 25.648155212402344, 8.113550186157227, 10.207218170166016, 1.1071243286132812, 30.916606903076172, 20.72173309326172, -0.4387168884277344, 8.080699920654297, 15.922599792480469, 19.17386245727539, -2.797849655151367, 16.804702758789062, 17.970232009887695, 13.318811416625977, -1.6671504974365234, 3.4479713439941406, 1.7797012329101562, 10.92312240600586, 5.982692718505859, -0.8401317596435547, 23.291717529296875, -1.2601852416992188, -0.9502105712890625, 11.097923278808594, 23.697317123413086, -3.1734695434570312, 0.7863540649414062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000268.npy"}
{"epoch": 0.8121212121212121, "step": 269, "batch_size": 128, "mean": 6.81407356262207, "std": 11.125000953674316, "min": -16.29345703125, "p10": -7.204471397399901, "median": 6.051759719848633, "p90": 21.943321800231935, "max": 35.88079833984375, "pos_frac": 0.7265625, "sample": [5.744054794311523, 8.671981811523438, 2.1399917602539062, 18.630218505859375, -1.29693603515625, 21.413541793823242, 15.158275604248047, 0.005069732666015625, 24.779518127441406, 6.023845672607422, 4.216220855712891, -4.712047576904297, -10.08560562133789, 1.040985107421875, 2.649658203125, -0.5682106018066406, 11.395462036132812, -3.5030899047851562, -14.654098510742188, 2.474700927734375, 22.548049926757812, 9.100383758544922, 3.2054443359375, -0.2157440185546875, 3.181915283203125, 17.63007354736328, -2.209392547607422, 14.266433715820312, -14.056510925292969, 14.35638427734375, 26.112640380859375, 24.760848999023438, 7.111572265625, 7.353450775146484, -0.39907073974609375, 2.868986129760742, 24.601375579833984, 18.39654541015625, 21.520706176757812, 18.21417236328125, -4.898185729980469, 30.638214111328125, 5.9761962890625, 8.212066650390625, -16.29345703125, 2.2566757202148438, 13.160354614257812, 6.1060638427734375, 9.557235717773438, 18.331634521484375, 7.04534912109375, 10.754226684570312, -6.924570083618164, -2.4415359497070312, -1.3133697509765625, -8.832015991210938, -10.66037368774414, 21.817611694335938, -11.287904739379883, 0.605712890625, 20.28907012939453, 24.624710083007812, -10.283626556396484, -4.825290679931641, -12.9100341796875, 8.116521835327148, -0.38944244384765625, 2.6427536010742188, 16.977100372314453, -15.903396606445312, 11.209735870361328, -5.4573822021484375, 24.215778350830078, 21.908782958984375, 11.709823608398438, 7.170330047607422, 9.710823059082031, 3.3086090087890625, 17.496749877929688, 16.948089599609375, -7.857574462890625, -3.9515380859375, 3.34576416015625, 2.8241958618164062, 2.8521728515625, 3.270610809326172, -3.144256591796875, 3.8280029296875, 15.404548645019531, 35.88079833984375, 2.377420425415039, 11.859710693359375, 6.3701171875, -12.088272094726562, 14.340377807617188, 1.1578865051269531, 0.548858642578125, 16.106569290161133, 9.82044792175293, 22.02391242980957, 3.682098388671875, 9.580810546875, 10.074020385742188, 1.4489364624023438, 6.079673767089844, -5.284156799316406, 18.09521484375, -3.3277359008789062, 26.5208740234375, 2.448455810546875, -1.7519149780273438, 17.29684829711914, 19.061111450195312, -0.898406982421875, 23.045211791992188, 9.165908813476562, -12.530902862548828, 14.2342529296875, -5.285331726074219, 16.86752700805664, 28.478290557861328, 2.3897972106933594, 11.179328918457031, -4.7843017578125, 7.066093444824219, 18.839540481567383, 8.489448547363281, 0.8095550537109375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000269.npy"}
{"epoch": 0.8151515151515152, "step": 270, "batch_size": 128, "mean": 6.823485374450684, "std": 11.064513206481934, "min": -18.90020751953125, "p10": -5.098513793945312, "median": 4.663002014160156, "p90": 22.314521026611324, "max": 35.74147033691406, "pos_frac": 0.7109375, "sample": [1.30859375, 7.936607360839844, -1.2615795135498047, 29.73072052001953, 4.092657089233398, -1.5547409057617188, 15.098880767822266, 14.01873779296875, -0.019908905029296875, 0.9888343811035156, 25.972373962402344, 12.796756744384766, 0.9160919189453125, -4.161548614501953, 8.73046875, -14.264236450195312, -1.2007713317871094, -2.0106773376464844, 14.71014404296875, 3.67694091796875, 8.37875747680664, 1.4481964111328125, 18.878379821777344, -2.403200149536133, 4.6118621826171875, 0.6094131469726562, 16.330108642578125, 13.752983093261719, 2.74224853515625, -0.452484130859375, 5.507724761962891, 9.901039123535156, 21.780899047851562, 2.302509307861328, 18.44347381591797, 0.4952545166015625, -3.5186004638671875, 4.714141845703125, -1.61883544921875, 1.4002933502197266, -0.5355453491210938, -5.574493408203125, -3.280588150024414, 3.6311111450195312, 35.74147033691406, 15.652618408203125, -2.087921142578125, -2.0244369506835938, -2.1064224243164062, -4.7841339111328125, 12.308696746826172, 1.5418205261230469, 5.535850524902344, 5.432456970214844, -5.3082275390625, 17.478225708007812, -0.667510986328125, 2.2111682891845703, -5.369087219238281, 16.39716148376465, 2.2498321533203125, 25.261337280273438, 20.197341918945312, 0.2445526123046875, -9.400009155273438, 0.5667724609375, 21.791473388671875, 19.6937313079834, 2.9282665252685547, -16.870864868164062, 8.251068115234375, 9.809326171875, 24.519149780273438, 8.57220458984375, -0.09863662719726562, 17.88153076171875, 19.028884887695312, -7.2300872802734375, 17.71112060546875, 11.571189880371094, 20.60552978515625, 4.861175537109375, 31.020538330078125, -0.3594970703125, -5.008636474609375, 17.167530059814453, -7.5855255126953125, 1.5020465850830078, -2.600982666015625, -11.156139373779297, 5.073936462402344, 12.71527099609375, -3.8421401977539062, 0.8064842224121094, 0.2018909454345703, 8.437713623046875, 6.170341491699219, 5.713592529296875, 3.4100608825683594, 17.897781372070312, 18.944547653198242, 12.135456085205078, -7.6337432861328125, 4.9807891845703125, 27.483596801757812, 18.567245483398438, -8.153226852416992, 7.2282562255859375, -18.90020751953125, 3.8425064086914062, -16.241058349609375, 26.32660675048828, 1.2873115539550781, 23.53496551513672, 9.871746063232422, 30.037460327148438, 24.161842346191406, -4.299629211425781, 24.348121643066406, 5.872810363769531, -1.1990280151367188, 28.199722290039062, 1.1995620727539062, 12.288597106933594, 15.3421630859375, 0.8233375549316406, 8.848270416259766, 19.798200607299805], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000270.npy"}
{"epoch": 0.8181818181818182, "step": 271, "batch_size": 128, "mean": 8.117181777954102, "std": 10.965630531311035, "min": -20.297056198120117, "p10": -5.878273010253906, "median": 7.165012359619141, "p90": 22.459716606140137, "max": 29.83536148071289, "pos_frac": 0.75, "sample": [15.751262664794922, 20.198440551757812, 14.27789306640625, 22.502965927124023, 1.7870025634765625, 21.158843994140625, -8.134437561035156, 18.190988540649414, 11.577354431152344, 20.517189025878906, 4.141532897949219, 18.265172958374023, -3.9685745239257812, 2.4015960693359375, 24.35997772216797, 16.55731201171875, 19.11815643310547, 27.768310546875, -4.674613952636719, 11.134176254272461, 6.719526290893555, -8.596366882324219, 10.957435607910156, 4.7770233154296875, -5.908447265625, 4.920280456542969, -1.2874088287353516, 10.144414901733398, 6.6995391845703125, 2.3779220581054688, -0.8910999298095703, 5.4109954833984375, 15.12388801574707, 0.3560028076171875, 0.836578369140625, -3.8511962890625, 7.096710205078125, 8.688863754272461, 0.4428367614746094, -20.297056198120117, 16.606407165527344, 14.257244110107422, 20.677978515625, -0.3316917419433594, -10.551359176635742, -0.24091339111328125, 16.422462463378906, 11.562740325927734, -0.5958786010742188, -10.557044982910156, 10.506996154785156, 21.529640197753906, 9.40738296508789, -1.2593002319335938, 16.296417236328125, 10.285820007324219, 12.34775161743164, 6.273983001708984, -2.2641849517822266, 12.556716918945312, 8.514049530029297, -10.038753509521484, 23.74022102355957, 7.233314514160156, 16.268943786621094, 14.807571411132812, 5.956321716308594, 20.747833251953125, 9.551362991333008, 4.374153137207031, 3.954681396484375, 6.065700531005859, -5.8653411865234375, 14.1744384765625, -9.169708251953125, 23.510272979736328, 27.444595336914062, 28.874465942382812, 9.130651473999023, -1.5315780639648438, 1.5547466278076172, 27.14044189453125, -5.584861755371094, -3.867706298828125, 1.2645111083984375, 6.3406982421875, -3.9527587890625, 21.803451538085938, -1.3217506408691406, 27.135276794433594, 20.056602478027344, 27.946456909179688, -6.625877380371094, 1.1597061157226562, 0.5970973968505859, 5.651348114013672, 5.4766693115234375, 6.7594146728515625, 13.798759460449219, -3.3430557250976562, -4.055046081542969, 12.773551940917969, 8.57040023803711, 19.565547943115234, -12.447420120239258, -17.54998016357422, 1.3503150939941406, -0.5774002075195312, 10.851699829101562, -6.6859893798828125, 22.441181182861328, 27.886581420898438, 9.557289123535156, 1.3795089721679688, 27.24669647216797, -8.610237121582031, 13.947980880737305, 6.297187805175781, 20.476119995117188, 4.462444305419922, 5.768718719482422, 29.83536148071289, 7.840446472167969, 6.895351409912109, 9.669933319091797, 21.875778198242188, 9.522796630859375, 21.395822525024414], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000271.npy"}
{"epoch": 0.8212121212121212, "step": 272, "batch_size": 128, "mean": 7.7396345138549805, "std": 11.811171531677246, "min": -28.394874572753906, "p10": -6.242243194580075, "median": 6.694849967956543, "p90": 24.36797637939453, "max": 33.80780029296875, "pos_frac": 0.75, "sample": [-2.861358642578125, 5.510679244995117, 11.492790222167969, -0.08331871032714844, 8.557830810546875, -0.436279296875, 14.115310668945312, 25.43012237548828, 24.216598510742188, -9.48337173461914, 7.621883392333984, 16.21361541748047, -3.1579151153564453, -2.0203399658203125, 4.025585174560547, 13.222427368164062, 33.80780029296875, 2.6162567138671875, 16.411087036132812, 0.2563934326171875, 18.11579132080078, 0.4618492126464844, -11.790237426757812, -0.7534446716308594, 6.586688995361328, 4.82586669921875, -2.3369216918945312, 4.794885635375977, 3.9535140991210938, 3.0627059936523438, 8.255859375, 2.413818359375, -15.845603942871094, 9.91461181640625, 4.5124664306640625, 17.342500686645508, 11.150115966796875, 11.401611328125, 26.760421752929688, 6.3985595703125, 3.6799240112304688, 20.55910873413086, -28.394874572753906, 0.3744182586669922, -4.6369781494140625, -5.435615539550781, 2.802398681640625, 17.872604370117188, 11.059829711914062, 2.4216365814208984, 15.40692138671875, 6.3392333984375, 13.116592407226562, 11.837615966796875, 30.80078125, 16.145843505859375, 8.596084594726562, 6.803010940551758, 1.1249847412109375, -5.143774032592773, 20.393142700195312, 26.335281372070312, -2.191883087158203, 2.094083786010742, 7.8349761962890625, -16.184391021728516, 5.21856689453125, 4.520477294921875, 15.114707946777344, 10.206581115722656, -16.422033309936523, 7.946565628051758, -9.383285522460938, 3.988727569580078, 12.970726013183594, 6.46075439453125, 8.317718505859375, 10.528114318847656, 15.660263061523438, 21.172607421875, 15.43716049194336, 0.05491447448730469, 25.75603485107422, 11.251251220703125, 24.9510498046875, -13.159370422363281, 7.105175018310547, 17.789230346679688, 33.28510284423828, -8.124374389648438, 1.9527397155761719, 28.461959838867188, 23.768905639648438, 28.299530029296875, -8.294351577758789, -1.7611312866210938, 11.531341552734375, 5.395660400390625, -3.656707763671875, 1.8569526672363281, 0.2038116455078125, 17.65886688232422, 14.5230712890625, 3.336355209350586, 17.664260864257812, -4.244621276855469, 14.999317169189453, -9.941780090332031, -1.231924057006836, -12.310821533203125, 31.656829833984375, -10.245643615722656, 5.3074188232421875, 11.567276000976562, 31.016456604003906, -0.7923774719238281, 2.5269622802734375, -0.6012725830078125, 12.5242919921875, 10.119813919067383, 24.72119140625, -5.2035064697265625, 24.02114486694336, 22.40430450439453, -1.8676261901855469, 21.65582275390625, 18.67687225341797, 16.035308837890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000272.npy"}
{"epoch": 0.8242424242424242, "step": 273, "batch_size": 128, "mean": 6.944282531738281, "std": 11.066843032836914, "min": -21.499176025390625, "p10": -7.561933135986328, "median": 6.224147796630859, "p90": 20.774617767333986, "max": 40.55793762207031, "pos_frac": 0.7578125, "sample": [16.706180572509766, 3.8960189819335938, -4.412528991699219, 29.527938842773438, 6.190090179443359, 1.0711898803710938, -12.384788513183594, -9.979240417480469, 9.742996215820312, 11.926826477050781, -2.0269012451171875, 17.973281860351562, -1.1178207397460938, 28.569854736328125, 1.7135353088378906, 17.570098876953125, -4.23504638671875, 6.753814697265625, -4.9113006591796875, -4.503473281860352, 24.03087615966797, 28.367111206054688, 9.75830078125, -2.0023117065429688, 5.725311279296875, -18.35243034362793, 4.370330810546875, 12.448532104492188, 6.2654266357421875, 40.55793762207031, 12.995498657226562, -16.295616149902344, -7.554771423339844, 5.336673736572266, 9.98101806640625, -4.5179595947265625, 20.244617462158203, -4.003910064697266, 1.2369976043701172, 15.411067962646484, 18.016021728515625, 15.2335205078125, 3.356781005859375, 0.18982887268066406, 4.48944091796875, 3.254762649536133, -2.517925262451172, -12.083232879638672, -1.9681587219238281, 24.355545043945312, 12.006492614746094, 9.460060119628906, 4.0335693359375, 7.501132965087891, -9.576440811157227, 12.643714904785156, 0.8634223937988281, 13.068550109863281, -9.467964172363281, 0.5714092254638672, 13.315387725830078, 19.868206024169922, 19.25397491455078, 4.078636169433594, 24.511005401611328, 4.078765869140625, 20.819740295410156, 5.1029815673828125, 8.98080825805664, 16.80650520324707, 14.026016235351562, -1.5367431640625, 0.8176288604736328, 3.393634796142578, -10.319225311279297, 2.5081748962402344, -7.578643798828125, 7.675374984741211, 11.402812957763672, 8.679946899414062, 17.286405563354492, 4.1978607177734375, 9.625919342041016, -0.4113006591796875, 5.122859954833984, 20.755279541015625, 6.658605575561523, 0.8856658935546875, 14.436935424804688, 15.292171478271484, 21.60993194580078, 14.001823425292969, 24.404678344726562, -0.76483154296875, 0.3906364440917969, 3.486513137817383, 4.3707427978515625, -9.109691619873047, -21.499176025390625, 7.871761322021484, 4.750146865844727, 6.580940246582031, -2.2533836364746094, 14.479843139648438, 3.738525390625, 13.407161712646484, 15.226924896240234, -3.476348876953125, 20.497901916503906, 25.085105895996094, 10.406923294067383, 2.01861572265625, 1.7888679504394531, 6.258205413818359, -12.902315139770508, 19.3486328125, 3.6068267822265625, 7.536376953125, -2.318492889404297, 9.010381698608398, 18.618375778198242, 10.229236602783203, 31.116676330566406, 6.272981643676758, 20.832603454589844, -14.696516036987305, 18.262163162231445, 5.440093994140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000273.npy"}
{"epoch": 0.8272727272727273, "step": 274, "batch_size": 128, "mean": 7.6697797775268555, "std": 10.647385597229004, "min": -16.62456512451172, "p10": -5.57898826599121, "median": 6.755856513977051, "p90": 21.786319732666016, "max": 38.458038330078125, "pos_frac": 0.7890625, "sample": [5.149896621704102, 0.7247085571289062, 13.775558471679688, 3.1336517333984375, 29.9769287109375, 6.904619216918945, -4.101799011230469, 38.458038330078125, 14.47119140625, 27.054458618164062, 4.365285873413086, 8.795761108398438, 19.057472229003906, 6.4395751953125, 23.516342163085938, 8.22064208984375, -16.62456512451172, 11.389045715332031, -7.6202850341796875, 9.191509246826172, 16.02094268798828, 3.2366905212402344, 16.80352783203125, 11.618585586547852, 2.06475830078125, 20.581859588623047, 24.21505355834961, 7.531276702880859, 16.341018676757812, 0.3885955810546875, 9.914962768554688, -6.873687744140625, 15.927703857421875, 15.568435668945312, 9.290359497070312, 6.005035400390625, 3.1062488555908203, 9.293790817260742, 6.607093811035156, -0.177764892578125, 16.123306274414062, 9.259185791015625, 7.917938232421875, 16.95404815673828, 11.875732421875, 3.544330596923828, 2.9656143188476562, -0.6118850708007812, 13.124862670898438, 31.185096740722656, 19.33470916748047, 9.114601135253906, -14.255630493164062, 11.249752044677734, 6.386749267578125, -10.795295715332031, 2.4824752807617188, 9.369258880615234, 7.149513244628906, 6.458946228027344, 9.390729904174805, 5.1180572509765625, 16.622482299804688, -2.2348155975341797, 9.719863891601562, 11.703239440917969, 1.2939376831054688, -6.4589996337890625, -6.181781768798828, 17.474151611328125, -4.349937438964844, 35.677391052246094, -9.309867858886719, 0.9759178161621094, 2.121318817138672, 11.921646118164062, 14.447189331054688, 10.052854537963867, -2.77197265625, 8.212623596191406, 1.3516159057617188, 15.058998107910156, 3.049468994140625, -5.320648193359375, 16.10820770263672, 4.3329315185546875, -8.693038940429688, 2.5087890625, -6.944370269775391, 5.40679931640625, 3.1233367919921875, -12.701324462890625, 5.762794494628906, 8.066558837890625, 3.945648193359375, 1.8395004272460938, 23.000694274902344, 12.433097839355469, -6.885589599609375, 21.308090209960938, 0.9372444152832031, -1.177398681640625, -0.491302490234375, 7.806890487670898, 9.293167114257812, 5.135997772216797, 26.663482666015625, 22.230335235595703, 7.806327819824219, 18.80963897705078, 1.8902778625488281, 3.2570877075195312, 6.5230712890625, 1.4547538757324219, 10.639320373535156, -4.2808380126953125, 35.0418701171875, 2.7281265258789062, -8.5218505859375, 22.156982421875, 17.621784210205078, -4.3139190673828125, 1.0943222045898438, -4.82952880859375, 30.998382568359375, 21.627464294433594, -5.054462432861328, -2.0668697357177734], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000274.npy"}
{"epoch": 0.8303030303030303, "step": 275, "batch_size": 128, "mean": 5.793660640716553, "std": 11.393223762512207, "min": -23.949878692626953, "p10": -7.300573348999023, "median": 4.787141799926758, "p90": 23.443871307373048, "max": 33.94365692138672, "pos_frac": 0.6640625, "sample": [-3.602081298828125, 16.529808044433594, 2.3343429565429688, 11.806068420410156, -4.350063323974609, -0.803375244140625, 14.727226257324219, 3.656435012817383, -10.242660522460938, 7.014745712280273, 17.735923767089844, 12.8857421875, 21.314701080322266, 11.487464904785156, 16.9132080078125, -12.481395721435547, 13.71685791015625, -3.561372756958008, 4.148593902587891, 23.393341064453125, -2.7908935546875, -5.21807861328125, -7.272151947021484, 15.957588195800781, 19.516357421875, 5.4496002197265625, -8.394363403320312, 14.848026275634766, 14.23944091796875, 9.08245849609375, 8.424942016601562, 5.470191955566406, 2.9342041015625, 27.950599670410156, 22.237457275390625, 2.29144287109375, 4.216953277587891, 24.11404037475586, -0.8311004638671875, -0.3822784423828125, 2.8568954467773438, -1.9052963256835938, 2.807666778564453, 24.72447967529297, 1.8100357055664062, -11.42230224609375, 23.79047203063965, 19.78118896484375, -11.483169555664062, 5.5640869140625, -2.497648239135742, 3.950958251953125, 8.20120620727539, 9.615543365478516, -3.244342803955078, 12.01129150390625, 3.6359405517578125, 4.742198944091797, -0.3559379577636719, 33.94365692138672, 23.56177520751953, -23.949878692626953, -16.019916534423828, -4.352081298828125, 6.565708160400391, 29.683380126953125, 23.636451721191406, 10.505973815917969, -15.736248016357422, 3.1448822021484375, -3.2326183319091797, -7.366889953613281, 24.808914184570312, 6.918190002441406, 9.085258483886719, 5.61567497253418, 10.083976745605469, -4.452358245849609, 14.598155975341797, 7.8516082763671875, -0.574462890625, 7.2808074951171875, 1.5398292541503906, 12.682910919189453, -0.8559112548828125, 1.8227310180664062, 0.4893798828125, 12.220821380615234, 2.2210006713867188, 15.553787231445312, -4.786552429199219, 10.535140991210938, 24.438201904296875, 4.832084655761719, 21.50594711303711, -2.387327194213867, -1.0930252075195312, 5.281166076660156, 16.920543670654297, 8.2333984375, 9.772785186767578, 3.690540313720703, 2.6920700073242188, 26.75519561767578, 24.269981384277344, -5.829368591308594, 13.687728881835938, -1.9904975891113281, -4.5386810302734375, -10.290603637695312, -14.204410552978516, 4.542022705078125, -5.240745544433594, 19.896591186523438, 5.310356140136719, 8.581106185913086, 1.2686843872070312, 7.431987762451172, 8.174057006835938, 11.72952651977539, -19.14665985107422, -0.8849220275878906, 29.423049926757812, -16.77876091003418, -3.761077880859375, -3.2581405639648438, -1.8170547485351562, -5.701499938964844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000275.npy"}
{"epoch": 0.8333333333333334, "step": 276, "batch_size": 128, "mean": 6.278921127319336, "std": 10.13658332824707, "min": -20.9122314453125, "p10": -2.8847354888916015, "median": 5.413814544677734, "p90": 17.843084144592282, "max": 46.178009033203125, "pos_frac": 0.7734375, "sample": [5.936408996582031, 10.286697387695312, 11.887327194213867, 7.5460357666015625, 9.217002868652344, 11.937356948852539, 46.178009033203125, -0.5119857788085938, 11.926437377929688, 12.75103759765625, -20.9122314453125, -1.6041202545166016, -1.6157665252685547, 2.2840805053710938, 13.172782897949219, 0.7363128662109375, -0.8707752227783203, 1.553619384765625, -5.639213562011719, 22.212615966796875, -8.506088256835938, 16.480506896972656, 2.8937110900878906, 6.8075714111328125, 1.8105545043945312, -1.8037109375, 2.13323974609375, 2.6507797241210938, 2.83856201171875, -2.283966064453125, 1.4184036254882812, 5.9247894287109375, 1.2518901824951172, 20.02857208251953, -0.2758769989013672, 2.744152069091797, 25.179784774780273, 3.6235809326171875, 13.357635498046875, 4.954681396484375, 2.4455108642578125, 27.437572479248047, 2.5745391845703125, -0.945648193359375, 9.862586975097656, 6.100988388061523, 2.1670074462890625, -0.7485313415527344, -1.8384819030761719, 4.737083435058594, 3.7351741790771484, -4.011928558349609, 17.437278747558594, 15.376968383789062, 18.418136596679688, 15.514841079711914, -6.0128631591796875, 16.403396606445312, 0.8051528930664062, 5.7612762451171875, 0.24886322021484375, 5.066352844238281, 11.975387573242188, 5.8291015625, -5.4478759765625, 5.878196716308594, 0.2641143798828125, -3.0426483154296875, 12.323143005371094, 5.903707504272461, 0.7068252563476562, 7.8603973388671875, -0.641082763671875, 11.130496978759766, 14.924751281738281, -2.817058563232422, 15.243095397949219, 12.4124755859375, 0.75701904296875, 14.814208984375, 18.82941436767578, -19.92482566833496, 18.363914489746094, -2.192953109741211, 14.91021728515625, 11.371719360351562, -4.7521820068359375, -1.2885589599609375, -17.251468658447266, 9.279556274414062, 30.59185791015625, -1.9621353149414062, 8.465646743774414, -19.462875366210938, 17.619871139526367, 2.9744720458984375, 6.005424499511719, 16.828201293945312, 11.820892333984375, 31.727218627929688, 7.256744384765625, 15.5966796875, 1.3154373168945312, -10.653104782104492, 21.545726776123047, 0.23632049560546875, 7.403167724609375, 18.46434783935547, 3.0313186645507812, -16.755081176757812, 1.8612289428710938, 0.8479728698730469, 15.084091186523438, 8.356857299804688, 4.0108642578125, 20.839248657226562, 0.004791259765625, 1.4653396606445312, 15.296756744384766, 2.7179336547851562, 11.072624206542969, 7.612283706665039, 6.6756439208984375, 12.111091613769531, 16.350799560546875, -1.7574920654296875, 13.268386840820312, 2.1785640716552734], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000276.npy"}
{"epoch": 0.8363636363636363, "step": 277, "batch_size": 128, "mean": 6.833721160888672, "std": 12.351408958435059, "min": -29.144912719726562, "p10": -6.713299179077148, "median": 4.782634735107422, "p90": 25.154280471801755, "max": 35.78813171386719, "pos_frac": 0.7109375, "sample": [33.50726318359375, -2.2307090759277344, -9.558643341064453, 16.244935989379883, -0.12618255615234375, -3.9289703369140625, 21.38564682006836, 18.147491455078125, -3.012125015258789, 17.533119201660156, 3.4489803314208984, -29.144912719726562, -7.42431640625, 16.61334228515625, 1.9701766967773438, 19.05828857421875, 9.499366760253906, 33.478515625, 9.946556091308594, 13.84267807006836, 31.090805053710938, 4.325347900390625, 3.853424072265625, 27.915634155273438, -21.97765350341797, -13.625808715820312, 12.191276550292969, 16.8255615234375, 11.235679626464844, -4.978107452392578, -3.3518218994140625, 12.923797607421875, -1.97393798828125, -3.1330528259277344, 3.1568946838378906, 4.44671630859375, 24.931121826171875, 17.225234985351562, 2.6391143798828125, 13.778411865234375, 18.850387573242188, 12.431793212890625, 4.8930816650390625, -3.18572998046875, -6.622344970703125, -9.960617065429688, 35.78813171386719, 16.835161209106445, 2.5757217407226562, 8.58438491821289, 2.6236095428466797, 2.8545494079589844, -5.2368927001953125, -1.6185073852539062, -1.9083213806152344, 32.70521545410156, -2.6821441650390625, -7.6567840576171875, 12.766189575195312, 4.672187805175781, -0.7629776000976562, -4.679128646850586, 29.2618408203125, 3.8924636840820312, 2.0775680541992188, 20.134033203125, 25.674983978271484, 20.070465087890625, 5.9550628662109375, 7.367147445678711, 1.4702186584472656, 9.544628143310547, 9.89571762084961, -8.529052734375, 7.693031311035156, 3.605497360229492, 0.5830364227294922, 8.6363525390625, 10.163383483886719, 9.76934814453125, 19.293163299560547, -6.925525665283203, 0.755279541015625, -7.761371612548828, 7.886445999145508, 0.7844257354736328, 17.10016632080078, 0.7172012329101562, 4.95457649230957, 1.2747650146484375, 18.618377685546875, -2.1660919189453125, -6.438449859619141, 24.882064819335938, 27.42425537109375, 0.7659149169921875, -2.9238433837890625, 18.657958984375, -0.44612884521484375, 0.757049560546875, -5.3229217529296875, 13.769599914550781, -22.652231216430664, 8.738292694091797, 8.80328369140625, 5.419677734375, 26.868141174316406, 7.588354110717773, -7.273386001586914, 4.335990905761719, 33.272403717041016, -3.8195648193359375, -16.53997802734375, 14.195716857910156, 5.078369140625, 5.152778625488281, 0.5423660278320312, 5.622386932373047, 1.2170829772949219, 4.197418212890625, 9.27557373046875, 13.310749053955078, 2.6739120483398438, 28.344953536987305, 26.884510040283203, -6.535087585449219, 20.425457000732422, -1.3496170043945312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000277.npy"}
{"epoch": 0.8393939393939394, "step": 278, "batch_size": 128, "mean": 6.401876449584961, "std": 12.12166976928711, "min": -32.14170837402344, "p10": -9.044068336486816, "median": 6.333687782287598, "p90": 22.361773490905758, "max": 35.013214111328125, "pos_frac": 0.7265625, "sample": [0.9430809020996094, -14.712509155273438, 5.914546966552734, 8.141220092773438, 21.146705627441406, 1.0780792236328125, -16.14617919921875, -4.109001159667969, 3.5425262451171875, -2.3863868713378906, 1.6670970916748047, 19.8825740814209, 11.866241455078125, 4.066551208496094, 25.116600036621094, 1.89910888671875, 12.858966827392578, 20.00470733642578, -13.183544158935547, 20.05768585205078, -32.14170837402344, 12.18246841430664, -5.46954345703125, 2.7086524963378906, 17.952716827392578, -6.347381591796875, 13.589855194091797, 0.5561561584472656, -18.590682983398438, -2.1330909729003906, 25.98595428466797, 11.829483032226562, -6.311309814453125, -3.142608642578125, 23.935001373291016, -5.611091613769531, 9.40704345703125, 4.345951080322266, 24.745702743530273, 26.238243103027344, 14.839151382446289, 11.123832702636719, 9.675209045410156, 23.656099319458008, 11.37298583984375, 20.712268829345703, 8.372554779052734, 7.4598388671875, 8.49176025390625, 10.260444641113281, 1.0227432250976562, 4.97906494140625, -9.769012451171875, 3.0359745025634766, 8.691726684570312, 8.754745483398438, 4.766437530517578, 5.465045928955078, 10.024703979492188, -8.863122940063477, 33.431427001953125, 15.286561965942383, 22.90252685546875, -1.5570964813232422, 12.022857666015625, 5.09222412109375, 9.342514038085938, -6.3695526123046875, -11.366676330566406, 19.534767150878906, 30.216014862060547, 0.5653762817382812, 0.9794464111328125, 0.4957618713378906, 18.626941680908203, 15.855781555175781, -5.2312164306640625, 8.482429504394531, 15.388172149658203, -2.1733551025390625, 26.661582946777344, 6.5833282470703125, -5.3502655029296875, 5.966592788696289, 13.406745910644531, 20.074813842773438, 1.985321044921875, 6.358613967895508, -15.914800643920898, -3.1017684936523438, 28.168174743652344, -7.472743988037109, -9.46627426147461, 16.42206573486328, 6.428718566894531, 1.475067138671875, -4.15546989440918, -1.5215301513671875, 0.40070343017578125, -1.8747100830078125, 11.336406707763672, 18.333152770996094, 6.423881530761719, 4.021507263183594, -0.3895530700683594, 7.147834777832031, -14.772438049316406, 6.3087615966796875, 8.403736114501953, -9.952713012695312, 0.8912162780761719, -16.3455810546875, 22.030799865722656, 35.013214111328125, 4.807561874389648, 5.774711608886719, 12.263420104980469, 19.450111389160156, -1.8615798950195312, 0.2037811279296875, 6.50579833984375, -15.513381958007812, 10.430866241455078, 22.130022048950195, 15.975982666015625, 30.555160522460938, 10.978103637695312, -2.7599735260009766], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000278.npy"}
{"epoch": 0.8424242424242424, "step": 279, "batch_size": 128, "mean": 8.402249336242676, "std": 11.102410316467285, "min": -21.953105926513672, "p10": -5.0134132385253904, "median": 6.573467254638672, "p90": 24.10204620361328, "max": 37.49560546875, "pos_frac": 0.78125, "sample": [27.470823287963867, 4.632549285888672, 13.215349197387695, 4.4027862548828125, -6.179527282714844, -2.7019615173339844, 32.11899948120117, 4.434318542480469, 7.256008148193359, 6.195808410644531, 32.412254333496094, 12.442291259765625, 6.212738037109375, 1.9046173095703125, 20.28899383544922, -14.200958251953125, 13.829795837402344, -6.70802116394043, 4.4528656005859375, 25.9067440032959, 20.384132385253906, 15.77676010131836, 10.963653564453125, 37.49560546875, 17.503063201904297, 9.939857482910156, 0.29547119140625, 5.499834060668945, 3.188322067260742, 14.954713821411133, 15.01778793334961, 6.7940521240234375, 17.687211990356445, 10.07373046875, 4.793586730957031, -13.125724792480469, -4.546775817871094, 7.493803024291992, 0.1334667205810547, 4.352651596069336, -0.056095123291015625, -7.750282287597656, 26.30693817138672, 36.38733673095703, -8.840385437011719, -0.24211883544921875, 5.146575927734375, 9.235631942749023, 3.4742279052734375, 1.4301414489746094, -5.912437438964844, -0.6019554138183594, 4.2841339111328125, -8.407012939453125, 10.452163696289062, 19.484237670898438, 26.656753540039062, 4.27008056640625, 15.567367553710938, 9.458221435546875, -4.976593017578125, -6.464805603027344, 18.67772674560547, 9.78531265258789, 13.078361511230469, 21.72943115234375, 19.230941772460938, -5.099327087402344, 19.046127319335938, 2.653043746948242, 20.313173294067383, 1.6130027770996094, 14.263702392578125, 9.910537719726562, 15.916744232177734, 10.317390441894531, 13.743896484375, 23.890037536621094, 4.903942108154297, 19.285612106323242, 8.126220703125, 35.660377502441406, -3.8789825439453125, -0.7320194244384766, 3.4312267303466797, 11.289970397949219, 3.3415908813476562, 31.181976318359375, 26.03564453125, 4.0526580810546875, 5.195701599121094, -21.953105926513672, 3.4679794311523438, -3.1437149047851562, 4.659332275390625, -2.3618698120117188, 0.5968875885009766, 10.060104370117188, -2.524921417236328, 11.544441223144531, 7.09124755859375, -0.3010826110839844, 6.352882385253906, 8.07021713256836, 8.574502944946289, -4.177024841308594, 25.501144409179688, 2.967041015625, 15.480339050292969, 3.4706573486328125, 14.080162048339844, 9.153762817382812, 16.08637809753418, -5.1314697265625, 24.59673309326172, 3.404052734375, 23.31499481201172, 21.17885971069336, -2.5317535400390625, 1.1217117309570312, 8.364555358886719, -3.1133880615234375, -7.128395080566406, 2.670745849609375, 18.34876251220703, 4.775150299072266, 8.999465942382812, 5.9946746826171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000279.npy"}
{"epoch": 0.8454545454545455, "step": 280, "batch_size": 128, "mean": 7.049349784851074, "std": 11.980231285095215, "min": -19.876419067382812, "p10": -7.927172851562499, "median": 4.11871337890625, "p90": 23.38631362915039, "max": 40.001312255859375, "pos_frac": 0.703125, "sample": [-6.6850128173828125, -8.755805969238281, 3.4850234985351562, -2.783550262451172, 8.534172058105469, 40.001312255859375, 17.04974365234375, 20.754592895507812, 14.567657470703125, 5.146018981933594, 18.193031311035156, 6.563289642333984, 16.973224639892578, 10.955284118652344, -5.352142333984375, 17.44598388671875, 3.2808914184570312, 25.559890747070312, 22.23957061767578, -4.708251953125, 12.752159118652344, -1.2208671569824219, 1.012481689453125, -8.386543273925781, 20.43563461303711, 3.7070884704589844, 6.6988983154296875, 31.87958526611328, -16.61798095703125, 2.4010391235351562, 19.200923919677734, 26.46398162841797, 3.5328903198242188, 2.6462059020996094, -19.876419067382812, 22.019775390625, 2.9372482299804688, 13.849777221679688, 4.341804504394531, 23.593944549560547, -2.7535133361816406, -2.4358749389648438, -9.550369262695312, 14.679100036621094, -2.35491943359375, 30.920841217041016, 4.331001281738281, 3.279052734375, 0.15806198120117188, -0.9748382568359375, -2.6556930541992188, 12.58070182800293, 13.524932861328125, 20.95005989074707, 16.53103256225586, -14.553085327148438, 0.9705963134765625, 24.0302734375, 0.5327262878417969, 3.029033660888672, 21.531890869140625, 13.5606689453125, -8.363273620605469, -17.419940948486328, 2.7804031372070312, 15.279388427734375, 15.407173156738281, 5.0770416259765625, -10.690139770507812, -6.061775207519531, 17.997882843017578, 4.1944122314453125, -9.683013916015625, 0.37317466735839844, 10.449562072753906, 21.96521759033203, -2.9713134765625, 11.738483428955078, 8.208362579345703, 2.6231555938720703, -13.108848571777344, 26.87120819091797, 25.742746353149414, 1.348419189453125, -0.22408294677734375, -0.24640655517578125, 26.232900619506836, -3.19549560546875, 3.8145599365234375, 5.313732147216797, -4.317222595214844, 9.467193603515625, -12.890884399414062, -1.4043197631835938, -1.12603759765625, 20.641258239746094, 20.218475341796875, -2.5331554412841797, 13.0828857421875, 0.10614013671875, 2.4439239501953125, 6.20672607421875, 0.4715423583984375, -2.0281829833984375, 22.855276107788086, 12.579851150512695, -0.05730438232421875, 23.29732894897461, 29.524131774902344, 4.551307678222656, 1.0220260620117188, -7.740272521972656, 28.55925750732422, 14.799285888671875, 4.0430145263671875, 11.095703125, 4.028373718261719, 11.88983154296875, -3.1311416625976562, 22.040971755981445, -10.803398132324219, 6.523210525512695, 18.429492950439453, -3.5748825073242188, 25.196807861328125, -0.256805419921875, 2.8053436279296875, 3.6793174743652344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000280.npy"}
{"epoch": 0.8484848484848485, "step": 281, "batch_size": 128, "mean": 8.322400093078613, "std": 11.709824562072754, "min": -14.714004516601562, "p10": -4.972241973876953, "median": 6.849170684814453, "p90": 24.302064895629883, "max": 37.115013122558594, "pos_frac": 0.734375, "sample": [2.810791015625, 3.3661346435546875, -4.0623626708984375, 1.8359050750732422, 6.9046173095703125, 21.148948669433594, 6.793724060058594, -3.4389572143554688, 0.001495361328125, 17.454261779785156, 37.115013122558594, 0.9241256713867188, 2.5783767700195312, 18.620712280273438, 20.37347412109375, -14.387691497802734, 16.527130126953125, 23.946182250976562, 10.479972839355469, 2.6172409057617188, 10.619712829589844, 11.500341415405273, 3.8862342834472656, 32.84541320800781, 18.705673217773438, 2.6322288513183594, -6.907386779785156, 4.744453430175781, -2.0652923583984375, -4.768241882324219, 14.150436401367188, 19.409337997436523, 20.667186737060547, 9.375778198242188, 1.498870849609375, 15.1644287109375, 0.5642356872558594, 13.135055541992188, -1.6379165649414062, -7.03753662109375, 12.310615539550781, -1.423635482788086, 10.8953857421875, -4.679874420166016, -12.158500671386719, 33.11663818359375, -12.105926513671875, 4.39818000793457, 25.477783203125, 29.287399291992188, 19.932586669921875, 28.67413330078125, 15.573143005371094, -12.366432189941406, -11.163917541503906, 20.348011016845703, -1.86822509765625, 10.598812103271484, 4.417572021484375, -2.1957855224609375, -0.6986942291259766, 0.9308032989501953, 22.239078521728516, 4.362419128417969, 2.3806724548339844, 23.560115814208984, -8.576732635498047, 10.08831787109375, 10.84820556640625, 10.914787292480469, 0.977996826171875, 31.118667602539062, -3.362701416015625, -1.22760009765625, 16.55170440673828, -1.739532470703125, 11.210037231445312, 29.618911743164062, -2.2103137969970703, 6.255279541015625, 24.197242736816406, 19.790184020996094, 7.5723724365234375, 28.193145751953125, -4.7328643798828125, -3.3586654663085938, -0.2586383819580078, 24.58031463623047, 16.464488983154297, 1.92498779296875, 19.887523651123047, 11.265777587890625, -0.4983654022216797, 12.669916152954102, 16.689376831054688, 15.565387725830078, 27.51495361328125, 1.8057785034179688, 12.075584411621094, 11.88079833984375, -2.3109283447265625, -1.701995849609375, 17.883880615234375, 2.7801570892333984, -2.073760986328125, 23.968708038330078, 0.2751274108886719, 1.91656494140625, 8.647369384765625, 4.695178985595703, 13.415369033813477, -10.853500366210938, -6.524116516113281, 1.0193710327148438, 8.768106460571289, 12.314804077148438, 5.5488739013671875, 24.546649932861328, 18.846435546875, 18.626693725585938, 22.141345977783203, 27.259124755859375, 8.065505981445312, -8.916557312011719, -5.4482421875, -14.714004516601562, 0.6392307281494141, 4.821044921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000281.npy"}
{"epoch": 0.8515151515151516, "step": 282, "batch_size": 128, "mean": 8.031892776489258, "std": 12.45101261138916, "min": -22.780731201171875, "p10": -8.949356079101562, "median": 6.557281494140625, "p90": 24.358123779296875, "max": 34.74609375, "pos_frac": 0.7265625, "sample": [-1.3669586181640625, 3.584392547607422, 14.702987670898438, 1.0749855041503906, 23.32879638671875, 20.091697692871094, -15.943923950195312, 24.464935302734375, -18.53937530517578, 27.272125244140625, -16.399024963378906, 25.633575439453125, -6.793144226074219, 34.74609375, 1.7539558410644531, 22.454971313476562, -4.5040283203125, 0.17501449584960938, -0.2662925720214844, 24.623991012573242, 6.166587829589844, -8.894538879394531, 12.717891693115234, 9.220893859863281, 16.20404052734375, 10.589914321899414, -22.780731201171875, 7.095508575439453, 8.6007080078125, 14.775035858154297, 16.02051544189453, 4.86442756652832, 16.127059936523438, 9.091743469238281, 0.7217369079589844, 6.295982360839844, 3.3330078125, -10.258003234863281, 19.929458618164062, 4.420389175415039, 14.800163269042969, 1.329376220703125, 17.115100860595703, 0.7886199951171875, 7.990886688232422, -10.184013366699219, -1.59619140625, -10.85750961303711, 24.312347412109375, 20.763107299804688, 18.831701278686523, 4.608428955078125, -5.6041107177734375, 6.818580627441406, 14.1982421875, -1.1953582763671875, 30.44530487060547, 20.74505615234375, 4.7869873046875, 23.843059539794922, 10.265373229980469, 4.614967346191406, 17.92926788330078, -2.030366897583008, 24.060733795166016, -2.0772552490234375, 24.983840942382812, 11.401451110839844, 12.2530517578125, 23.774444580078125, 19.557998657226562, -15.467201232910156, -4.077470779418945, 19.526561737060547, 16.675277709960938, -0.0628814697265625, -1.2057723999023438, 2.1970787048339844, 16.681703567504883, 5.881500244140625, 28.163238525390625, 2.2236785888671875, -2.435466766357422, -13.907608032226562, 17.201478958129883, 0.6439285278320312, 3.1286773681640625, -1.0845794677734375, -1.4487152099609375, -18.875160217285156, 16.718852996826172, 8.698583602905273, 17.758880615234375, -5.771078109741211, 23.083511352539062, 6.089790344238281, -0.8375396728515625, -2.5055389404296875, 30.891944885253906, 20.199913024902344, 2.9619216918945312, -9.077262878417969, 0.9566726684570312, -3.2509002685546875, 13.742141723632812, 13.785655975341797, 25.15357208251953, -4.1843109130859375, 4.024171829223633, 11.754348754882812, 20.385543823242188, 29.75914764404297, 20.768680572509766, 9.412399291992188, 4.51336669921875, 2.5114803314208984, -9.229177474975586, 3.414764404296875, 13.318256378173828, 5.937530517578125, 18.800094604492188, 28.054935455322266, -9.626022338867188, 29.195762634277344, -5.837371826171875, 11.935680389404297, 2.451568603515625, 17.354339599609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000282.npy"}
{"epoch": 0.8545454545454545, "step": 283, "batch_size": 128, "mean": 7.293776512145996, "std": 11.380401611328125, "min": -22.967321395874023, "p10": -6.374494171142578, "median": 7.395626068115234, "p90": 21.125019073486328, "max": 39.490997314453125, "pos_frac": 0.7421875, "sample": [2.0298385620117188, -10.758331298828125, -13.864044189453125, 18.725143432617188, 1.4338226318359375, 6.3568878173828125, 19.8775634765625, 8.46795654296875, 1.7049446105957031, 7.538324356079102, 17.29228973388672, 29.18755340576172, 22.474777221679688, -6.398307800292969, -3.561565399169922, 2.2288131713867188, -3.921539306640625, 1.3506507873535156, 15.388525009155273, -10.91883659362793, 4.575716018676758, 7.472358703613281, -12.34141731262207, 10.18145751953125, 28.49676513671875, 7.3188934326171875, 12.513908386230469, -3.9994735717773438, -3.6298675537109375, -2.2338943481445312, 12.8085298538208, 5.200468063354492, 3.670146942138672, 8.443328857421875, 5.7365264892578125, 14.321983337402344, 31.47491455078125, -21.425260543823242, 3.5389671325683594, 12.776878356933594, 11.605710983276367, 10.071311950683594, 9.90566635131836, 9.057903289794922, 22.521072387695312, 5.549198150634766, 17.40022850036621, 3.8442726135253906, 11.971221923828125, 2.0449066162109375, -9.526458740234375, 16.472976684570312, -0.9278717041015625, 20.978561401367188, -0.4706573486328125, 17.64917755126953, -0.26155853271484375, 12.172687530517578, 8.101226806640625, 2.402923583984375, 30.32901954650879, 16.10355567932129, 8.353935241699219, 16.326112747192383, 3.4532318115234375, 6.3447723388671875, -0.39058876037597656, 10.694831848144531, 3.8469009399414062, 16.412521362304688, 21.74179458618164, 19.858932495117188, -20.391708374023438, 13.564796447753906, 13.702789306640625, 12.296356201171875, 22.437358856201172, 0.24272918701171875, -6.884899139404297, 15.899978637695312, 21.379619598388672, 10.8704833984375, -5.779388427734375, -22.967321395874023, -3.9734573364257812, 7.5152740478515625, 15.570541381835938, -5.498390197753906, 17.685089111328125, -2.8875274658203125, 0.47493553161621094, -2.4829788208007812, 2.5050811767578125, 18.334732055664062, -0.6632080078125, 6.486978530883789, 4.248817443847656, 19.133472442626953, 7.7470703125, 14.3924560546875, 1.2174091339111328, 7.099159240722656, 10.770278930664062, 18.049835205078125, 6.840618133544922, -1.129425048828125, 17.36742401123047, 13.357307434082031, 12.141555786132812, -14.414466857910156, 11.88045883178711, 21.056137084960938, 4.4174041748046875, -6.364288330078125, 39.490997314453125, -12.213172912597656, 19.4193172454834, 3.2067947387695312, -3.453031539916992, -8.067859649658203, -5.614200592041016, 21.73296356201172, 6.503898620605469, -2.5100574493408203, 21.285743713378906, 4.866058349609375, 7.6332244873046875, 32.900726318359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000283.npy"}
{"epoch": 0.8575757575757575, "step": 284, "batch_size": 128, "mean": 8.338977813720703, "std": 11.041272163391113, "min": -14.071640014648438, "p10": -5.285462951660156, "median": 7.192497253417969, "p90": 24.51892566680908, "max": 32.276329040527344, "pos_frac": 0.71875, "sample": [2.8563079833984375, -0.7495498657226562, -6.163551330566406, 22.69005012512207, 3.346160888671875, 15.662071228027344, 26.53369140625, 17.77154541015625, -8.791664123535156, 3.7497787475585938, 3.0740890502929688, 17.01053237915039, 11.742782592773438, 1.0192031860351562, -1.1940765380859375, 18.03140640258789, 26.202251434326172, 26.17205810546875, 23.74915313720703, -0.6001605987548828, 14.286418914794922, -1.7553558349609375, 17.801361083984375, 30.169395446777344, 14.936370849609375, 11.473926544189453, 9.589181900024414, 2.3821792602539062, 21.987762451171875, 6.5695037841796875, -4.009624481201172, -5.534515380859375, 22.36076545715332, 1.740997314453125, 11.449363708496094, 13.14122200012207, 7.665718078613281, -9.121856689453125, 12.529972076416016, -1.9248046875, -8.753875732421875, -0.7153358459472656, -11.273658752441406, -2.2943286895751953, 13.869606018066406, -14.071640014648438, 22.087997436523438, 8.289138793945312, -7.386165618896484, 8.945556640625, 10.669548034667969, 20.150665283203125, 15.186302185058594, 16.61957550048828, -3.3467063903808594, -0.014734268188476562, -2.3660354614257812, 6.719276428222656, 32.276329040527344, -10.59857177734375, 10.349861145019531, 8.725341796875, 9.976654052734375, -1.0606460571289062, 11.085929870605469, 5.921884536743164, -3.5027847290039062, 3.4883270263671875, 2.833240509033203, 12.920501708984375, 7.977832794189453, -10.11761474609375, -5.1787261962890625, 1.8343048095703125, 10.289482116699219, 3.159414291381836, 3.0650634765625, 6.2918853759765625, 1.9157562255859375, 8.25836181640625, 11.560161590576172, -0.25656890869140625, 12.801223754882812, 5.885822296142578, -12.869613647460938, 25.861907958984375, 1.4348812103271484, 6.677158355712891, -1.2945556640625, -0.808319091796875, 13.915374755859375, 19.577239990234375, 4.466468811035156, 13.509244918823242, 26.209327697753906, 4.60638427734375, 3.6375350952148438, -2.1281700134277344, -1.6082649230957031, 23.980649948120117, 6.638263702392578, 10.66424560546875, -1.8637237548828125, -5.8250274658203125, 25.876686096191406, 27.637374877929688, 19.59972381591797, 31.431625366210938, -0.4268951416015625, 8.79718017578125, 16.624984741210938, 9.155084609985352, 4.961181640625, 27.714759826660156, 5.744377136230469, 18.104843139648438, 16.385534286499023, -1.2985076904296875, 17.59278106689453, 0.2553215026855469, 2.7270050048828125, 21.371551513671875, 25.77490234375, 22.062713623046875, -10.324920654296875, -1.217254638671875, 22.133697509765625, 31.856735229492188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000284.npy"}
{"epoch": 0.8606060606060606, "step": 285, "batch_size": 128, "mean": 7.5716142654418945, "std": 11.094082832336426, "min": -18.355655670166016, "p10": -6.211428070068359, "median": 6.416448593139648, "p90": 21.97573051452637, "max": 45.305328369140625, "pos_frac": 0.7421875, "sample": [14.081092834472656, 0.5260009765625, 4.380760192871094, 20.21580696105957, 13.084739685058594, 21.96674919128418, -6.094993591308594, 1.4735107421875, 9.334693908691406, 17.84051513671875, 2.649627685546875, 11.648731231689453, 21.213016510009766, 4.4422607421875, 3.8221397399902344, 28.169357299804688, -1.1294574737548828, 2.0303955078125, -2.9816513061523438, 0.439208984375, 32.885215759277344, 4.266380310058594, 8.829841613769531, 4.875097274780273, 13.085174560546875, 10.808940887451172, 12.500242233276367, 2.5658111572265625, 16.055347442626953, -7.8416748046875, -3.6876602172851562, 6.354633331298828, 8.538108825683594, 24.339385986328125, 14.931495666503906, 5.979888916015625, 1.2350616455078125, 13.51449966430664, -18.355655670166016, 7.340200424194336, -4.868339538574219, 16.846961975097656, 19.416297912597656, 4.7650909423828125, 2.01617431640625, -12.525423049926758, -10.635231018066406, -0.9986953735351562, 6.650707244873047, -9.115480422973633, -10.658050537109375, -6.4831085205078125, 7.838165283203125, -17.50341033935547, 1.052032470703125, -8.286575317382812, 14.180084228515625, 3.0901641845703125, 16.921783447265625, -8.02191162109375, -7.23834228515625, 4.119480133056641, 14.131839752197266, -3.2703475952148438, 15.292343139648438, -7.468925476074219, 11.340875625610352, 6.478263854980469, 14.870162963867188, 13.908439636230469, -3.7885665893554688, -2.821239471435547, -0.647125244140625, -0.3050994873046875, 2.6155776977539062, 19.648975372314453, 16.127548217773438, -5.165485382080078, 3.0145187377929688, 20.178892135620117, 14.957557678222656, -0.6493072509765625, 5.463689804077148, 14.044212341308594, 18.291183471679688, 8.011398315429688, 23.012086868286133, 4.702997207641602, 12.86920166015625, -10.979686737060547, 21.996686935424805, -4.4068603515625, 2.060028076171875, 13.73322868347168, 27.55585479736328, 6.3184356689453125, 9.58914566040039, -1.9935455322265625, 4.959075927734375, 0.004329681396484375, 0.22003746032714844, 6.761728286743164, 2.025144577026367, 20.03125762939453, 30.599281311035156, -0.19263839721679688, 13.173301696777344, 11.859184265136719, 24.269378662109375, -1.2495956420898438, 8.708648681640625, 24.833999633789062, 19.949996948242188, 23.671310424804688, 24.771270751953125, -2.5886306762695312, -5.569019317626953, 7.3868255615234375, 19.67193603515625, 20.8026123046875, 14.86202621459961, 1.5553398132324219, 9.824684143066406, 13.098712921142578, 45.305328369140625, -0.65069580078125, 23.3134765625, 3.1461334228515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000285.npy"}
{"epoch": 0.8636363636363636, "step": 286, "batch_size": 128, "mean": 5.925222873687744, "std": 10.52664566040039, "min": -24.355621337890625, "p10": -6.44318962097168, "median": 6.893218040466309, "p90": 18.918658447265624, "max": 35.88410186767578, "pos_frac": 0.703125, "sample": [12.17498779296875, 3.6634750366210938, 10.284183502197266, 3.7084922790527344, 1.5187225341796875, 21.076995849609375, 13.468048095703125, -0.06642532348632812, 8.664993286132812, 14.212120056152344, -1.3251419067382812, 16.674606323242188, 23.391990661621094, -1.7482833862304688, 12.115119934082031, 13.509544372558594, -1.3876495361328125, -6.913114547729492, 4.869384765625, -0.22937393188476562, -13.336128234863281, 0.01828765869140625, -1.752960205078125, -3.9369659423828125, 9.905693054199219, -1.62493896484375, -9.915275573730469, -21.964847564697266, 8.954391479492188, 15.102897644042969, 14.447216033935547, -10.100738525390625, 6.475128173828125, 22.587921142578125, 4.439062118530273, 5.972278594970703, 4.155841827392578, -3.0911998748779297, -6.304655075073242, 1.0253448486328125, 18.913192749023438, 15.715065002441406, 8.180919647216797, 10.567632675170898, -24.355621337890625, 7.329154968261719, -1.0417346954345703, -6.328754425048828, 1.0826187133789062, 13.112457275390625, 8.8016357421875, 11.376697540283203, 4.2212066650390625, 10.275651931762695, 23.215660095214844, -16.72026824951172, 15.332504272460938, 26.159526824951172, 6.703826904296875, 14.629898071289062, -5.763713836669922, 1.6086387634277344, -1.595367431640625, 20.98157501220703, 5.0905609130859375, -8.37451171875, 0.32167816162109375, 15.250911712646484, 13.039680480957031, 7.470218658447266, 18.558876037597656, 16.20761489868164, -2.5781707763671875, 0.7968406677246094, -1.67144775390625, 18.931411743164062, 15.25687026977539, 10.378814697265625, 0.702606201171875, 23.950576782226562, 28.14005470275879, -6.710205078125, 9.553081512451172, 2.1976356506347656, 7.867401123046875, -7.582283020019531, 2.6756057739257812, 7.47117805480957, 25.419281005859375, 0.7439956665039062, 7.683673858642578, 16.290977478027344, -1.69329833984375, 8.200111389160156, -21.739013671875, -10.068374633789062, -5.310474395751953, 35.88410186767578, 8.491897583007812, 8.9730224609375, 13.598602294921875, 21.018455505371094, -4.257598876953125, 15.270942687988281, 15.266326904296875, 10.992452621459961, -2.788175582885742, 12.396339416503906, -1.1435012817382812, -3.45281982421875, 8.8675537109375, 17.9494571685791, 8.177162170410156, 4.3497314453125, 13.969192504882812, 19.826892852783203, 1.0030765533447266, 10.225341796875, -0.7424125671386719, -5.031005859375, 2.2387008666992188, 2.9637451171875, -6.035125732421875, -7.642608642578125, 12.23858642578125, 7.678863525390625, 3.465423583984375, 7.082609176635742], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000286.npy"}
{"epoch": 0.8666666666666667, "step": 287, "batch_size": 128, "mean": 8.321319580078125, "std": 11.923121452331543, "min": -20.689128875732422, "p10": -5.46612777709961, "median": 6.732461929321289, "p90": 24.605784606933593, "max": 47.73954772949219, "pos_frac": 0.7890625, "sample": [10.448150634765625, 10.010885238647461, -20.689128875732422, 12.453792572021484, -9.431411743164062, 5.110748291015625, -3.8978471755981445, 15.21142578125, -0.42659759521484375, 10.574905395507812, 24.14411163330078, -10.732872009277344, 18.325698852539062, 6.099273681640625, 24.70195770263672, 4.728496551513672, 6.409694671630859, 6.769866943359375, 16.730743408203125, 8.17587661743164, 5.756509780883789, -1.5348987579345703, 3.7288436889648438, 47.73954772949219, 1.9494972229003906, 19.215877532958984, 33.240455627441406, 23.990142822265625, 0.842681884765625, 9.212760925292969, -2.762868881225586, -9.04217529296875, 11.113998413085938, -9.278182983398438, -19.734893798828125, -16.30577850341797, -2.633665084838867, 5.728157043457031, 1.4443435668945312, 6.3207244873046875, 7.183448791503906, 1.3656234741210938, 42.384521484375, 4.319639205932617, 23.35280990600586, 7.2643280029296875, 16.726394653320312, -5.483489990234375, 18.172927856445312, 9.983705520629883, 12.083526611328125, 0.390289306640625, 0.473846435546875, 11.217491149902344, 26.484848022460938, 5.2917633056640625, 37.400299072265625, -8.594375610351562, 11.1185302734375, 2.570955276489258, 11.908393859863281, 7.75067138671875, -10.812446594238281, 27.173828125, 1.9705657958984375, -3.4045677185058594, -5.5512237548828125, -8.09255599975586, 4.1995086669921875, 15.903181076049805, 3.0117263793945312, 0.6109695434570312, 0.05701637268066406, 4.3231353759765625, 6.8603057861328125, 10.482837677001953, 15.120918273925781, 6.695056915283203, 7.396240234375, 13.683464050292969, 13.50750732421875, 9.932357788085938, -3.5611572265625, 13.380863189697266, 3.54302978515625, 5.093357086181641, 24.56456756591797, 3.707874298095703, 4.512908935546875, 0.8405303955078125, 2.5922698974609375, 31.390419006347656, 7.704021453857422, 16.070566177368164, 17.154342651367188, 3.0516357421875, 0.36246490478515625, -7.2299652099609375, -1.0734100341796875, 2.4806671142578125, -5.458686828613281, 4.42022705078125, -1.7461891174316406, 4.265434265136719, -1.5739593505859375, 28.79228973388672, 17.838119506835938, 24.336395263671875, -0.1932811737060547, 21.572017669677734, 7.9613037109375, -0.15951919555664062, 13.583099365234375, 11.370681762695312, 25.394989013671875, 12.249801635742188, 14.431159973144531, 25.925552368164062, 15.960899353027344, 12.92352294921875, 1.7272109985351562, 15.060138702392578, 22.109024047851562, 31.291454315185547, -2.0158843994140625, 7.8548583984375, 29.197418212890625, 3.283111572265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000287.npy"}
{"epoch": 0.8696969696969697, "step": 288, "batch_size": 128, "mean": 7.473089218139648, "std": 12.708450317382812, "min": -25.474205017089844, "p10": -7.019944763183593, "median": 6.279191970825195, "p90": 24.547680664062497, "max": 39.32244873046875, "pos_frac": 0.671875, "sample": [9.782051086425781, -11.3966064453125, -18.460346221923828, -2.4061050415039062, 26.404083251953125, -1.53985595703125, -1.7252578735351562, 11.879257202148438, -13.052070617675781, 6.396240234375, 6.444402694702148, 7.836902618408203, 6.69873046875, 26.334976196289062, 9.369194030761719, 4.372642517089844, 24.067550659179688, 26.924163818359375, 17.980579376220703, -5.514139175415039, 31.006973266601562, -8.29513168334961, 29.723861694335938, 22.082622528076172, -1.85321044921875, 39.32244873046875, 23.918502807617188, 5.748260498046875, 3.7053070068359375, 2.4637393951416016, -9.099193572998047, 9.533279418945312, -3.4269790649414062, -5.697612762451172, 15.565414428710938, -6.055717468261719, 26.857009887695312, -4.628498077392578, -12.958847045898438, 11.069793701171875, 4.158714294433594, -0.36849212646484375, -6.648590087890625, 11.31589126586914, 29.76617431640625, -3.4150619506835938, -4.292442321777344, -2.552764892578125, -0.914093017578125, 6.6801910400390625, 4.938972473144531, 0.7307586669921875, 0.9081573486328125, 1.0719985961914062, 5.448633193969727, -25.474205017089844, 5.823066711425781, -7.368011474609375, 1.7561759948730469, 19.052520751953125, -6.8707733154296875, 14.348411560058594, 14.686569213867188, -3.5376853942871094, 22.748855590820312, 18.38712501525879, -3.5198516845703125, 8.934263229370117, 16.3682861328125, 5.9111785888671875, 25.145248413085938, 5.016742706298828, -3.8171920776367188, 12.282840728759766, 24.291580200195312, 5.356414794921875, 14.56212043762207, 30.687397003173828, 9.740264892578125, 0.3897132873535156, 9.150726318359375, 18.66898536682129, 20.580474853515625, 7.9434967041015625, 34.50523376464844, -4.2900390625, 0.765289306640625, 6.162143707275391, -1.883636474609375, 23.530229568481445, 21.915191650390625, -19.806838989257812, 15.394760131835938, 5.331298828125, 14.486709594726562, 13.638946533203125, 13.140380859375, 6.583221435546875, -1.9736919403076172, -9.457027435302734, 17.178466796875, -13.777053833007812, 14.242042541503906, -5.274505615234375, 17.740478515625, 15.106086730957031, 8.844255447387695, -8.949396133422852, 20.776172637939453, -3.6847763061523438, 5.393798828125, -3.0369796752929688, 17.63011932373047, -4.864841461181641, 22.16899871826172, 1.1462478637695312, 25.62268829345703, -2.7653350830078125, 30.27750015258789, 23.007225036621094, -10.308652877807617, -3.0195388793945312, 23.431739807128906, 12.220361709594727, 5.350500106811523, 6.453657150268555, -1.1019287109375, 21.254722595214844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000288.npy"}
{"epoch": 0.8727272727272727, "step": 289, "batch_size": 128, "mean": 5.1544060707092285, "std": 11.496382713317871, "min": -22.1405029296875, "p10": -8.040201187133789, "median": 2.8341073989868164, "p90": 19.724040222167968, "max": 33.338470458984375, "pos_frac": 0.65625, "sample": [11.266834259033203, 2.1867332458496094, -20.53453826904297, 7.607114791870117, -4.5000457763671875, -3.7929840087890625, -2.4356231689453125, -2.1666946411132812, 15.91741943359375, 17.684925079345703, -20.756752014160156, 16.527851104736328, 23.387863159179688, 12.157135009765625, 2.9033374786376953, 17.667869567871094, -0.6438751220703125, 1.0248794555664062, -7.255615234375, -0.7462882995605469, 1.1395893096923828, 2.09039306640625, -1.177642822265625, 0.35146522521972656, 10.53856086730957, 23.144805908203125, -8.964698791503906, 8.70574951171875, 26.656204223632812, 19.715713500976562, 4.716602325439453, 3.634532928466797, 19.267513275146484, 7.90936279296875, -8.029380798339844, 12.622596740722656, 11.838375091552734, -0.0415496826171875, -9.4915771484375, 29.09149932861328, -3.671985626220703, -16.338790893554688, -2.6444740295410156, 17.73687744140625, 9.47275161743164, 5.29498291015625, 2.0316085815429688, 8.434932708740234, 5.515491485595703, 5.5607757568359375, 15.099411010742188, 2.7648773193359375, 33.338470458984375, 20.435104370117188, -20.485885620117188, 13.279853820800781, -20.7379150390625, -4.622564315795898, -1.5777854919433594, 16.550960540771484, 9.60858154296875, 16.188982009887695, 3.2289886474609375, -1.1285247802734375, 2.1339340209960938, -2.4086456298828125, 1.7881546020507812, -6.802101135253906, 8.009544372558594, -1.1315193176269531, 8.197975158691406, 11.329826354980469, 18.51019287109375, 15.507787704467773, 10.165458679199219, 9.575531005859375, 12.71237564086914, 8.487434387207031, 0.6185283660888672, 4.712984085083008, 27.743732452392578, 0.7294921875, 3.198314666748047, -0.16094970703125, 1.1365680694580078, 13.396232604980469, 5.0497283935546875, -0.9613037109375, -1.4113693237304688, 25.105098724365234, 15.885505676269531, -11.561515808105469, 28.415618896484375, 2.6329002380371094, 0.23203277587890625, -8.065448760986328, 11.661781311035156, -2.4871864318847656, 0.4205169677734375, 19.74346923828125, -0.15204620361328125, 11.787406921386719, 14.185478210449219, -22.1405029296875, 0.5002479553222656, 1.4746475219726562, 17.690696716308594, 1.1214599609375, -5.134616851806641, -4.461751937866211, 28.2930908203125, 8.841190338134766, 22.749267578125, -3.10284423828125, 7.852516174316406, -2.068714141845703, -0.27606201171875, -8.493066787719727, -5.675140380859375, 32.631134033203125, 2.03802490234375, -2.4207000732421875, 6.003440856933594, 0.21543502807617188, -11.571441650390625, -1.2086772918701172, -12.946441650390625, 19.368896484375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000289.npy"}
{"epoch": 0.8757575757575757, "step": 290, "batch_size": 128, "mean": 7.689520835876465, "std": 11.841334342956543, "min": -24.046600341796875, "p10": -5.5927268981933596, "median": 6.932914733886719, "p90": 22.937165069580075, "max": 37.07788848876953, "pos_frac": 0.7734375, "sample": [5.356452941894531, 4.070472717285156, 11.287420272827148, 5.584957122802734, 8.491477966308594, -13.067176818847656, 18.483413696289062, 1.0043830871582031, 16.52957534790039, -4.120796203613281, 1.9210205078125, 19.70574951171875, 37.07788848876953, -2.583637237548828, 26.087169647216797, 0.3739795684814453, 15.680885314941406, 4.651998519897461, 13.705970764160156, 11.02261734008789, 10.777236938476562, 10.310297012329102, -1.74468994140625, 23.566364288330078, 28.5751895904541, 3.2735214233398438, 2.8580398559570312, 15.157730102539062, -2.80279541015625, 19.380237579345703, 16.704814910888672, 4.845056533813477, 14.816909790039062, 29.52734375, 17.32353973388672, 16.76686668395996, 17.03150177001953, 26.290895462036133, 3.852651596069336, 10.121444702148438, -2.2963180541992188, 7.721853256225586, 30.35612678527832, 13.125564575195312, 21.479496002197266, 12.026351928710938, -4.136180877685547, 2.5633907318115234, 3.11553955078125, 2.6558494567871094, -2.9415512084960938, -13.019020080566406, 3.1282501220703125, -0.030622482299804688, -3.9790725708007812, -24.046600341796875, 16.35321044921875, 12.246902465820312, 11.767242431640625, 4.760383605957031, -10.243072509765625, 4.475013732910156, 5.8011474609375, 6.091033935546875, 0.23914337158203125, 3.47576904296875, -8.265983581542969, 6.228843688964844, -10.539573669433594, 14.148012161254883, 20.884836196899414, 4.601512908935547, 11.832084655761719, 13.402252197265625, -4.379554748535156, 12.3455810546875, -5.694480895996094, 11.180383682250977, 27.697586059570312, 0.5629310607910156, 1.5820693969726562, 15.096250534057617, -11.451187133789062, 22.00579833984375, -15.891365051269531, -21.691518783569336, 1.3887252807617188, 34.781768798828125, 21.13470458984375, 9.175514221191406, 19.28022003173828, 12.152950286865234, -7.011497497558594, -10.452228546142578, 1.7705078125, -4.194543838500977, 9.902938842773438, 1.2047462463378906, 17.416213989257812, 7.772758483886719, 4.0794525146484375, -2.0152664184570312, -5.5491180419921875, 1.3986244201660156, 8.736331939697266, -4.2536773681640625, 26.852264404296875, -1.1498947143554688, -3.7393360137939453, 30.743621826171875, 17.590248107910156, 22.72875213623047, -22.573015213012695, 10.759140014648438, 2.311237335205078, 13.348541259765625, 30.364761352539062, 3.3330230712890625, 7.636985778808594, 3.621622085571289, 9.74957275390625, 18.741283416748047, 9.073677062988281, 23.4234619140625, 8.9822998046875, 5.611328125, 0.4117012023925781, 17.47792625427246], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000290.npy"}
{"epoch": 0.8787878787878788, "step": 291, "batch_size": 128, "mean": 6.177089214324951, "std": 11.353812217712402, "min": -18.757156372070312, "p10": -8.260350036621093, "median": 4.9746856689453125, "p90": 19.831756210327146, "max": 35.82801055908203, "pos_frac": 0.7421875, "sample": [17.34931755065918, 14.454116821289062, 0.2491607666015625, 4.8808441162109375, 2.7925872802734375, -3.0194168090820312, 6.3103179931640625, 5.9771881103515625, 18.971458435058594, 7.510169982910156, 18.344388961791992, 3.808551788330078, 22.91671371459961, 13.30849838256836, 30.32518768310547, -12.576797485351562, 4.379751205444336, 17.960556030273438, 11.231742858886719, 12.627059936523438, 9.3448486328125, -18.757156372070312, 35.321083068847656, 26.9556884765625, -1.3403587341308594, -12.82733154296875, 12.795120239257812, -3.6090240478515625, 13.241340637207031, 5.041973114013672, 7.221126556396484, 9.638717651367188, 17.76978302001953, 11.453445434570312, -14.78546142578125, 2.934955596923828, 1.0051002502441406, 3.782773971557617, 6.0909576416015625, -5.490287780761719, 14.404090881347656, 30.946632385253906, 22.421804428100586, 0.4134063720703125, 13.833686828613281, 4.907398223876953, -7.056488037109375, 13.762748718261719, 1.7631149291992188, -7.794647216796875, 6.894073486328125, 0.30843353271484375, 2.83892822265625, -15.303537368774414, -5.945159912109375, -4.1075286865234375, -1.2749462127685547, -9.872251510620117, -7.480987548828125, 0.7736701965332031, 3.1999473571777344, 20.12765884399414, 4.339733123779297, -8.045745849609375, 14.602523803710938, 35.82801055908203, 6.5937347412109375, 15.764518737792969, -6.411956787109375, -0.7632560729980469, 13.340316772460938, 5.167873382568359, 1.253753662109375, -3.090320587158203, 10.098751068115234, 14.662132263183594, -1.1513538360595703, 12.849136352539062, 7.4649200439453125, 17.02874755859375, 16.85987091064453, -8.761093139648438, -7.757863998413086, 15.240310668945312, 21.58600616455078, 1.1685428619384766, 19.324260711669922, 1.345968246459961, 9.688827514648438, -12.867622375488281, 2.3994007110595703, 6.004180908203125, 0.5226154327392578, 0.5310688018798828, -0.4703712463378906, 6.332252502441406, 23.15253448486328, 18.729354858398438, 0.21576690673828125, 5.836585998535156, 21.906158447265625, 32.90106201171875, -0.8851394653320312, 20.863727569580078, 12.586845397949219, 16.202529907226562, 2.929056167602539, 0.8854026794433594, -15.513572692871094, -12.456523895263672, 3.6709747314453125, 3.68011474609375, 6.173980712890625, 17.00927734375, 3.1391448974609375, 2.3068199157714844, -12.582271575927734, -3.8127212524414062, -5.442100524902344, 13.067146301269531, 16.96640968322754, -8.810211181640625, 16.260774612426758, -13.691986083984375, 1.9067535400390625, 0.351715087890625, 19.704940795898438, 11.38825798034668], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000291.npy"}
{"epoch": 0.8818181818181818, "step": 292, "batch_size": 128, "mean": 7.291419982910156, "std": 12.803182601928711, "min": -23.061052322387695, "p10": -7.831559753417968, "median": 5.550590515136719, "p90": 26.627797508239745, "max": 36.59585952758789, "pos_frac": 0.703125, "sample": [4.763145446777344, -5.186992645263672, -12.309623718261719, -2.8352737426757812, 30.53325653076172, 4.682865142822266, -17.102561950683594, -1.7308502197265625, 15.83892822265625, -5.168054580688477, 30.81888198852539, 26.9666690826416, 9.105369567871094, -16.728389739990234, 7.363372802734375, 0.9279384613037109, 11.971878051757812, 8.019767761230469, 5.5682373046875, 27.559371948242188, -5.798622131347656, 0.5548248291015625, 10.742874145507812, 36.59585952758789, 5.4253997802734375, -5.047664642333984, 6.75555419921875, 11.487113952636719, 20.789772033691406, -0.17651939392089844, 10.387271881103516, 4.5265350341796875, -10.334098815917969, -4.014518737792969, 14.28125, -10.835014343261719, 1.0240364074707031, 12.099491119384766, 11.84503173828125, -8.172470092773438, 12.53582763671875, 13.745878219604492, 6.839324951171875, -18.96759796142578, 0.7536029815673828, 21.123641967773438, 6.199615478515625, 5.794258117675781, 2.1439342498779297, 28.040313720703125, -23.061052322387695, 24.44756317138672, 5.163747787475586, 27.878036499023438, 18.537704467773438, 4.328693389892578, -1.8771286010742188, 16.17824363708496, -1.5204143524169922, 19.054479598999023, 26.482566833496094, 19.24981689453125, 4.498878479003906, 4.918857574462891, 30.06640625, 8.509124755859375, 8.73904800415039, -2.7344837188720703, 7.601005554199219, -6.316827774047852, 24.230697631835938, 15.188697814941406, 9.858173370361328, 23.794668197631836, 16.677719116210938, -0.32605743408203125, 17.400432586669922, 27.68708038330078, -1.8787803649902344, 20.52581024169922, 8.056770324707031, 5.236747741699219, -5.460262298583984, 29.892715454101562, 0.9053726196289062, 2.127044677734375, -2.9750938415527344, 0.2117919921875, 5.5329437255859375, -4.3367462158203125, -12.898162841796875, 4.8560333251953125, 2.7501182556152344, 24.94116973876953, 3.3262557983398438, 33.37525939941406, 17.799224853515625, 18.693222045898438, 6.375255584716797, -2.4068470001220703, -4.183807373046875, -3.1509838104248047, -7.685455322265625, 16.63385009765625, 14.9588623046875, -6.864936828613281, 3.395925521850586, -0.5429229736328125, 2.7663116455078125, 0.9939041137695312, 23.302812576293945, -8.672958374023438, 15.14145278930664, 29.403045654296875, -14.823745727539062, 8.551347732543945, 13.947784423828125, 24.480255126953125, 2.589069366455078, 8.942760467529297, 36.204620361328125, -0.6910820007324219, 9.139892578125, 14.031356811523438, -2.5550079345703125, 4.593317031860352, -10.468574523925781, -18.843748092651367], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000292.npy"}
{"epoch": 0.8848484848484849, "step": 293, "batch_size": 128, "mean": 6.3917646408081055, "std": 10.589539527893066, "min": -36.03364562988281, "p10": -5.917935180664062, "median": 5.427249908447266, "p90": 18.23972473144531, "max": 40.62684631347656, "pos_frac": 0.78125, "sample": [4.4264068603515625, 8.817047119140625, 5.3490447998046875, 12.212179183959961, 17.35369873046875, 7.413780212402344, 6.150177001953125, 20.597976684570312, -14.044424057006836, -7.645530700683594, 4.862831115722656, 13.300941467285156, 15.53839111328125, 1.3992271423339844, 4.663585662841797, -9.14111328125, 12.201385498046875, -1.8021240234375, -11.598091125488281, 5.0918426513671875, -1.7901992797851562, 13.50478744506836, 22.18811798095703, 5.778232574462891, -6.841117858886719, -6.1941680908203125, 0.6494083404541016, 4.649406433105469, 4.251350402832031, 7.594760894775391, 2.8499908447265625, -0.1480560302734375, -2.100566864013672, 0.22817039489746094, -20.163345336914062, 2.3968582153320312, 16.1026611328125, 5.505455017089844, -0.1447772979736328, 12.02587890625, 6.563453674316406, 3.1101455688476562, -4.03863525390625, 4.948005676269531, 16.6990966796875, 8.840301513671875, -5.986003875732422, 2.7124099731445312, 7.609333038330078, 12.221908569335938, 5.249595642089844, -9.373973846435547, -36.03364562988281, 19.302928924560547, 24.519943237304688, 22.27252960205078, 4.847564697265625, 16.684402465820312, 31.61553192138672, 14.381668090820312, -1.9935684204101562, 1.3479385375976562, 6.807546615600586, 13.1201171875, 13.9505615234375, 4.830234527587891, 15.525135040283203, 17.947410583496094, 9.586700439453125, 18.735031127929688, 11.601888656616211, 0.13062095642089844, 8.315351486206055, 8.617362976074219, 4.391883850097656, 14.366350173950195, -2.3867626190185547, 16.973974227905273, 4.480960845947266, 13.769216537475586, 7.684040069580078, 13.680534362792969, 15.186203002929688, 0.7309780120849609, -12.023441314697266, 40.62684631347656, 1.8802337646484375, 4.118345260620117, -0.12825584411621094, 10.285491943359375, 15.27984619140625, 0.130950927734375, 8.839092254638672, 1.3770465850830078, 21.076936721801758, 0.9698333740234375, 4.906791687011719, -3.254192352294922, 12.514692306518555, -5.970977783203125, 0.42142486572265625, 27.531478881835938, 28.226924896240234, -1.3439178466796875, 9.003791809082031, -0.26323509216308594, 5.052024841308594, 0.8850173950195312, 10.645614624023438, 3.6353607177734375, 21.923561096191406, 12.186302185058594, -1.6027259826660156, -5.0458526611328125, 3.8798370361328125, 12.814079284667969, 18.027450561523438, 8.147762298583984, 0.9307975769042969, 8.710752487182617, -23.571502685546875, 23.781688690185547, -5.89520263671875, 7.2220458984375, 1.3854598999023438, 8.271541595458984, 7.534444808959961, 15.985382080078125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000293.npy"}
{"epoch": 0.8878787878787879, "step": 294, "batch_size": 128, "mean": 7.429347991943359, "std": 11.523782730102539, "min": -26.835906982421875, "p10": -6.659272575378417, "median": 6.712677001953125, "p90": 22.60932731628418, "max": 38.820499420166016, "pos_frac": 0.7734375, "sample": [0.4508056640625, -5.585487365722656, 3.251232147216797, 4.400951385498047, 7.316200256347656, -6.4472198486328125, 3.3872241973876953, -8.772979736328125, 17.43755531311035, -2.1529579162597656, -6.036346435546875, 27.016035079956055, 12.803810119628906, 12.080249786376953, -2.657520294189453, 27.023666381835938, 12.707611083984375, 15.7939453125, 10.39659309387207, 3.2517776489257812, 13.861555099487305, -23.908798217773438, -9.579788208007812, 25.786041259765625, 16.46337890625, 7.005817413330078, 22.778968811035156, -10.593156814575195, 10.554073333740234, 2.4668006896972656, 8.615062713623047, 21.185684204101562, 16.76824951171875, 22.903076171875, 4.458305358886719, 11.435382843017578, -10.191993713378906, 3.8147354125976562, 4.339740753173828, 8.943113327026367, 18.43292999267578, 12.87906265258789, 38.820499420166016, -2.0390052795410156, -26.835906982421875, 0.5073966979980469, 4.331813812255859, 2.4979324340820312, 22.56369400024414, -1.6136817932128906, 25.35492706298828, 13.215353012084961, 0.5804328918457031, 18.872230529785156, 9.615936279296875, 24.94125747680664, 5.580207824707031, -7.961402893066406, 5.103252410888672, 8.18402099609375, 6.554840087890625, 31.915969848632812, 20.98537826538086, 2.0661697387695312, 11.230918884277344, -11.68670654296875, -1.228668212890625, 17.389602661132812, -4.069160461425781, 16.556289672851562, 15.54559326171875, 5.803230285644531, 35.030670166015625, 22.715805053710938, 5.2859954833984375, 24.07421875, 7.515777587890625, 18.55136489868164, -7.154062271118164, 19.481367111206055, -14.985786437988281, 6.540191650390625, 12.396949768066406, 20.538345336914062, 2.2492332458496094, 13.493614196777344, 15.1832275390625, -0.5723800659179688, 2.9971389770507812, 19.288644790649414, 4.775581359863281, 1.6237640380859375, 5.240964889526367, 8.419071197509766, 2.3503494262695312, 5.13671875, 9.417030334472656, 7.1494293212890625, 26.568161010742188, 1.0411491394042969, 5.82200813293457, 17.130836486816406, 3.4551467895507812, 11.758996963500977, 12.230348587036133, 8.054908752441406, 0.19237136840820312, -1.677642822265625, 3.2718276977539062, -14.031295776367188, 2.9908790588378906, 16.996726989746094, -4.644172668457031, -11.449993133544922, -2.0765304565429688, 20.738670349121094, 11.516660690307617, 6.870513916015625, 12.127302169799805, -0.31450843811035156, -3.059589385986328, 8.699678421020508, 2.5572509765625, -5.14923095703125, 2.1550750732421875, 17.641510009765625, 15.008464813232422, -17.078018188476562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000294.npy"}
{"epoch": 0.8909090909090909, "step": 295, "batch_size": 128, "mean": 8.025482177734375, "std": 10.643074989318848, "min": -22.817123413085938, "p10": -4.8218591690063475, "median": 7.018093109130859, "p90": 23.556982421875, "max": 34.15747833251953, "pos_frac": 0.7734375, "sample": [3.016469955444336, 6.928215026855469, -9.16431999206543, 26.15082550048828, 16.693313598632812, 7.688726425170898, 5.2059173583984375, -4.832624435424805, 10.647075653076172, 10.467792510986328, 16.683441162109375, 14.867919921875, 1.5874481201171875, 13.874710083007812, 16.369285583496094, -4.3055419921875, 15.20522689819336, 0.7495269775390625, -0.9961624145507812, -0.20426177978515625, 13.099227905273438, 0.1200408935546875, 9.997955322265625, 20.863550186157227, 1.7629146575927734, 5.0716705322265625, -18.2606201171875, 15.404365539550781, 21.085281372070312, -5.410369873046875, 4.870109558105469, 21.22504234313965, 9.195343017578125, -1.531717300415039, -6.396583557128906, -0.9204349517822266, 28.549468994140625, 5.560253143310547, 8.45208740234375, 3.040313720703125, 19.87603759765625, 10.847564697265625, -6.1126556396484375, 5.5215606689453125, 18.53840446472168, 15.37551498413086, 6.229856491088867, 1.699066162109375, 12.254570007324219, 18.84925079345703, 15.734565734863281, 4.731464385986328, 9.533519744873047, -8.576416015625, -2.5663490295410156, -11.498729705810547, 23.548500061035156, 9.141494750976562, 4.991569519042969, 2.8552932739257812, 3.615020751953125, -0.13190460205078125, 30.50048828125, 2.493793487548828, 6.566764831542969, 7.195850372314453, 9.594230651855469, 3.512744903564453, 19.433494567871094, 26.829113006591797, 7.3838043212890625, -22.817123413085938, 9.154373168945312, 10.093612670898438, -5.3175506591796875, 1.8617401123046875, -3.7280006408691406, 17.617401123046875, 8.807926177978516, 34.15747833251953, 4.829328536987305, 24.440155029296875, 14.119850158691406, 13.303550720214844, 15.622791290283203, 2.221435546875, 10.635147094726562, 11.497650146484375, -3.907745361328125, 5.2506866455078125, -6.147346496582031, 13.475685119628906, 7.073974609375, -2.4832763671875, -2.034423828125, 23.345060348510742, -2.7322826385498047, -2.0111465454101562, -0.77197265625, 6.082704544067383, 3.3974838256835938, 2.8790283203125, -6.338020324707031, 6.792385101318359, 31.374208450317383, -4.8172454833984375, 6.157428741455078, 6.962211608886719, 23.57677459716797, 10.278564453125, 1.488321304321289, 29.787277221679688, 28.547149658203125, 13.225622177124023, 25.579940795898438, 9.177078247070312, 25.440284729003906, 1.3075675964355469, 12.87005615234375, 3.1498947143554688, 10.333969116210938, 8.911117553710938, 21.988201141357422, 1.2819995880126953, 9.4324951171875, -2.79644775390625, 25.343597412109375, -10.087356567382812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000295.npy"}
{"epoch": 0.8939393939393939, "step": 296, "batch_size": 128, "mean": 8.093473434448242, "std": 12.181978225708008, "min": -18.53314971923828, "p10": -5.33729953765869, "median": 6.119113922119141, "p90": 25.76041107177734, "max": 42.305328369140625, "pos_frac": 0.7578125, "sample": [0.30096435546875, 27.441314697265625, 10.857471466064453, 9.230598449707031, 14.297233581542969, 18.023941040039062, 17.446945190429688, 10.10089111328125, 9.717941284179688, 4.180671691894531, 20.170181274414062, 4.967475891113281, 12.238189697265625, 10.631645202636719, 23.905670166015625, -0.8931045532226562, 26.854385375976562, 32.39179992675781, -0.5630073547363281, 25.29156494140625, -7.729404449462891, 6.950157165527344, 27.99945831298828, 1.936767578125, 1.0139236450195312, 16.028350830078125, -11.98455810546875, -4.806919097900391, 34.39115905761719, 11.280708312988281, 0.7659778594970703, 23.46245574951172, 7.49156379699707, 13.190841674804688, 9.61175537109375, -6.631500244140625, 19.925392150878906, -7.537689208984375, 4.988872528076172, -3.06951904296875, 24.23656463623047, -18.045642852783203, 27.872909545898438, 15.357612609863281, 0.2515907287597656, 9.230056762695312, -18.53314971923828, -4.986928939819336, 0.902313232421875, -4.235069274902344, 1.291534423828125, 24.41339111328125, 1.3509044647216797, 27.841812133789062, -4.22412109375, -9.381072998046875, 18.178985595703125, 8.367916107177734, 4.069061279296875, 16.55254364013672, 24.790699005126953, 6.2989501953125, -1.622201919555664, 16.607894897460938, 2.3990516662597656, 31.218246459960938, 24.59833526611328, 6.562843322753906, 16.740848541259766, 27.887609481811523, 19.789024353027344, 4.997520446777344, 22.340087890625, 14.065458297729492, -3.4135208129882812, 0.35982513427734375, -14.392845153808594, 15.006103515625, 8.741720199584961, 22.084274291992188, -4.3344268798828125, 11.164604187011719, 2.0330657958984375, 4.604991912841797, -11.399436950683594, 5.939277648925781, -13.48453140258789, 0.7496795654296875, 34.758819580078125, -1.4755935668945312, 42.305328369140625, -0.2421588897705078, 0.35540771484375, 2.1292877197265625, -1.519775390625, 4.214990615844727, 8.280620574951172, -0.4244117736816406, -10.984001159667969, 9.474220275878906, 9.960479736328125, 4.644287109375, -0.5854415893554688, 13.312965393066406, 4.932647705078125, 14.221336364746094, 8.211601257324219, 29.701026916503906, 9.501213073730469, 14.655616760253906, -3.3239402770996094, 9.86262321472168, -2.9837398529052734, 1.2857742309570312, 1.0291824340820312, -11.98040771484375, 1.964813232421875, 11.147041320800781, -6.1548309326171875, 32.31599426269531, -2.827911376953125, 5.446521759033203, 0.49179840087890625, 1.1241378784179688, 4.90625, 5.280067443847656, 5.8770751953125, 12.360763549804688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000296.npy"}
{"epoch": 0.896969696969697, "step": 297, "batch_size": 128, "mean": 6.616468906402588, "std": 11.412753105163574, "min": -20.642864227294922, "p10": -7.028573608398437, "median": 6.191139221191406, "p90": 22.3859992980957, "max": 33.75098419189453, "pos_frac": 0.7109375, "sample": [13.786415100097656, 4.902309417724609, 18.287750244140625, 3.29931640625, 19.48536491394043, -6.772987365722656, 7.17681884765625, -6.958251953125, 18.947952270507812, 14.144683837890625, 13.5465087890625, 4.6604156494140625, 8.262725830078125, 14.084869384765625, 28.027034759521484, 22.07147216796875, 0.46088409423828125, 12.71577262878418, 8.870820999145508, 14.719629287719727, -1.0044746398925781, -13.263771057128906, 8.165534973144531, -7.192657470703125, -8.56528091430664, 33.75098419189453, 5.9736328125, -0.7824287414550781, 6.6131744384765625, 10.007347106933594, 7.3040313720703125, -17.435081481933594, 25.27570343017578, 13.888999938964844, 26.936126708984375, -1.0955371856689453, 18.206863403320312, 18.035961151123047, 6.7575225830078125, -4.943695068359375, -4.529052734375, 6.549781799316406, -3.5886287689208984, 19.748775482177734, 10.113277435302734, -20.642864227294922, 23.119895935058594, -12.390155792236328, -12.103443145751953, -3.696826934814453, 26.876541137695312, -7.672882080078125, 32.064453125, 12.374435424804688, 25.949111938476562, 4.800804138183594, 6.4290008544921875, 5.649772644042969, -1.6551437377929688, -5.746681213378906, 5.084831237792969, 6.6872711181640625, 4.200614929199219, 17.410919189453125, 9.133321762084961, 13.534423828125, -5.495731353759766, 13.155372619628906, 17.188186645507812, 12.80352783203125, 4.08220100402832, -5.057945251464844, 0.66253662109375, -5.785984039306641, 16.885330200195312, 6.8462982177734375, 18.809463500976562, 3.844249725341797, 2.985380172729492, 8.066825866699219, 31.194610595703125, 17.669349670410156, 0.0469970703125, -12.017219543457031, 6.4086456298828125, 11.674667358398438, 13.219831466674805, -0.8198776245117188, 0.42997169494628906, 10.184711456298828, 17.16766357421875, -17.472274780273438, 7.572296142578125, 30.93321990966797, 4.375091552734375, 7.379467010498047, 0.31008148193359375, -3.7193679809570312, -2.5337677001953125, -5.742794036865234, -8.610279083251953, 6.9335174560546875, 3.7085494995117188, -2.6495361328125, 26.558250427246094, 4.6595001220703125, 0.4299964904785156, -4.314262390136719, 7.3782806396484375, -2.514190673828125, 26.622665405273438, 12.632553100585938, 10.377883911132812, -3.108673095703125, 19.10009765625, 23.14417266845703, 5.149906158447266, 4.950675964355469, 5.245553970336914, 19.82861328125, -0.37945556640625, 4.907405853271484, 5.450080871582031, -5.6773529052734375, 5.636165618896484, -13.719097137451172, 0.5487899780273438, -8.686805725097656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000297.npy"}
{"epoch": 0.9, "step": 298, "batch_size": 128, "mean": 7.067852973937988, "std": 12.333399772644043, "min": -30.95307159423828, "p10": -7.561730003356933, "median": 4.243168830871582, "p90": 23.8456672668457, "max": 39.91038513183594, "pos_frac": 0.7421875, "sample": [2.0053329467773438, 9.351036071777344, 30.979461669921875, 8.957008361816406, 15.005683898925781, 3.429027557373047, 13.353981018066406, 12.469423294067383, 0.5038280487060547, 2.284456253051758, 25.97229766845703, -1.8058280944824219, 14.916366577148438, 15.764183044433594, 25.291362762451172, 8.134441375732422, 10.117000579833984, -3.77325439453125, 15.968269348144531, 16.817703247070312, 30.797836303710938, -8.177703857421875, -30.95307159423828, 39.91038513183594, 2.837718963623047, 9.841529846191406, -2.242105484008789, 21.528533935546875, 9.110469818115234, 5.2084503173828125, -1.1766738891601562, 7.735134124755859, 1.8504104614257812, 24.757816314697266, 21.1302490234375, 5.360355377197266, 3.398212432861328, -15.209901809692383, 25.739635467529297, 4.430475234985352, 20.272674560546875, 2.0555953979492188, 1.6922760009765625, 8.761749267578125, 15.513877868652344, 12.482177734375, 1.8337841033935547, -12.849723815917969, 32.623695373535156, 22.981143951416016, 21.93023681640625, 20.290138244628906, -10.271379470825195, 19.926498413085938, 15.053024291992188, 10.919551849365234, 18.885223388671875, 8.193443298339844, -1.0764617919921875, -9.76226806640625, -3.3397598266601562, -19.228851318359375, 23.027076721191406, 28.369613647460938, -0.9881134033203125, 5.535411834716797, 25.201393127441406, -0.24682998657226562, 0.120086669921875, -8.450538635253906, -3.7853660583496094, 1.6176643371582031, 10.454879760742188, 0.5806789398193359, -9.673851013183594, 9.655534744262695, 2.5718994140625, 16.71759033203125, 8.591428756713867, 1.1006431579589844, 17.760738372802734, 2.3424148559570312, 6.341682434082031, -3.693937301635742, -2.7119903564453125, 33.05994415283203, 1.7574939727783203, -5.094570159912109, 20.498519897460938, 2.71868896484375, 9.697433471679688, 2.4589309692382812, -15.155174255371094, 2.823486328125, -2.0122909545898438, 18.305809020996094, 0.2669944763183594, 24.55548858642578, -7.468608856201172, 4.0558624267578125, -10.934410095214844, -2.8869171142578125, 23.541458129882812, 19.37953758239746, 37.018646240234375, 5.620536804199219, 4.0525665283203125, -7.779012680053711, 6.358863830566406, 11.2244873046875, 1.3675193786621094, -4.795440673828125, 1.6428604125976562, 12.388908386230469, -0.16402053833007812, -19.65013885498047, -2.1468734741210938, 3.1994705200195312, 3.8289794921875, -4.62092399597168, 4.041629791259766, 17.118696212768555, -2.616046905517578, 6.112981796264648, 2.3924636840820312, 17.466079711914062, 1.746185302734375, 2.364715576171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000298.npy"}
{"epoch": 0.9030303030303031, "step": 299, "batch_size": 128, "mean": 5.74097204208374, "std": 11.336711883544922, "min": -20.58761215209961, "p10": -7.494217300415039, "median": 3.4544143676757812, "p90": 21.744638061523435, "max": 33.385231018066406, "pos_frac": 0.625, "sample": [-2.7371253967285156, -8.793685913085938, -4.103681564331055, -9.21392822265625, 14.272796630859375, 1.9525032043457031, 0.5215167999267578, -12.358051300048828, 33.385231018066406, 26.004539489746094, 15.632373809814453, -0.7066078186035156, 6.7137451171875, 6.831829071044922, -10.84173583984375, -2.3225860595703125, 20.266204833984375, 1.073089599609375, 5.764556884765625, 7.453952789306641, -0.99078369140625, 8.341012954711914, 9.75351333618164, 25.050323486328125, 20.299713134765625, -3.792144775390625, 30.39038848876953, 2.4056644439697266, 15.507034301757812, -0.04433441162109375, -7.617424011230469, 3.6255111694335938, -3.8229103088378906, 22.539813995361328, -10.274543762207031, 9.381477355957031, 2.271665573120117, 9.2738037109375, -2.338104248046875, 6.968236923217773, 17.223445892333984, 11.692590713500977, 1.590555191040039, -5.604494094848633, 26.866989135742188, 3.0727596282958984, 11.752803802490234, 7.688026428222656, 1.2041912078857422, -5.074718475341797, 4.8416748046875, -0.25959014892578125, -0.6050319671630859, 1.460317611694336, -0.9586563110351562, 0.8433933258056641, 2.780780792236328, -2.54864501953125, 28.097991943359375, 27.122879028320312, 6.362089157104492, 12.989803314208984, 15.520614624023438, 13.747198104858398, -0.7270622253417969, -3.8770599365234375, 4.918891906738281, -1.194183349609375, 13.193351745605469, -4.18255615234375, 3.2833175659179688, 22.270065307617188, 32.734466552734375, -7.330474853515625, 15.165390014648438, 13.586128234863281, -4.246772766113281, -20.58761215209961, -3.142120361328125, 14.139947891235352, 4.697118759155273, 2.721099853515625, 21.416397094726562, -2.5933265686035156, 15.488410949707031, -1.589141845703125, 13.577617645263672, 9.66650390625, 15.68936538696289, 17.03411102294922, -1.97613525390625, -7.5386962890625, 7.345062255859375, 4.6167449951171875, 3.703685760498047, -9.806312561035156, 18.66829490661621, -6.4256134033203125, -0.28254127502441406, 3.780282974243164, 2.2605438232421875, -10.302591323852539, 32.43650817871094, -5.782402038574219, 2.6031341552734375, 4.182643890380859, 3.7651920318603516, 14.723548889160156, -5.070648193359375, -11.939815521240234, -7.475154876708984, -4.075021743774414, 23.297958374023438, 5.1093292236328125, -0.09372520446777344, 29.399179458618164, -3.1602401733398438, 11.881591796875, -1.8710174560546875, 1.8077754974365234, 21.519454956054688, 11.61334228515625, -9.881385803222656, -0.05764007568359375, 21.37646484375, 20.763763427734375, -14.447891235351562, 14.535087585449219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000299.npy"}
{"epoch": 0.906060606060606, "step": 300, "batch_size": 128, "mean": 5.618292808532715, "std": 11.403313636779785, "min": -19.797855377197266, "p10": -9.007386016845702, "median": 4.618700981140137, "p90": 21.005278015136717, "max": 31.964065551757812, "pos_frac": 0.6875, "sample": [-5.697265625, -1.4093971252441406, 4.3039398193359375, 8.863800048828125, -5.4398193359375, 10.335630416870117, -7.796726226806641, 25.030136108398438, 1.227996826171875, -1.5838813781738281, 4.315893173217773, -8.870040893554688, 26.413318634033203, 10.098167419433594, 8.107341766357422, -19.0772705078125, -2.049560546875, -8.40045166015625, 3.6506118774414062, -4.289310455322266, 2.1925392150878906, -2.6378173828125, 3.170175552368164, 10.170013427734375, -3.55810546875, 1.0595626831054688, 31.394195556640625, 16.27252197265625, 10.626991271972656, 11.259603500366211, 14.585906982421875, 6.114934921264648, -7.016242980957031, -13.995773315429688, -15.107769012451172, -3.730775833129883, 2.9726638793945312, 9.72006607055664, 11.686111450195312, -3.913787841796875, 3.6268062591552734, 9.422348022460938, 10.555450439453125, 22.622234344482422, -13.342704772949219, 0.0454254150390625, 25.484088897705078, 12.815990447998047, 23.12723159790039, 2.4186267852783203, 4.171365737915039, -4.318229675292969, 17.859975814819336, 10.522003173828125, 17.405303955078125, -6.187221527099609, 31.964065551757812, 7.9702911376953125, 12.337284088134766, -1.4814128875732422, -0.6520233154296875, 16.371994018554688, 19.823257446289062, 7.020641326904297, 10.686286926269531, 9.812564849853516, 4.171453475952148, 10.816730499267578, 21.60986328125, -3.208576202392578, 5.7113189697265625, -11.843547821044922, -17.87281036376953, -9.327857971191406, -9.998748779296875, 19.98956298828125, 6.686725616455078, 5.807563781738281, 14.59423828125, 13.825942993164062, -19.797855377197266, -9.343212127685547, 6.517234802246094, -15.159587860107422, -0.9483642578125, 4.03619384765625, 0.5317707061767578, 24.58685302734375, -16.694320678710938, 12.415548324584961, 0.7969799041748047, 5.125888824462891, 2.5202198028564453, -0.588592529296875, 17.58686065673828, 4.282135009765625, 20.612152099609375, 22.056991577148438, 0.10097885131835938, 9.314022064208984, 20.746170043945312, 17.56707763671875, -2.4988365173339844, 0.10960769653320312, 0.563385009765625, 20.3828125, 14.048328399658203, 27.763195037841797, -1.8577003479003906, 18.652423858642578, 16.419395446777344, -1.817911148071289, -1.8636894226074219, 4.9215087890625, 2.7948055267333984, 10.995338439941406, 2.6018733978271484, 6.761018753051758, -10.38116455078125, -2.9562606811523438, 8.881721496582031, -3.960735321044922, 12.375106811523438, 27.575416564941406, 5.420623779296875, 0.1394939422607422, 7.112640380859375, 30.652305603027344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000300.npy"}
{"epoch": 0.9090909090909091, "step": 301, "batch_size": 128, "mean": 7.946765422821045, "std": 10.742015838623047, "min": -15.672782897949219, "p10": -4.888168907165527, "median": 6.31156063079834, "p90": 22.253990936279298, "max": 32.645416259765625, "pos_frac": 0.7890625, "sample": [22.23431396484375, 5.687328338623047, 15.624679565429688, -13.918949127197266, 26.471878051757812, 22.299903869628906, 32.645416259765625, 2.8301239013671875, 20.222763061523438, 12.73956298828125, -1.31500244140625, 12.8182373046875, 19.9390869140625, 1.7174758911132812, 0.34973907470703125, 18.0262451171875, 8.5673828125, 3.551544189453125, 0.2035388946533203, 21.297399520874023, 5.7073211669921875, 12.616836547851562, 8.070930480957031, 6.047876358032227, 0.06684112548828125, 8.51181411743164, 6.852649688720703, 27.460769653320312, 3.6496448516845703, 1.7281341552734375, 17.856678009033203, 20.716716766357422, 5.598846435546875, -3.4254493713378906, 27.56743621826172, -13.154762268066406, 24.645153045654297, 2.686260223388672, 1.5179309844970703, 10.763214111328125, 27.337005615234375, 3.0879554748535156, -2.0507640838623047, -12.849456787109375, 14.612159729003906, 2.9042129516601562, -4.86834716796875, 6.369594573974609, -9.230117797851562, 7.500946044921875, 3.7664222717285156, 20.640869140625, 1.4516143798828125, 12.536930084228516, 10.462911605834961, -5.818122863769531, 18.255462646484375, 16.290800094604492, 18.306198120117188, -1.4590396881103516, -15.672782897949219, 15.841255187988281, 22.031333923339844, -9.525192260742188, -1.25177001953125, 14.644813537597656, 15.739128112792969, 28.05010986328125, 10.402572631835938, 11.263542175292969, 7.839088439941406, 3.2016849517822266, -5.3363037109375, 17.626739501953125, 6.9593048095703125, 0.07465744018554688, -2.18475341796875, 0.8585300445556641, 0.4091033935546875, -9.348804473876953, 5.7866973876953125, 0.28412818908691406, 9.709953308105469, 3.8801498413085938, -3.871999740600586, 9.792457580566406, 15.337860107421875, -0.04251861572265625, 4.587102890014648, 3.5493698120117188, 1.3628387451171875, 1.2968597412109375, 25.211605072021484, 3.7502822875976562, -2.6260223388671875, 19.935026168823242, 9.751068115234375, 0.1824951171875, 31.365203857421875, -4.934419631958008, 13.62017822265625, 20.32305145263672, -9.191444396972656, 24.155723571777344, 8.456787109375, 16.062591552734375, 5.217967987060547, 20.86919403076172, 12.396957397460938, 6.02238655090332, -2.15777587890625, 16.708908081054688, -0.7617568969726562, 11.275543212890625, 9.836490631103516, 20.00522232055664, -9.045188903808594, -14.836166381835938, 5.485328674316406, 6.214061737060547, 3.31951904296875, 6.25352668762207, 24.185813903808594, 25.76995086669922, -3.59063720703125, 16.03744125366211, 6.5750274658203125, -2.6778488159179688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000301.npy"}
{"epoch": 0.9121212121212121, "step": 302, "batch_size": 128, "mean": 7.295844078063965, "std": 11.463251113891602, "min": -27.828369140625, "p10": -5.821343994140625, "median": 7.563982009887695, "p90": 22.31513156890869, "max": 46.6781005859375, "pos_frac": 0.71875, "sample": [17.267974853515625, 24.273330688476562, 14.852020263671875, -6.045135498046875, -2.6638565063476562, 12.633773803710938, -0.4440269470214844, 8.697219848632812, -0.7515106201171875, 8.03531265258789, 2.2029953002929688, 46.6781005859375, 8.514572143554688, 19.656570434570312, 17.969335556030273, 12.760406494140625, 8.300006866455078, 3.2465896606445312, -11.992315292358398, 18.176963806152344, 8.347663879394531, 0.8317413330078125, 1.7250499725341797, 24.583362579345703, 8.371980667114258, 0.20704269409179688, -4.4680938720703125, -0.7192878723144531, 16.881887435913086, 11.567371368408203, 9.200485229492188, -1.1106033325195312, -2.4318199157714844, 0.800994873046875, 14.33572006225586, 11.779129028320312, 16.065486907958984, -1.259683609008789, -0.2268524169921875, 14.4365234375, -0.3394775390625, 8.05633544921875, 12.078012466430664, -4.917335510253906, 12.879249572753906, -12.746074676513672, 1.0347938537597656, 5.018272399902344, 4.08489990234375, 18.292587280273438, 12.45733642578125, 8.538982391357422, 29.85118865966797, 16.878150939941406, 5.383918762207031, 1.3034801483154297, 1.8483772277832031, -11.344078063964844, 13.004146575927734, 9.893543243408203, 25.08028221130371, -0.2696704864501953, 23.646636962890625, -5.280811309814453, 13.819793701171875, 13.330169677734375, -0.04366302490234375, -9.327373504638672, 3.9870834350585938, 12.941987991333008, -0.8405494689941406, 7.058074951171875, 20.17730712890625, 6.2057952880859375, 4.886112213134766, 20.375595092773438, -24.009307861328125, -6.0930328369140625, -2.967853546142578, -10.248641967773438, 4.158535003662109, -6.0635223388671875, 22.040822982788086, -3.6202926635742188, -0.9586334228515625, 18.049530029296875, 29.34154510498047, 16.20685577392578, 33.62841033935547, -1.5070152282714844, 16.610885620117188, 6.9850006103515625, 3.477203369140625, 8.818328857421875, 12.252685546875, 9.325302124023438, 27.718544006347656, -9.270469665527344, -1.5120849609375, -0.7699241638183594, 0.7089881896972656, 16.880699157714844, -5.725433349609375, 0.886322021484375, 0.6391429901123047, 9.223125457763672, 3.574268341064453, 2.9943389892578125, 9.679588317871094, -27.828369140625, 2.9254379272460938, -6.892784118652344, 9.138587951660156, 26.056814193725586, 26.826705932617188, 7.0926513671875, 12.83172607421875, 22.955184936523438, 10.098098754882812, 23.41094207763672, 16.466278076171875, 0.6372528076171875, 1.5335712432861328, 17.694061279296875, 17.474084854125977, -2.9949493408203125, -13.741615295410156, 12.440887451171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000302.npy"}
{"epoch": 0.9151515151515152, "step": 303, "batch_size": 128, "mean": 6.386440277099609, "std": 10.723404884338379, "min": -17.996131896972656, "p10": -7.2489776611328125, "median": 5.921382904052734, "p90": 20.599365234374993, "max": 30.68994140625, "pos_frac": 0.71875, "sample": [17.600631713867188, 9.623924255371094, -5.103727340698242, 30.68994140625, 4.9549560546875, 13.093666076660156, 8.450454711914062, 10.414535522460938, -2.5641098022460938, 8.456657409667969, 12.523128509521484, 3.054046630859375, -0.2914581298828125, -2.419628143310547, 9.989019393920898, -11.31317138671875, -17.665634155273438, 19.317882537841797, -7.125885009765625, 19.570159912109375, 23.52392578125, 12.152008056640625, 3.1423721313476562, 6.555992126464844, 17.342857360839844, -3.9940338134765625, 24.766700744628906, 6.7086334228515625, 9.905719757080078, -14.55572509765625, -8.520866394042969, 3.9734249114990234, 6.089359283447266, -7.53619384765625, 11.9859619140625, -3.1764373779296875, 8.11062240600586, 1.3014602661132812, -5.5578155517578125, 1.4757881164550781, 2.6529464721679688, -2.7432994842529297, 22.028583526611328, 15.358489990234375, 3.1336097717285156, 10.486465454101562, 11.743583679199219, 6.526031494140625, -2.7816696166992188, -1.0717353820800781, 14.82696533203125, 12.557266235351562, 26.720108032226562, 2.417327880859375, 4.28558349609375, 17.5358829498291, 7.292810440063477, -17.996131896972656, 4.049964904785156, -5.324453353881836, 1.8175697326660156, 17.813674926757812, -8.181671142578125, 9.960552215576172, -4.029273986816406, 24.742477416992188, 5.228290557861328, 28.037796020507812, -2.00384521484375, 2.2177391052246094, 6.764717102050781, 1.5177764892578125, 9.140439987182617, 5.817390441894531, -2.220815658569336, 9.676868438720703, 19.376922607421875, -3.679931640625, -4.014751434326172, 10.938911437988281, -17.789161682128906, 16.07155990600586, 0.987945556640625, 1.933645248413086, 3.7286453247070312, 0.020982742309570312, 14.555862426757812, 10.045097351074219, -7.891017913818359, -7.917713165283203, 22.588794708251953, -3.1939163208007812, 6.479335784912109, 5.307075500488281, -9.8095703125, 13.573686599731445, 1.4144935607910156, 18.901973724365234, 23.085268020629883, -3.511444091796875, 16.000099182128906, 17.155792236328125, 17.580711364746094, -13.350791931152344, -1.5246009826660156, -5.41534423828125, 24.801605224609375, -2.1913414001464844, 10.124673843383789, 12.226005554199219, 8.314529418945312, 2.7476367950439453, 28.225875854492188, 19.98684310913086, 6.0253753662109375, 19.054668426513672, 2.462749481201172, 28.19647979736328, 4.312095642089844, 7.78753662109375, 0.9369277954101562, -9.308860778808594, 3.8046417236328125, 27.543174743652344, 9.129642486572266, 15.573684692382812, 4.0732574462890625, -0.9585361480712891], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000303.npy"}
{"epoch": 0.9181818181818182, "step": 304, "batch_size": 128, "mean": 6.036580562591553, "std": 11.190499305725098, "min": -23.530227661132812, "p10": -7.289920425415039, "median": 4.727694511413574, "p90": 20.97725257873535, "max": 37.57879638671875, "pos_frac": 0.6953125, "sample": [7.047208786010742, -1.2658462524414062, -9.123382568359375, 4.160331726074219, 15.098533630371094, 14.161239624023438, 8.890144348144531, 16.832412719726562, -3.0658111572265625, 3.114320755004883, -0.7553634643554688, 24.695884704589844, -0.67303466796875, 21.4373779296875, 8.355880737304688, 14.005470275878906, 16.517227172851562, 13.766651153564453, 0.5210781097412109, 24.068641662597656, 26.0621337890625, -4.512401580810547, 19.125335693359375, 3.3994369506835938, 37.57879638671875, 20.118446350097656, 0.881378173828125, -4.6769256591796875, 10.366031646728516, 22.759246826171875, -0.34717559814453125, 20.57052230834961, -23.530227661132812, 1.3932266235351562, 2.8285446166992188, 7.111236572265625, 36.865570068359375, 24.390933990478516, 2.0605697631835938, -6.4081878662109375, 18.502418518066406, 10.721519470214844, -1.5412368774414062, 8.958114624023438, -6.201183319091797, -6.453315734863281, 13.282470703125, 12.72960090637207, -7.351371765136719, 3.852548599243164, 1.3333797454833984, 13.482048034667969, 10.074100494384766, -3.5274276733398438, -7.263584136962891, 11.209783554077148, 2.5685958862304688, 9.947036743164062, 11.907783508300781, 21.99654769897461, 0.6695709228515625, -12.98006820678711, -4.8607330322265625, 11.260826110839844, 12.194271087646484, -13.583663940429688, 0.7271137237548828, 14.034370422363281, -0.9427318572998047, 2.264617919921875, 10.690528869628906, -3.1254730224609375, 22.137290954589844, -16.25796127319336, -4.484779357910156, -0.04821968078613281, 2.5859756469726562, -7.474159240722656, -0.9687309265136719, 30.484615325927734, 0.340118408203125, 10.477279663085938, -1.4929656982421875, 5.1243896484375, -13.388931274414062, -1.9366607666015625, 10.482826232910156, -1.4999237060546875, 31.358795166015625, 14.489286422729492, 5.5120697021484375, 5.629676818847656, 2.6967315673828125, 12.811897277832031, 4.395044326782227, 9.092056274414062, 4.526525497436523, -9.350807189941406, 6.575206756591797, 11.725555419921875, 9.210617065429688, 12.559661865234375, 3.98077392578125, 10.046470642089844, 4.928863525390625, -5.787017822265625, -1.051605224609375, 9.108060836791992, 2.7668113708496094, 20.62459945678711, 10.160858154296875, -7.3901214599609375, 1.8098602294921875, 6.793144226074219, 6.293754577636719, -1.724466323852539, -7.5843048095703125, 8.146183013916016, -11.297115325927734, 15.0218505859375, -22.707740783691406, 20.78005599975586, 2.2705078125, -0.849609375, 2.6371631622314453, 13.765464782714844, 1.2207565307617188, 27.004669189453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000304.npy"}
{"epoch": 0.9212121212121213, "step": 305, "batch_size": 128, "mean": 7.815164089202881, "std": 11.930849075317383, "min": -21.566848754882812, "p10": -7.587422180175781, "median": 7.561371803283691, "p90": 23.549733352661132, "max": 45.49371337890625, "pos_frac": 0.765625, "sample": [31.464080810546875, 17.2457275390625, 13.863899230957031, 7.875396728515625, 7.633331298828125, 0.5996608734130859, 14.322090148925781, 17.655006408691406, 24.14649200439453, 9.51534652709961, 2.6509838104248047, 20.41153335571289, -8.291351318359375, 17.171646118164062, 7.3116912841796875, 18.852508544921875, 23.29397964477539, 3.7099761962890625, 5.36309814453125, 12.516862869262695, 5.124397277832031, 1.8936386108398438, 20.010971069335938, 1.9551849365234375, 13.291770935058594, 4.559070587158203, 1.8685302734375, 0.36217498779296875, 11.384117126464844, 5.872394561767578, 6.167869567871094, 0.4896202087402344, -2.397127151489258, -3.7705459594726562, -3.205120086669922, 9.305709838867188, -8.131023406982422, 4.840394973754883, 26.425399780273438, 14.270545959472656, -4.395313262939453, 22.74730682373047, 15.110591888427734, 6.159126281738281, -7.427234649658203, 5.9625701904296875, 14.239540100097656, -1.225046157836914, 3.023862838745117, -7.8062896728515625, 9.127941131591797, 15.67218017578125, 18.541645050048828, -21.566848754882812, 7.489412307739258, 6.611934661865234, 14.994331359863281, 13.257959365844727, 14.48299789428711, -11.290687561035156, 13.404647827148438, 9.641241073608398, 7.1383209228515625, -4.632728576660156, 9.279939651489258, 14.982925415039062, 3.8916168212890625, 30.415695190429688, -0.989471435546875, 14.009822845458984, 2.3033370971679688, 5.805486679077148, 23.16878318786621, -1.8934860229492188, -12.31915283203125, 0.7149658203125, -10.660308837890625, -11.045967102050781, 5.982635498046875, 13.973640441894531, 22.127243041992188, 18.652114868164062, -12.0096435546875, 12.378347396850586, 27.57137680053711, 3.4574012756347656, 11.678955078125, 11.63189697265625, 13.350048065185547, 24.282962799072266, 9.408576965332031, 8.889455795288086, 5.9333343505859375, 1.6646595001220703, -5.012311935424805, -1.8645210266113281, -20.542739868164062, 17.153663635253906, 8.102775573730469, -7.445423126220703, 10.36651611328125, 27.962570190429688, -2.303131103515625, 8.245931625366211, -8.968879699707031, -7.493621826171875, -12.236404418945312, 30.84395980834961, 12.657905578613281, 25.189430236816406, 1.2056846618652344, 45.49371337890625, 1.1681060791015625, 7.659711837768555, 9.677261352539062, -4.6492919921875, 31.65545654296875, 0.2123260498046875, -3.1413002014160156, 8.074810028076172, -10.435806274414062, 7.409156799316406, 36.67432403564453, 2.303577423095703, 8.332206726074219, -6.8321533203125, 32.05133056640625, 15.297554016113281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000305.npy"}
{"epoch": 0.9242424242424242, "step": 306, "batch_size": 128, "mean": 6.879961013793945, "std": 11.120269775390625, "min": -26.561908721923828, "p10": -5.8118505477905265, "median": 5.4394683837890625, "p90": 21.377573776245114, "max": 43.97608184814453, "pos_frac": 0.7421875, "sample": [10.921142578125, 12.321771621704102, -5.632049560546875, 7.0809326171875, 14.039909362792969, 2.9330272674560547, 2.708465576171875, 7.197914123535156, -11.236164093017578, 23.621185302734375, -7.42839241027832, 33.19581604003906, 17.435096740722656, 11.063423156738281, 6.0749664306640625, -6.231386184692383, -2.3885498046875, 23.217529296875, -1.7609939575195312, -3.9401626586914062, -16.297054290771484, 17.692916870117188, 5.21002197265625, 4.255176544189453, 15.068258285522461, 14.05364990234375, 10.830070495605469, 20.319883346557617, 2.4967613220214844, 5.991058349609375, 3.703641891479492, 3.4337921142578125, -4.600521087646484, 8.304351806640625, -5.451351165771484, 21.089767456054688, 7.6694183349609375, 9.248815536499023, -0.12569046020507812, 3.659473419189453, -8.013885498046875, 0.6139202117919922, 16.64124298095703, 2.3729934692382812, 2.43017578125, 18.134185791015625, 4.539312362670898, -3.4825592041015625, 10.189956665039062, 8.111360549926758, -5.601743698120117, 7.8883209228515625, 2.9808387756347656, 0.6409034729003906, 3.986560821533203, 6.1316986083984375, 24.42795181274414, -9.587623596191406, 9.739692687988281, 2.8647918701171875, -2.0578460693359375, 1.7398567199707031, -1.288412094116211, 9.772209167480469, -1.9734382629394531, -2.2880401611328125, 19.57989501953125, 2.3251800537109375, 2.07305908203125, 0.7173919677734375, 18.51812171936035, 13.878425598144531, 16.344955444335938, -3.430267333984375, -1.189788818359375, 11.594898223876953, 8.049110412597656, -9.256149291992188, 5.159431457519531, 1.6354331970214844, -0.7633686065673828, 0.28796958923339844, 0.26880645751953125, 27.56555938720703, -0.4152069091796875, 22.049121856689453, 30.118804931640625, -11.429393768310547, 19.618972778320312, 43.97608184814453, 0.2647285461425781, 4.08148193359375, 28.3343505859375, 7.242454528808594, 3.384103775024414, 12.8719482421875, -3.6572036743164062, 4.847042083740234, 17.682098388671875, 10.118484497070312, 6.088569641113281, 5.668914794921875, 11.944328308105469, 11.151100158691406, -12.298782348632812, 3.3257522583007812, 26.00586700439453, -6.477563858032227, -8.735797882080078, -1.5348358154296875, 5.843547821044922, 22.3223934173584, 32.51226806640625, 4.578987121582031, 18.607192993164062, -7.318443298339844, 30.955520629882812, 13.5404052734375, 17.699356079101562, -3.9182300567626953, 14.990985870361328, 10.139625549316406, 10.897163391113281, -26.561908721923828, 10.107717514038086, 17.13525390625, 3.8292770385742188, 9.031452178955078], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000306.npy"}
{"epoch": 0.9272727272727272, "step": 307, "batch_size": 128, "mean": 8.414079666137695, "std": 11.19717025756836, "min": -25.684524536132812, "p10": -3.656970214843749, "median": 7.903270721435547, "p90": 22.491070556640622, "max": 36.663917541503906, "pos_frac": 0.765625, "sample": [-3.3430633544921875, 10.567741394042969, -2.4163436889648438, 4.474601745605469, 12.404289245605469, 10.924652099609375, 16.40283203125, 9.630546569824219, 0.47470664978027344, 5.284608840942383, 24.983169555664062, 9.37139892578125, 0.9407291412353516, -5.54408073425293, -5.2752685546875, -0.9814224243164062, 15.2506103515625, -1.1287288665771484, -1.5975799560546875, -2.2920303344726562, -5.265617370605469, 36.663917541503906, 11.73134994506836, 35.90785217285156, 6.283241271972656, 17.26573944091797, 0.00997161865234375, -2.3437652587890625, 9.548027038574219, 27.549224853515625, 23.591827392578125, 5.629819869995117, -6.53546142578125, 0.04984855651855469, 7.624177932739258, 10.598457336425781, -22.00023651123047, 15.321014404296875, 11.731027603149414, 16.46533203125, 24.87824249267578, 8.122030258178711, 29.964000701904297, 5.951761245727539, -1.2239837646484375, 7.646553039550781, 21.065017700195312, 4.03375244140625, 19.94898223876953, 20.757326126098633, 1.952117919921875, 12.54815673828125, 35.45317077636719, -0.8678836822509766, 24.86455535888672, 10.551055908203125, 6.249944686889648, 22.164474487304688, 9.812564849853516, 34.5438346862793, -0.13871002197265625, 25.695941925048828, 3.3959808349609375, -13.47613525390625, 14.69888687133789, 13.353475570678711, -2.4572296142578125, 14.274986267089844, 13.129859924316406, 7.050056457519531, -0.7049732208251953, -0.5596408843994141, 18.159011840820312, 14.324600219726562, 17.214736938476562, 20.962448120117188, -6.131643295288086, 10.303752899169922, 0.140472412109375, -21.335433959960938, 3.5343704223632812, 19.638416290283203, 2.3482913970947266, 2.0058822631835938, 18.079021453857422, 3.6009674072265625, -25.684524536132812, 12.582927703857422, 14.172691345214844, -5.473014831542969, 7.684511184692383, 6.362815856933594, -4.6707763671875, -0.5271549224853516, -0.6352615356445312, 19.01556396484375, 12.962547302246094, 14.302303314208984, 26.336105346679688, 10.906562805175781, 13.766128540039062, 11.079715728759766, 12.047988891601562, 10.812007904052734, 20.3466796875, -4.3894195556640625, -8.509895324707031, 4.4971160888671875, 16.01519203186035, 5.2851409912109375, 2.4405441284179688, 3.0587539672851562, -1.1561431884765625, 23.253128051757812, 3.7184371948242188, 9.817132949829102, 7.58001708984375, 18.16748046875, 4.401037216186523, 0.400848388671875, 14.285919189453125, 2.946746826171875, 18.94215965270996, 0.20972824096679688, 4.070030212402344, 11.420366287231445, -3.2814788818359375, 14.961372375488281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000307.npy"}
{"epoch": 0.9303030303030303, "step": 308, "batch_size": 128, "mean": 7.529571533203125, "std": 11.688912391662598, "min": -21.92131805419922, "p10": -7.425758361816405, "median": 6.3185272216796875, "p90": 23.178816604614255, "max": 33.50030517578125, "pos_frac": 0.765625, "sample": [13.916351318359375, 5.6440277099609375, 16.242013931274414, 5.116443634033203, 5.241432189941406, 20.204837799072266, 4.791618347167969, -19.190269470214844, 9.85601806640625, 30.71532440185547, 9.612785339355469, 2.174713134765625, 18.777921676635742, -3.2190933227539062, 23.026153564453125, -3.115825653076172, 3.1098709106445312, -3.572284698486328, 7.948966979980469, 19.629127502441406, 17.17010498046875, 3.460540771484375, 1.3210296630859375, 19.541259765625, 1.3886947631835938, 24.11289405822754, 3.270538330078125, 16.01806640625, -2.0117034912109375, 7.862895965576172, -4.0027618408203125, 26.690460205078125, 22.049915313720703, 8.708160400390625, -8.46435546875, 7.546073913574219, -14.715225219726562, 4.948822021484375, 13.772359848022461, -8.59933090209961, 17.994787216186523, 3.087371826171875, -8.27508544921875, 6.560455322265625, 1.356231689453125, 27.559478759765625, 18.58260154724121, 2.768085479736328, 30.610252380371094, 32.53778076171875, 23.535030364990234, 8.46868896484375, 20.561386108398438, -3.7584495544433594, -0.04886627197265625, 2.0438232421875, 2.7639236450195312, 9.273317337036133, 14.95065689086914, 2.3769702911376953, 9.799022674560547, 0.8341827392578125, -21.92131805419922, -17.88922119140625, 22.067771911621094, 7.994422912597656, 3.48565673828125, -2.7273178100585938, 18.434097290039062, -7.155935287475586, 28.760101318359375, 12.342971801757812, 11.713836669921875, -2.1927108764648438, 2.8754501342773438, 33.50030517578125, 5.711517333984375, 32.27622985839844, -4.796112060546875, -2.137899398803711, 3.3621063232421875, 13.321128845214844, 29.63463592529297, 20.777130126953125, -8.204254150390625, 3.692352294921875, -7.8968505859375, 19.054094314575195, -9.267990112304688, 0.8288192749023438, -7.2238616943359375, -10.928741455078125, 6.6937103271484375, 12.757804870605469, 7.83551025390625, 25.962047576904297, 2.7267608642578125, 8.48721694946289, 9.529048919677734, 3.2306442260742188, 5.943401336669922, -12.78912353515625, 8.282520294189453, -14.281484603881836, 26.458145141601562, -3.807647705078125, -1.4350051879882812, 1.2009658813476562, 0.6877193450927734, 20.69232177734375, 6.07659912109375, 7.310279846191406, 15.911102294921875, 0.42218017578125, 7.016153335571289, 12.557220458984375, 3.936492919921875, 17.208328247070312, 2.0293502807617188, 5.630073547363281, 11.19576644897461, 9.674362182617188, 17.624679565429688, -0.0504302978515625, 21.3872013092041, 13.135543823242188, 10.999252319335938, -4.504201889038086], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000308.npy"}
{"epoch": 0.9333333333333333, "step": 309, "batch_size": 128, "mean": 6.7162885665893555, "std": 10.365317344665527, "min": -21.750076293945312, "p10": -4.804743194580078, "median": 5.366440773010254, "p90": 20.93260498046875, "max": 37.54505157470703, "pos_frac": 0.78125, "sample": [3.6785812377929688, 1.2635307312011719, -11.3189697265625, 11.175483703613281, 19.786712646484375, 11.441083908081055, -5.5713958740234375, 17.11672019958496, 1.667266845703125, 13.195808410644531, -4.7444305419921875, 0.3499584197998047, 26.430206298828125, -2.2877464294433594, 0.4296092987060547, 20.9288330078125, 2.6063499450683594, 20.577667236328125, 9.638839721679688, 37.54505157470703, 9.495674133300781, -9.957298278808594, -1.4502449035644531, 18.378456115722656, -12.614681243896484, 11.349433898925781, 9.513038635253906, 11.449531555175781, 8.842147827148438, 5.074075698852539, 27.902503967285156, 19.608308792114258, 6.114269256591797, -6.8795013427734375, 7.192939758300781, 8.703140258789062, 3.32073974609375, 3.8941211700439453, 0.8413181304931641, 9.772651672363281, 8.6934814453125, -3.724668502807617, 0.6435909271240234, 14.265304565429688, 4.6912078857421875, 5.658805847167969, 4.180732727050781, -0.16588592529296875, 15.065948486328125, 6.71795654296875, -6.376924514770508, 0.6384067535400391, 27.2606201171875, 12.393882751464844, -14.056079864501953, -18.17791748046875, 10.774345397949219, -2.9422378540039062, 6.293548583984375, 8.006271362304688, 2.354541778564453, 0.9832420349121094, 7.263906478881836, -3.1637344360351562, 35.21417236328125, 24.238685607910156, 3.1901779174804688, 3.9842910766601562, 4.073783874511719, -0.5969009399414062, -2.2682571411132812, -0.2053680419921875, 0.982421875, 1.4775657653808594, 27.018768310546875, 16.772689819335938, 6.487129211425781, 32.951141357421875, 4.555864334106445, 9.922027587890625, 21.176902770996094, 2.9322452545166016, 10.084602355957031, 4.475860595703125, 13.685920715332031, -6.5249176025390625, 11.41494369506836, 8.77081298828125, 10.407752990722656, 16.192012786865234, -4.945472717285156, 18.883724212646484, 7.8070220947265625, 4.710109710693359, 4.04248046875, 2.8180007934570312, 10.753662109375, -1.0964279174804688, 9.815689086914062, 6.165863037109375, 12.27700424194336, 7.959136962890625, 23.557188034057617, 5.819133758544922, 1.4454803466796875, 25.120990753173828, -0.15215301513671875, 8.880897521972656, 10.613685607910156, 0.5531196594238281, 3.5879974365234375, 0.7359619140625, -4.012453079223633, -0.4131145477294922, 0.06102561950683594, -8.544792175292969, 1.8429641723632812, -5.349082946777344, 0.3540840148925781, -21.750076293945312, 2.9899024963378906, 14.296051025390625, 7.845920562744141, 20.94140625, 9.482833862304688, 27.82898712158203, -1.534637451171875, 4.140316009521484], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000309.npy"}
{"epoch": 0.9363636363636364, "step": 310, "batch_size": 128, "mean": 7.466680526733398, "std": 11.238346099853516, "min": -20.6158447265625, "p10": -4.96348991394043, "median": 6.7861175537109375, "p90": 22.73809585571289, "max": 34.787513732910156, "pos_frac": 0.734375, "sample": [10.679954528808594, -0.9133758544921875, 2.698436737060547, 13.636226654052734, 34.787513732910156, 21.181167602539062, 22.89051055908203, -4.938556671142578, 14.039688110351562, 6.7908477783203125, 3.830986976623535, 1.765045166015625, -7.064178466796875, 28.357467651367188, 3.3380203247070312, -7.296302795410156, -4.852382659912109, 9.1441650390625, 23.427013397216797, 25.616287231445312, 6.7813873291015625, 26.12427520751953, 26.199344635009766, -13.359443664550781, 8.80228042602539, 1.970245361328125, 4.379600524902344, 6.411037445068359, 10.330163955688477, 12.95068359375, 21.90753173828125, -5.6966094970703125, -16.67310333251953, 20.37841796875, 22.23971939086914, 9.776294708251953, 18.7706298828125, -3.285137176513672, 2.8062286376953125, 0.8870124816894531, -3.439401626586914, 14.806053161621094, 18.723651885986328, 14.7418212890625, 0.5056076049804688, -2.529153823852539, 1.6844406127929688, -20.6158447265625, 11.037065505981445, 24.464340209960938, 6.890445709228516, -1.0166873931884766, 17.699813842773438, 17.503921508789062, 11.349586486816406, 7.709232330322266, 8.931556701660156, 15.138660430908203, -2.5612049102783203, 1.740774154663086, -6.550159454345703, 14.061225891113281, 3.7937164306640625, 7.815574645996094, 3.7152786254882812, -5.02166748046875, -6.269508361816406, 24.883148193359375, -17.859939575195312, -1.2494163513183594, 1.6524505615234375, 22.470535278320312, 14.241859436035156, 0.3143482208251953, -0.88165283203125, -1.0402297973632812, -8.268829345703125, 6.1546630859375, -20.409027099609375, 25.144119262695312, -1.6045360565185547, 3.553842544555664, -0.0193023681640625, 1.9571075439453125, 0.6155853271484375, 10.499954223632812, 15.275886535644531, 4.97955322265625, 0.33377838134765625, 10.380970001220703, 0.5699577331542969, 27.380477905273438, -3.357177734375, -1.6545352935791016, -1.7566814422607422, -1.7719001770019531, 3.767608642578125, 17.597082138061523, 17.5594482421875, 27.013195037841797, 1.968719482421875, 15.438995361328125, 3.959125518798828, 22.672775268554688, -15.045654296875, 19.451210021972656, -0.5626487731933594, -2.5798110961914062, 30.913482666015625, -3.9664230346679688, 11.747909545898438, 15.407661437988281, 16.471712112426758, 4.7586212158203125, 11.844442367553711, 1.9133415222167969, 7.218671798706055, -3.6364974975585938, 21.87970733642578, 19.474510192871094, 9.804349899291992, 9.485126495361328, 16.287168502807617, 9.81097412109375, 13.859010696411133, 11.94915771484375, 13.483734130859375, 2.125185012817383], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000310.npy"}
{"epoch": 0.9393939393939394, "step": 311, "batch_size": 128, "mean": 6.980600357055664, "std": 10.663087844848633, "min": -21.74688720703125, "p10": -5.260951423645019, "median": 6.735271453857422, "p90": 21.730005836486818, "max": 38.99241638183594, "pos_frac": 0.7265625, "sample": [-3.7456626892089844, 9.147628784179688, 7.522163391113281, -4.666065216064453, -0.0876922607421875, -21.74688720703125, 10.603927612304688, 20.6192626953125, 8.259027481079102, 11.141204833984375, 8.928070068359375, 22.973770141601562, -0.24872589111328125, 0.8227081298828125, 15.9959716796875, -5.945652008056641, 0.0668792724609375, 16.620635986328125, 11.665760040283203, -0.9199295043945312, 6.498147964477539, 4.498991012573242, 9.407848358154297, -1.719146728515625, -10.113174438476562, 8.388631820678711, -0.33016395568847656, 22.768381118774414, 13.912071228027344, 0.34107208251953125, 9.930084228515625, -1.4257240295410156, -2.9535465240478516, 0.38994789123535156, 9.709419250488281, 17.802993774414062, 14.664379119873047, -1.3351192474365234, 27.907075881958008, 10.973251342773438, 21.109962463378906, 15.237911224365234, 7.4230194091796875, -11.864837646484375, 13.647125244140625, -0.7847728729248047, 31.412200927734375, 9.438446044921875, -5.55390739440918, 3.575878143310547, 18.284889221191406, 4.65521240234375, 1.6582794189453125, -5.665712356567383, 7.803274154663086, -5.135398864746094, -8.13670539855957, 20.573516845703125, -13.789005279541016, 8.490577697753906, 8.91455078125, 3.1338157653808594, 5.571989059448242, 18.01458740234375, 12.761131286621094, 5.618915557861328, 3.3514328002929688, -9.176620483398438, 10.060441970825195, 11.722137451171875, -0.7091217041015625, 12.851188659667969, 5.716651916503906, 14.093883514404297, 8.506851196289062, 25.282241821289062, 9.697919845581055, 19.9826602935791, -3.6304492950439453, 6.899253845214844, 7.538448333740234, -0.11494064331054688, 5.1529083251953125, 1.8977699279785156, 22.921478271484375, 23.155967712402344, 2.6734771728515625, 0.03562164306640625, 0.505950927734375, 12.28487777709961, 25.00564956665039, -1.3085441589355469, 1.4743919372558594, -0.34569549560546875, 3.9542903900146484, -1.5171585083007812, -3.2682037353515625, 0.90509033203125, 29.23352813720703, 8.406726837158203, -0.3186759948730469, -2.9334030151367188, 21.722148895263672, -0.377288818359375, 29.18603515625, 1.4560699462890625, 9.14764404296875, -6.749671936035156, 2.1636009216308594, 13.856441497802734, 38.99241638183594, -12.5218505859375, 21.54443359375, 11.760406494140625, 3.0215988159179688, 1.8971118927001953, 7.9093017578125, 15.911273956298828, 6.5712890625, -18.014022827148438, 29.391754150390625, 5.315093994140625, -9.26829719543457, 3.2602691650390625, 21.74833869934082, 8.23977279663086, 15.284019470214844, 9.364166259765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000311.npy"}
{"epoch": 0.9424242424242424, "step": 312, "batch_size": 128, "mean": 7.98387336730957, "std": 10.951324462890625, "min": -21.126934051513672, "p10": -4.717486000061035, "median": 7.463300704956055, "p90": 21.290604400634763, "max": 38.27375030517578, "pos_frac": 0.75, "sample": [13.876510620117188, 1.4160308837890625, -4.800413131713867, -11.804628372192383, 8.799400329589844, 13.807144165039062, 10.418502807617188, 12.081001281738281, -7.852653503417969, 17.05744171142578, 13.482643127441406, 7.234771728515625, 1.9996528625488281, 5.68317985534668, 23.9354248046875, -7.126422882080078, 12.254409790039062, 17.5841064453125, -1.8504638671875, -3.7953128814697266, 13.72830581665039, 4.964576721191406, 16.118133544921875, 20.259658813476562, 6.424509048461914, 13.432086944580078, 11.744583129882812, 35.22782897949219, 2.0161285400390625, 16.988235473632812, 0.7519989013671875, 5.154548645019531, 14.086517333984375, 15.862613677978516, -3.6470298767089844, 0.5252552032470703, 10.947517395019531, 10.885665893554688, -2.090362548828125, 28.492950439453125, -10.322547912597656, 15.207454681396484, 10.891448974609375, 10.2452392578125, -4.68194580078125, 12.9036865234375, 22.116912841796875, 38.27375030517578, 17.988037109375, 30.231590270996094, 1.1561260223388672, -0.7309627532958984, -1.3642158508300781, 2.1050853729248047, -8.417404174804688, 7.411577224731445, -0.069580078125, 1.5947608947753906, 11.4100341796875, -0.7926807403564453, 17.61974334716797, 1.7091903686523438, -2.250946044921875, -13.697891235351562, -7.9173736572265625, -5.026145935058594, 17.245832443237305, 14.31072998046875, 27.219482421875, 21.57849884033203, -0.4583473205566406, 2.0757598876953125, 6.1740570068359375, 27.06524658203125, 17.587356567382812, -7.154457092285156, 0.12183761596679688, 2.6046066284179688, -4.443769454956055, 17.27576446533203, 12.694091796875, 16.427200317382812, 7.515024185180664, -3.337697982788086, 25.589624404907227, 16.643157958984375, 38.15611267089844, 15.363788604736328, -2.949678421020508, 13.631561279296875, 16.079345703125, 6.439035415649414, 2.4996604919433594, 1.1578922271728516, -21.126934051513672, 14.097719192504883, -2.4669113159179688, -2.8328857421875, 11.718978881835938, 4.778036117553711, 5.695528030395508, 2.7164878845214844, 16.538612365722656, 10.906021118164062, -2.097402572631836, 13.64963150024414, -0.16493988037109375, -10.824333190917969, 6.121856689453125, 5.286445617675781, 15.019058227539062, 28.638412475585938, 16.669544219970703, 8.527385711669922, 10.0103759765625, 8.158920288085938, -2.1517715454101562, 16.66168975830078, 24.122222900390625, 3.197772979736328, 3.4209136962890625, 0.77703857421875, 9.825531005859375, 1.0282516479492188, -10.163158416748047, 1.8993339538574219, 21.167221069335938, 16.152359008789062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000312.npy"}
{"epoch": 0.9454545454545454, "step": 313, "batch_size": 128, "mean": 6.924525260925293, "std": 11.950004577636719, "min": -18.085693359375, "p10": -7.492190551757812, "median": 6.073019981384277, "p90": 24.014691162109372, "max": 45.51483154296875, "pos_frac": 0.6953125, "sample": [7.938146591186523, 9.067008972167969, -0.09548568725585938, 4.1142730712890625, 0.34527587890625, 15.85284423828125, -1.2258796691894531, 0.5257186889648438, 20.640548706054688, 10.212203979492188, -4.214054107666016, 1.2391014099121094, 13.458114624023438, 13.912727355957031, 2.7956314086914062, -18.085693359375, 2.5693359375, -9.360248565673828, 8.3133544921875, -1.4409046173095703, -4.495765686035156, 22.643798828125, 3.2368927001953125, -1.2805633544921875, -6.226345062255859, 6.828041076660156, -4.881660461425781, -14.98101806640625, -15.944986343383789, 26.45003890991211, 5.93147087097168, -1.5119400024414062, 9.319549560546875, 22.572479248046875, 3.8782958984375, -4.511396408081055, 11.130645751953125, -3.7546730041503906, 22.09326171875, 6.4415283203125, 2.4211349487304688, 10.650350570678711, 10.527786254882812, 15.308975219726562, -0.6541404724121094, 5.119998931884766, -3.709430694580078, 13.447990417480469, 13.898345947265625, 26.254867553710938, 12.248916625976562, 6.131254196166992, 10.309577941894531, -0.7577095031738281, -8.580806732177734, -8.588333129882812, 11.757110595703125, 3.2887840270996094, 2.660938262939453, -7.961750030517578, 4.473232269287109, 6.505496978759766, 10.58120346069336, 9.239906311035156, 20.0595703125, 4.4286651611328125, 11.241615295410156, -7.403961181640625, 45.51483154296875, 25.4836483001709, 32.219085693359375, -9.852331161499023, 16.88849639892578, -4.530740737915039, 23.45519256591797, 2.366548538208008, -14.911785125732422, 6.0147857666015625, 3.815916061401367, 27.713104248046875, 30.587753295898438, 18.615432739257812, -0.8783187866210938, -10.597766876220703, -3.4186019897460938, 8.897993087768555, -4.341625213623047, 11.139747619628906, 10.297311782836914, 8.077167510986328, 25.962234497070312, 10.667892456054688, -9.609262466430664, 25.627197265625, -6.617898941040039, 22.863311767578125, 6.873870849609375, 22.077489852905273, -3.965909957885742, -2.6313018798828125, 0.9417877197265625, 0.09996604919433594, -7.69805908203125, 27.10120391845703, 5.05345344543457, 0.5694751739501953, 7.683347702026367, 16.65362548828125, -6.0921783447265625, 39.292755126953125, 15.068981170654297, -0.11415672302246094, 10.270992279052734, 3.592731475830078, 23.701278686523438, 2.658956527709961, -12.276458740234375, 7.545497894287109, 11.092838287353516, 14.715545654296875, 3.650918960571289, 27.790672302246094, 10.103059768676758, 16.74272918701172, 24.745986938476562, -6.244302749633789, 7.64080810546875, -4.154884338378906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000313.npy"}
{"epoch": 0.9484848484848485, "step": 314, "batch_size": 128, "mean": 8.825431823730469, "std": 12.801202774047852, "min": -32.65733337402344, "p10": -6.926908111572265, "median": 8.78945541381836, "p90": 24.089513778686523, "max": 41.550537109375, "pos_frac": 0.75, "sample": [6.107917785644531, 6.160129547119141, 12.407306671142578, 11.966567993164062, 6.311248779296875, 18.650775909423828, 23.710281372070312, 27.972572326660156, -5.4148101806640625, 1.8376541137695312, 41.550537109375, 8.827110290527344, 13.646629333496094, 5.858661651611328, 5.872562408447266, 9.001190185546875, 12.233734130859375, 1.8750686645507812, -0.9665679931640625, 5.360801696777344, -2.0744400024414062, 2.8342208862304688, -0.8352909088134766, 20.27823257446289, 7.694618225097656, 21.805770874023438, 8.109382629394531, -12.666099548339844, 0.40581512451171875, 12.483453750610352, 24.052051544189453, 10.4732666015625, 34.978515625, -0.0372467041015625, -3.802959442138672, 26.00811767578125, 31.504928588867188, 8.885063171386719, 9.593080520629883, -14.875137329101562, -1.73345947265625, -6.487968444824219, 22.748594284057617, -10.243896484375, 21.844329833984375, -0.9861106872558594, -1.7185192108154297, 29.4090576171875, 23.650054931640625, -0.6526355743408203, -16.36865997314453, -6.857170104980469, -5.881950378417969, 4.638206481933594, -0.123687744140625, 14.093912124633789, 4.250129699707031, -13.256538391113281, 17.639205932617188, 1.2398300170898438, 15.796815872192383, 21.50145149230957, 22.436811447143555, 31.560333251953125, 32.171112060546875, 0.99945068359375, 24.176925659179688, 12.802339553833008, -11.185903549194336, -10.02325439453125, 21.40496826171875, -2.163970947265625, -32.65733337402344, -19.547569274902344, 9.582763671875, 3.49774169921875, 23.57135772705078, 4.515960693359375, 5.0399017333984375, -8.78173828125, -0.02277374267578125, 26.016117095947266, 5.205249786376953, 17.735626220703125, 2.4495925903320312, 2.1876792907714844, 20.97913360595703, -5.583793640136719, 17.16033935546875, 18.198305130004883, 8.182579040527344, -8.447196960449219, 5.169029235839844, -19.740699768066406, 14.531740188598633, 19.250587463378906, 19.842466354370117, 11.161767959594727, 30.483692169189453, 28.07794952392578, 14.4884033203125, 12.499227523803711, 5.231842041015625, 13.490962982177734, 17.46051025390625, 9.942070007324219, 2.6507034301757812, 19.931182861328125, 1.1703567504882812, 23.394081115722656, 18.50582504272461, 23.590133666992188, 13.74185562133789, 8.532661437988281, 10.109596252441406, 17.7126522064209, 10.774826049804688, 8.751800537109375, 26.30804443359375, 16.828842163085938, 7.2812957763671875, 12.891166687011719, 1.6242218017578125, 1.6824111938476562, -1.8916187286376953, 11.539093017578125, -2.0203323364257812, -7.089630126953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000314.npy"}
{"epoch": 0.9515151515151515, "step": 315, "batch_size": 128, "mean": 7.53550386428833, "std": 10.290609359741211, "min": -12.311649322509766, "p10": -5.655188751220703, "median": 5.826208114624023, "p90": 22.24126739501953, "max": 33.8485107421875, "pos_frac": 0.734375, "sample": [-3.2996902465820312, 3.3876495361328125, -6.339332580566406, 12.363048553466797, 8.820261001586914, 3.021728515625, -1.4223785400390625, 11.677789688110352, 9.92135238647461, 0.4501838684082031, -0.361724853515625, 15.225387573242188, -7.9311676025390625, 2.55999755859375, -4.117387771606445, -4.32819938659668, 2.6098861694335938, 10.401302337646484, 12.278182983398438, 9.536102294921875, 2.8790245056152344, 1.6894378662109375, 2.192371368408203, -0.8424320220947266, 5.454626083374023, 20.509057998657227, 22.425853729248047, -0.024127960205078125, 24.970855712890625, 3.1801986694335938, 11.984588623046875, 22.53802490234375, -5.670265197753906, 9.736549377441406, 2.7061614990234375, 5.288917541503906, 5.175498962402344, 23.026260375976562, -3.310688018798828, -7.241981506347656, -1.4815692901611328, 5.884239196777344, 26.509292602539062, 17.3128662109375, -1.442758560180664, 19.78234100341797, 7.6843109130859375, -1.2641983032226562, -7.1100311279296875, 0.008884429931640625, 31.37896728515625, 4.891820907592773, 3.6407699584960938, -0.0479278564453125, 3.281097412109375, 9.122737884521484, 4.421180725097656, 17.385040283203125, 22.898643493652344, 23.39105987548828, -2.0267410278320312, 18.081321716308594, -6.8825531005859375, 6.993247985839844, -11.875246047973633, 8.266372680664062, -1.6892166137695312, 1.3423004150390625, 21.614303588867188, 10.472747802734375, 10.583118438720703, 4.272197723388672, -12.311649322509766, 10.12269401550293, 14.022804260253906, 12.060546875, 4.413665771484375, 8.025630950927734, 11.952247619628906, 18.85516357421875, -3.477062225341797, 27.387306213378906, 11.638931274414062, 2.9194717407226562, 20.24970245361328, 0.3055915832519531, 11.43695068359375, 7.410285949707031, 7.199146270751953, -5.6487274169921875, 0.8087615966796875, -3.833171844482422, 3.0461196899414062, 21.8349609375, 9.211402893066406, 3.9954376220703125, 30.198532104492188, 16.822433471679688, -0.8029460906982422, 18.607471466064453, -5.747043609619141, 22.162158966064453, -6.247486114501953, 22.822134017944336, 20.78271484375, 12.608009338378906, -6.903007507324219, 6.93665885925293, -10.463310241699219, 8.928268432617188, -0.362518310546875, 13.532127380371094, 33.33668518066406, 11.669668197631836, 15.912590026855469, -6.860189437866211, 33.8485107421875, 15.24853515625, 15.933151245117188, 0.8274993896484375, 2.374889373779297, 3.209156036376953, 13.940681457519531, 21.36734390258789, -5.115177154541016, 13.767852783203125, 5.768177032470703, -1.7048664093017578], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000315.npy"}
{"epoch": 0.9545454545454546, "step": 316, "batch_size": 128, "mean": 9.417740821838379, "std": 12.822270393371582, "min": -20.403472900390625, "p10": -6.342971038818359, "median": 7.9586286544799805, "p90": 26.429478454589844, "max": 42.38572692871094, "pos_frac": 0.78125, "sample": [11.767351150512695, 11.144683837890625, 8.544937133789062, 18.36449432373047, 8.749847412109375, 29.593505859375, 20.786346435546875, -1.604461669921875, -0.13479232788085938, 4.3039093017578125, 12.953315734863281, 42.38572692871094, 26.742752075195312, -6.055549621582031, 39.554901123046875, 24.725845336914062, -14.656036376953125, 4.206632614135742, 9.502132415771484, 25.31414794921875, 23.26663589477539, 0.2658843994140625, 38.9967041015625, 23.763168334960938, -2.6202392578125, 15.650634765625, -17.019546508789062, 11.415496826171875, 8.259292602539062, 26.810935974121094, -3.5118045806884766, 26.405441284179688, -1.4133033752441406, -2.9539337158203125, -13.545989990234375, -9.067522048950195, 7.854419708251953, 6.747428894042969, -1.786355972290039, 4.866115570068359, 19.15264892578125, 13.448448181152344, 0.6155548095703125, 38.849395751953125, -6.22406005859375, 8.062837600708008, 3.634868621826172, 11.039588928222656, 8.585464477539062, -9.284774780273438, 6.566497802734375, 11.149057388305664, -6.620429992675781, -14.0728759765625, 6.298959732055664, 4.963859558105469, 5.416820526123047, 6.7073516845703125, 4.24810791015625, 27.915771484375, 15.258342742919922, 23.18743896484375, 10.060894012451172, 27.086349487304688, 18.17141342163086, 19.903350830078125, 6.0337066650390625, 4.62065315246582, 6.1695709228515625, 26.06090545654297, -4.29817008972168, 14.10293197631836, 20.149261474609375, -2.727275848388672, 21.318809509277344, -19.53236961364746, 2.8579559326171875, 0.8708915710449219, -10.504758834838867, 4.882164001464844, 5.45361328125, -5.478065490722656, 14.335502624511719, 19.591495513916016, 10.792266845703125, 7.698640823364258, 18.8055419921875, 14.461889266967773, 22.277488708496094, 22.557912826538086, 28.2589111328125, -20.403472900390625, 6.841392517089844, 21.54755401611328, -6.976787567138672, -0.19078826904296875, 1.9524459838867188, 2.0950927734375, 2.560943603515625, -8.403732299804688, 14.423065185546875, 1.5701885223388672, -0.17038345336914062, 26.485565185546875, 3.6701812744140625, 23.31147003173828, 4.877227783203125, 8.348316192626953, 3.857088088989258, 8.950233459472656, 16.643646240234375, 26.803131103515625, 23.53160858154297, 22.46481704711914, 4.467674255371094, -0.5402622222900391, 1.7351493835449219, 20.800682067871094, 13.838218688964844, 6.2946624755859375, 24.90465545654297, 20.942398071289062, 27.844711303710938, 8.982593536376953, 2.0274887084960938, 1.5186271667480469, -12.921756744384766, 0.33371734619140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000316.npy"}
{"epoch": 0.9575757575757575, "step": 317, "batch_size": 128, "mean": 5.920985221862793, "std": 12.059452056884766, "min": -21.856101989746094, "p10": -7.288861656188964, "median": 4.045978546142578, "p90": 22.112305450439454, "max": 41.70378112792969, "pos_frac": 0.6484375, "sample": [-5.415248870849609, 3.5131893157958984, 7.121772766113281, 7.21356201171875, 8.109786987304688, 11.481307983398438, 9.402023315429688, 7.8516693115234375, -8.910621643066406, -3.11895751953125, 20.16474151611328, -1.3195037841796875, 4.301570892333984, 0.6594104766845703, 19.437530517578125, 20.1053466796875, 20.30866050720215, 13.810592651367188, -15.126415252685547, 4.975826263427734, -9.951835632324219, -5.87005615234375, -1.1573009490966797, -9.715652465820312, 17.708839416503906, 22.144920349121094, 28.364185333251953, 17.900728225708008, 18.99520492553711, 29.11334228515625, 3.4355525970458984, -7.104696273803711, 4.369052886962891, 4.121025085449219, 1.4854507446289062, 3.786649703979492, 30.475608825683594, 7.878950119018555, -1.7166786193847656, 12.196487426757812, 17.774423599243164, 11.29119873046875, -4.349725723266602, 7.776500701904297, 4.863424301147461, 20.41919708251953, 12.506362915039062, 2.3835010528564453, -2.2268218994140625, 1.6804237365722656, -0.4632148742675781, 41.70378112792969, 6.6744842529296875, 0.6497669219970703, 10.45956039428711, -6.922657012939453, -4.623741149902344, 1.174713134765625, 22.09832763671875, 29.583736419677734, 32.669227600097656, -6.080879211425781, 25.090240478515625, 18.879287719726562, 2.6957550048828125, -14.854759216308594, -4.485496520996094, 24.292037963867188, 13.255912780761719, -0.8851070404052734, 5.5851593017578125, -0.9771709442138672, 13.461851119995117, 0.7143058776855469, -6.942474365234375, 1.5568218231201172, -14.35980224609375, -3.3526363372802734, 7.406402587890625, -21.856101989746094, -0.23933029174804688, -10.779022216796875, 19.769563674926758, -0.9839038848876953, -10.501201629638672, -16.191913604736328, 37.32817840576172, 16.14308738708496, 31.408340454101562, 3.8631725311279297, -2.699615478515625, -0.43213653564453125, 11.033744812011719, 2.0340576171875, 7.031217575073242, -2.8310165405273438, -3.1734390258789062, -2.5416393280029297, 2.3618850708007812, -0.5600433349609375, -7.126962661743164, -2.5120182037353516, 2.116039276123047, 3.4440689086914062, 23.365692138671875, -0.27392005920410156, 15.467216491699219, 12.861980438232422, 24.3470458984375, -7.6666259765625, -4.322029113769531, 6.426361083984375, 15.984359741210938, 0.113128662109375, -9.475784301757812, 7.232398986816406, 6.496917724609375, -15.246627807617188, -3.8982372283935547, 6.203821182250977, 22.069278717041016, 14.778251647949219, 10.970367431640625, 8.3004150390625, -5.245994567871094, 7.9143524169921875, 3.9709320068359375, 6.259803771972656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000317.npy"}
{"epoch": 0.9606060606060606, "step": 318, "batch_size": 128, "mean": 7.150138854980469, "std": 11.64959716796875, "min": -21.95079803466797, "p10": -7.771471214294434, "median": 6.552562713623047, "p90": 19.597382354736325, "max": 43.937713623046875, "pos_frac": 0.75, "sample": [-1.3606719970703125, 17.4527645111084, -7.701967239379883, -1.9353523254394531, 28.781448364257812, 3.071075439453125, -8.19412612915039, -17.4781551361084, 14.847814559936523, 2.397167205810547, 18.70428466796875, 22.55846405029297, 5.273345947265625, -7.933647155761719, -3.5808868408203125, 32.825801849365234, 9.03628921508789, 10.895118713378906, 3.3282089233398438, 4.175727844238281, 14.20611572265625, 2.591888427734375, 19.865463256835938, -13.140487670898438, 8.141098022460938, 9.623870849609375, -0.4746284484863281, 39.16175842285156, 1.525787353515625, 6.054840087890625, -11.061141967773438, -3.94207763671875, -12.3936767578125, 17.910343170166016, 2.9446773529052734, 19.48249053955078, 2.383098602294922, -4.8267059326171875, 12.329879760742188, 19.25749969482422, -1.2015457153320312, -7.074981689453125, 15.8333740234375, -0.8295707702636719, -3.8240623474121094, 11.471736907958984, 3.894287109375, -13.253501892089844, 8.83148193359375, 19.069915771484375, -6.781976699829102, 31.342323303222656, 10.959991455078125, -9.835330963134766, 17.20269775390625, 10.813262939453125, 8.7689208984375, 0.12511444091796875, 1.8889007568359375, 14.789819717407227, -16.60137939453125, 13.497713088989258, 9.611099243164062, -0.09832191467285156, 18.764266967773438, 0.21867752075195312, 1.2881717681884766, 1.3183135986328125, 12.29183578491211, 8.285591125488281, 14.604637145996094, -5.523090362548828, 3.4443130493164062, 2.04644775390625, 4.844856262207031, 9.608448028564453, 6.207878112792969, 10.056427001953125, 0.7714481353759766, 15.502681732177734, 31.513084411621094, 13.4906005859375, -9.12509536743164, 13.076786041259766, 4.871849060058594, 12.970626831054688, -7.698432922363281, 13.721588134765625, 3.026296615600586, 14.536346435546875, 7.7287445068359375, 3.7850875854492188, 20.349136352539062, 24.23334503173828, 0.157928466796875, -11.907676696777344, 4.7370147705078125, 25.395732879638672, 9.273872375488281, -3.6176509857177734, 19.17315673828125, 14.4068603515625, 8.990592956542969, 3.1602554321289062, 4.131940841674805, 9.585233688354492, 43.937713623046875, 3.441509246826172, 2.0115833282470703, 14.920059204101562, 6.897247314453125, 13.165925979614258, 13.657562255859375, 19.44443130493164, -0.09094810485839844, 27.47967529296875, 4.8280487060546875, -21.95079803466797, 29.696975708007812, 11.2166748046875, -8.794296264648438, -2.5562610626220703, 2.1920719146728516, 16.188583374023438, 8.547481536865234, -0.8694992065429688, 6.973320007324219, 17.779800415039062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000318.npy"}
{"epoch": 0.9636363636363636, "step": 319, "batch_size": 128, "mean": 7.147370338439941, "std": 11.38214111328125, "min": -20.092727661132812, "p10": -4.718228912353515, "median": 6.029579162597656, "p90": 24.343730163574218, "max": 35.21977996826172, "pos_frac": 0.7265625, "sample": [3.0259857177734375, 18.477630615234375, 5.6190643310546875, 30.586502075195312, -1.7302970886230469, 4.098785400390625, -3.1615219116210938, 8.208206176757812, 0.4574317932128906, 11.713722229003906, 30.902145385742188, 0.5950164794921875, 5.0878448486328125, 13.463333129882812, 9.052474975585938, 3.462799072265625, -19.488311767578125, 8.6114501953125, -5.6341705322265625, 11.35540771484375, -6.376899719238281, 2.6064453125, 0.5542449951171875, 15.142276763916016, -0.4462394714355469, 6.473634719848633, 8.780426025390625, 35.21977996826172, 0.9542713165283203, 9.867935180664062, 2.187652587890625, 2.0104598999023438, 16.4642333984375, 4.794013977050781, -1.225942611694336, 13.317405700683594, 6.95258903503418, -11.557060241699219, 12.198745727539062, 9.26163101196289, 1.09771728515625, 14.771903991699219, -2.7073631286621094, 26.233184814453125, 30.923995971679688, 28.814544677734375, 29.681991577148438, -4.55975341796875, 6.0830078125, 23.6744384765625, -0.6436767578125, 5.211851119995117, 8.951644897460938, 7.531063079833984, -3.8277359008789062, 4.250640869140625, 22.356101989746094, 6.873376846313477, -0.4997978210449219, -14.598358154296875, 10.43648910522461, 26.2794189453125, -0.8157196044921875, 7.565814971923828, -13.086349487304688, 13.284610748291016, 16.943023681640625, 2.6612911224365234, -18.13751220703125, -4.004951477050781, 30.936447143554688, 8.39874267578125, -7.083484649658203, -13.248222351074219, 27.00621795654297, -7.2066650390625, 11.0897216796875, -2.0099563598632812, 6.85394287109375, 7.4351043701171875, 18.457855224609375, 26.43535614013672, 12.23394775390625, 1.4872150421142578, 3.0454254150390625, 15.928115844726562, 0.6229095458984375, 14.430747985839844, -1.4767227172851562, 5.9761505126953125, -2.2997608184814453, 9.91817855834961, 14.06298828125, 4.364959716796875, -0.6671218872070312, 17.164569854736328, 3.8060035705566406, -2.15753173828125, 0.8031349182128906, -5.088005065917969, 0.902435302734375, -2.2763519287109375, 1.5467376708984375, 14.5367431640625, -20.092727661132812, 24.861541748046875, 13.018058776855469, 20.322402954101562, -1.1833457946777344, 9.650516510009766, 6.636234283447266, 17.54810333251953, 13.542182922363281, 19.956897735595703, 12.640769958496094, 1.2299957275390625, 24.121810913085938, -4.071758270263672, -2.8456192016601562, 21.465530395507812, 1.3086509704589844, 25.65582275390625, 23.082477569580078, -0.6893959045410156, -8.928688049316406, -1.5581016540527344, 6.8571319580078125, 5.777097702026367], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000319.npy"}
{"epoch": 0.9666666666666667, "step": 320, "batch_size": 128, "mean": 7.208101272583008, "std": 11.32276725769043, "min": -23.475936889648438, "p10": -7.3963462829589846, "median": 6.785915374755859, "p90": 18.910615158081054, "max": 37.590087890625, "pos_frac": 0.78125, "sample": [19.36858558654785, 16.015518188476562, 8.991119384765625, 4.307216644287109, 6.606391906738281, 16.89978790283203, 4.164054870605469, -0.08544158935546875, 7.65931510925293, 0.20690536499023438, -5.94866943359375, 4.9417724609375, 8.575397491455078, 18.316238403320312, 1.6657352447509766, 8.636993408203125, 1.718587875366211, 8.970680236816406, -0.6880092620849609, 4.081939697265625, 16.01116180419922, 1.5357437133789062, 37.590087890625, 13.632568359375, 13.454826354980469, 1.814849853515625, 14.365570068359375, 24.47346305847168, 4.91302490234375, 7.383644104003906, 2.138885498046875, -9.982870101928711, 1.0316581726074219, 16.066978454589844, 3.9620361328125, 12.592870712280273, 33.492218017578125, 3.8396224975585938, 12.298049926757812, 1.2687911987304688, -3.205516815185547, 16.215423583984375, 12.181968688964844, 17.12610626220703, -7.36962890625, 21.438949584960938, 23.754188537597656, 0.0036449432373046875, 11.836788177490234, -3.315948486328125, 22.066268920898438, 12.369796752929688, 1.776824951171875, 2.112010955810547, 6.627655029296875, -0.043437957763671875, 5.047023773193359, -12.876426696777344, -0.61944580078125, 31.15579605102539, -0.6012325286865234, 17.050628662109375, 19.623586654663086, 16.841224670410156, 16.155929565429688, 5.705629348754883, 11.176254272460938, -19.463191986083984, 1.9411773681640625, 10.153366088867188, 17.416109085083008, 10.801898956298828, 8.23849868774414, 10.067203521728516, -11.379985809326172, 33.47441864013672, -23.475936889648438, 6.862190246582031, 12.113761901855469, -1.0855636596679688, 12.097183227539062, 17.783157348632812, 15.340858459472656, -2.9161815643310547, -9.388053894042969, 1.479827880859375, 5.74530029296875, 2.0287933349609375, 1.6676750183105469, 16.77855682373047, 11.882820129394531, 29.672714233398438, 4.025047302246094, 17.053741455078125, -3.66302490234375, 0.5700721740722656, 14.8331298828125, 16.590843200683594, 0.47101593017578125, 1.6252670288085938, 16.34198570251465, 15.757497787475586, -12.087203979492188, 6.7096405029296875, -10.372539520263672, -17.584007263183594, 17.741004943847656, 0.14147186279296875, -14.865135192871094, 1.7210674285888672, 16.02517318725586, -12.988128662109375, -2.411243438720703, 13.837818145751953, 1.0712738037109375, 12.60516357421875, 25.657852172851562, 18.71434211730957, 8.135231018066406, -19.24878692626953, 25.991256713867188, -7.458686828613281, -1.8721466064453125, 13.841938018798828, -4.327653884887695, 14.337099075317383, 2.9509220123291016, 16.40766143798828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000320.npy"}
{"epoch": 0.9696969696969697, "step": 321, "batch_size": 128, "mean": 5.884194850921631, "std": 10.884230613708496, "min": -25.335983276367188, "p10": -5.66988067626953, "median": 5.27056884765625, "p90": 21.619747161865234, "max": 33.80058288574219, "pos_frac": 0.6953125, "sample": [-6.239051818847656, -6.101409912109375, 2.344146728515625, 8.109466552734375, 11.089683532714844, -3.201751708984375, 15.623130798339844, -2.4733734130859375, 7.144599914550781, 28.199050903320312, 21.834638595581055, 11.720722198486328, 16.936965942382812, 3.295074462890625, 0.062168121337890625, -3.008331298828125, 0.1234283447265625, 6.582069396972656, 1.5704460144042969, -14.472846984863281, 5.576023101806641, 4.9556884765625, -3.2087326049804688, 2.6998748779296875, 9.55372428894043, 8.083953857421875, 33.80058288574219, 13.099029541015625, 23.629484176635742, 7.885293960571289, -4.33740234375, -1.0260696411132812, 0.7072830200195312, -8.132678985595703, 9.60394287109375, 0.3985481262207031, 6.997215270996094, 25.585853576660156, -1.504669189453125, 6.498563766479492, 9.171672821044922, 16.0531005859375, 0.5642242431640625, 8.643768310546875, 6.8921661376953125, 15.086156845092773, -8.849090576171875, 18.639007568359375, -23.4000244140625, 18.964702606201172, 11.695289611816406, -0.37109375, 9.550758361816406, 26.496734619140625, -0.13722991943359375, 0.5437984466552734, 8.455223083496094, 6.4472198486328125, 4.0390777587890625, 3.7211837768554688, -2.6186294555664062, -0.155426025390625, 9.919708251953125, 22.941120147705078, 11.327205657958984, 5.045845031738281, -1.7926406860351562, -12.317399978637695, 9.345882415771484, -1.2006359100341797, 15.263092041015625, 23.09900665283203, 13.440399169921875, 28.6778564453125, -8.821203231811523, -3.621723175048828, 5.801902770996094, 3.31341552734375, 18.108421325683594, -3.680767059326172, 6.8484344482421875, -3.9528465270996094, -2.0379371643066406, 21.19536590576172, -5.4849395751953125, 5.034385681152344, -0.0175933837890625, -0.6059722900390625, 18.54510498046875, 29.942771911621094, 4.3824920654296875, -2.5417022705078125, 21.527650833129883, 16.688644409179688, 25.448217391967773, 8.448001861572266, 7.6410064697265625, -25.335983276367188, 25.074554443359375, 7.028009414672852, 8.185661315917969, -3.1439743041992188, 14.136627197265625, 4.824867248535156, 13.84716796875, 4.986717224121094, 28.52785873413086, 4.8472900390625, 18.27025604248047, 2.91802978515625, 11.671070098876953, 2.7348175048828125, 3.9364700317382812, 6.190986633300781, 0.42464637756347656, -5.4357147216796875, 5.495292663574219, 7.881401062011719, -3.930938720703125, 10.258583068847656, -5.3358612060546875, 7.079292297363281, -15.50107192993164, -17.137683868408203, -2.8998260498046875, -9.212898254394531, -6.4576263427734375, 3.9014053344726562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000321.npy"}
{"epoch": 0.9727272727272728, "step": 322, "batch_size": 128, "mean": 6.103288650512695, "std": 12.025768280029297, "min": -27.539615631103516, "p10": -8.339048194885253, "median": 5.603798866271973, "p90": 22.582004356384278, "max": 35.20094299316406, "pos_frac": 0.671875, "sample": [-0.824798583984375, 5.796689987182617, 2.156240463256836, -8.942398071289062, 1.4691581726074219, 24.277427673339844, 2.748903274536133, 10.288230895996094, -2.8749618530273438, 0.005290985107421875, 28.30062484741211, 29.80274200439453, 13.29056167602539, 4.3387298583984375, 16.505413055419922, 13.490848541259766, -9.284774780273438, 9.746757507324219, -4.065706253051758, -4.063068389892578, -2.376596450805664, 2.449941635131836, 28.914199829101562, 21.231952667236328, -0.12275505065917969, -0.6756725311279297, 2.6937255859375, 13.95730209350586, 19.157745361328125, 7.755739212036133, -1.5663299560546875, -16.123821258544922, 16.20018768310547, -0.89959716796875, -2.0584716796875, 3.5568771362304688, 11.382713317871094, 11.51385498046875, 5.992645263671875, 14.410903930664062, 4.312644958496094, 27.96118927001953, 7.3887939453125, 3.4843292236328125, 27.11855697631836, 15.579177856445312, 17.530712127685547, 9.780004501342773, 17.658727645874023, -13.376335144042969, 11.633163452148438, -1.1689338684082031, 5.837226867675781, -11.999053955078125, -18.041946411132812, -20.68610382080078, 0.192108154296875, 29.27783966064453, 1.874176025390625, 6.676658630371094, -3.5443382263183594, 3.483612060546875, 23.077560424804688, -8.498908996582031, 14.918609619140625, -4.336132049560547, -27.539615631103516, 17.698020935058594, -3.4447669982910156, -16.35968017578125, 2.0433349609375, 4.099220275878906, 5.410907745361328, 4.7071380615234375, -2.5364532470703125, 22.39031410217285, -5.488264083862305, 22.392532348632812, -16.899139404296875, -5.9026947021484375, 6.773670196533203, 11.44049072265625, 22.765060424804688, -5.343683242797852, 8.895736694335938, 27.358964920043945, 9.385833740234375, -1.012277603149414, 3.9682579040527344, 22.57239532470703, 10.03143310546875, 2.0014572143554688, -0.5672130584716797, -6.563934326171875, 5.832389831542969, 16.692424774169922, 22.60442543029785, 21.018966674804688, 6.4124603271484375, -8.270536422729492, 14.410369873046875, 18.59259796142578, 3.943511962890625, 6.680091857910156, -1.2688217163085938, 16.702537536621094, -0.41754913330078125, 11.637577056884766, 1.7686767578125, -5.032012939453125, 7.932355880737305, 7.39617919921875, 12.754405975341797, 13.487293243408203, 10.376663208007812, 4.989225387573242, -6.448116302490234, -15.614967346191406, -0.3876323699951172, 21.250099182128906, -16.89703369140625, -0.9432258605957031, 8.679702758789062, 6.037300109863281, -1.1945686340332031, 26.930511474609375, 10.397869110107422, 35.20094299316406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000322.npy"}
{"epoch": 0.9757575757575757, "step": 323, "batch_size": 128, "mean": 7.68452787399292, "std": 11.425592422485352, "min": -24.318531036376953, "p10": -4.746961402893066, "median": 6.879419326782227, "p90": 22.13979568481445, "max": 45.39984130859375, "pos_frac": 0.765625, "sample": [7.736625671386719, 21.756263732910156, 17.9508056640625, -1.0375289916992188, 13.323448181152344, 41.422576904296875, 13.454841613769531, 3.2007970809936523, 14.134269714355469, 1.4079570770263672, 10.05264663696289, 6.183149337768555, 21.836044311523438, -2.9854660034179688, 12.708236694335938, 21.63086700439453, 11.984384536743164, -7.58575439453125, -2.3685455322265625, 10.902259826660156, -9.022003173828125, -2.2866973876953125, 3.2381439208984375, 23.804336547851562, 5.8654937744140625, 7.843441009521484, -4.955314636230469, 18.781139373779297, 10.22607421875, 23.57176971435547, 3.1617431640625, 1.6810646057128906, 0.11774253845214844, -0.19617080688476562, 1.2428207397460938, -1.2209014892578125, 7.630130767822266, 1.9650306701660156, 31.229660034179688, 1.2826690673828125, 3.2943191528320312, -7.855869293212891, 2.7545089721679688, 13.551956176757812, 9.932321548461914, 11.64898681640625, 15.578567504882812, -1.0902423858642578, 20.29163360595703, 19.286300659179688, 33.30308532714844, 8.60498046875, 6.080974578857422, 11.786060333251953, 16.159828186035156, -1.256026268005371, 17.4041748046875, -4.65766716003418, 7.9165496826171875, -13.453842163085938, -0.40163421630859375, 17.161102294921875, 15.247627258300781, -3.39947509765625, -2.9988784790039062, 2.8306732177734375, 1.5529975891113281, 11.701301574707031, 23.416418075561523, 14.205772399902344, -8.213737487792969, 20.076904296875, 10.101951599121094, -18.7589111328125, 24.319629669189453, 9.568416595458984, 25.91948699951172, 11.806488037109375, -19.50860023498535, 8.271522521972656, 3.8569793701171875, 22.848548889160156, 0.565277099609375, 9.904640197753906, 4.500577926635742, -0.5625572204589844, 13.785232543945312, 14.022037506103516, 0.17740631103515625, 28.708885192871094, 6.809177398681641, 0.5940093994140625, 1.3154449462890625, -2.4532318115234375, -24.318531036376953, -1.008371353149414, 10.3995361328125, 21.540054321289062, -2.2493019104003906, 6.544822692871094, 26.279743194580078, 20.287220001220703, 10.056503295898438, 0.28874778747558594, -0.31154632568359375, 14.847480773925781, 7.659690856933594, -5.627960205078125, 5.946598052978516, 4.228584289550781, 6.9496612548828125, 0.448883056640625, 1.83221435546875, -10.932674407958984, 7.3585205078125, 3.338134765625, 13.075088500976562, 45.39984130859375, 5.805976867675781, 11.972000122070312, 19.04766082763672, -9.332942962646484, 3.8587265014648438, 26.923294067382812, -6.408866882324219, 2.0293216705322266, 3.2960052490234375, 8.475330352783203], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000323.npy"}
{"epoch": 0.9787878787878788, "step": 324, "batch_size": 128, "mean": 8.656021118164062, "std": 11.548503875732422, "min": -22.932432174682617, "p10": -4.443235778808593, "median": 7.1941986083984375, "p90": 22.612742614746093, "max": 45.11570739746094, "pos_frac": 0.796875, "sample": [1.8943862915039062, 9.859344482421875, 20.340970993041992, -0.8232498168945312, 7.512420654296875, 9.238075256347656, -4.1933135986328125, 27.985137939453125, 45.11570739746094, 8.980684280395508, -4.055103302001953, -3.88763427734375, 17.92753028869629, 5.223871231079102, 9.482551574707031, 5.129695892333984, 18.479904174804688, 5.732234954833984, 12.569917678833008, -6.522693634033203, 4.418031692504883, 15.609432220458984, 5.553466796875, -14.286109924316406, 5.004539489746094, 19.044780731201172, 1.8657302856445312, 27.243728637695312, 9.561660766601562, 4.601715087890625, 6.03228759765625, 21.87456512451172, -4.3594970703125, 34.15907287597656, 9.371994018554688, 3.84869384765625, 2.6956100463867188, -3.6421661376953125, -5.391044616699219, 9.37530517578125, 33.587738037109375, 20.54496192932129, 15.215782165527344, 1.2822036743164062, 19.4503173828125, 6.886077880859375, 12.223217010498047, -3.4076385498046875, 14.498027801513672, 5.919761657714844, 10.751214981079102, 19.377792358398438, -18.28972625732422, 7.504112243652344, 4.868495941162109, 10.426860809326172, 2.6135120391845703, 37.331451416015625, 17.614639282226562, 22.343109130859375, 32.15068817138672, 4.295265197753906, 8.172782897949219, 4.014251708984375, 22.618179321289062, 2.5742149353027344, 2.0961074829101562, -22.932432174682617, 25.92401885986328, -0.06066131591796875, -16.48004150390625, 5.521335601806641, 13.095510482788086, -5.256862640380859, 32.265174865722656, 20.345428466796875, 25.086170196533203, 10.159305572509766, 11.987457275390625, 30.633464813232422, 16.658538818359375, 14.602462768554688, 2.723592758178711, -11.16094970703125, 7.39019775390625, 6.150962829589844, 12.698511123657227, 9.322166442871094, 16.692550659179688, 12.563270568847656, 14.67684555053711, 18.537403106689453, 13.919662475585938, -9.573928833007812, -4.2669677734375, 6.998199462890625, 10.20770263671875, 6.371089935302734, 1.8814201354980469, 13.630317687988281, 4.935478210449219, -6.125175476074219, -8.266937255859375, -4.743946075439453, 10.135223388671875, 0.1870880126953125, 5.584785461425781, 19.50836181640625, -0.15355300903320312, 20.586990356445312, -4.6386260986328125, -0.6285915374755859, 14.095390319824219, 16.530658721923828, 23.037532806396484, 1.7363452911376953, 0.17225265502929688, 4.4495697021484375, 22.61041259765625, 6.208263397216797, 17.990966796875, -0.42156982421875, 1.7596778869628906, -2.3838272094726562, 0.7616653442382812, 1.483713150024414, 13.122650146484375, 4.889410018920898], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000324.npy"}
{"epoch": 0.9818181818181818, "step": 325, "batch_size": 128, "mean": 7.662583827972412, "std": 11.330479621887207, "min": -16.584861755371094, "p10": -6.5848342895507805, "median": 7.871223449707031, "p90": 20.305911827087403, "max": 36.500396728515625, "pos_frac": 0.734375, "sample": [18.788772583007812, 15.726598739624023, 0.39429283142089844, 6.797119140625, 16.82460594177246, 12.776588439941406, 4.820106506347656, 15.398780822753906, 10.024490356445312, 4.9957122802734375, 19.10556983947754, 14.021648406982422, 23.2467098236084, -3.690052032470703, -12.461212158203125, -4.911170959472656, 13.371040344238281, 0.1860942840576172, 11.880428314208984, 12.432769775390625, 8.153579711914062, 15.245620727539062, 13.089996337890625, 10.108301162719727, -3.6864547729492188, 3.3094921112060547, -1.8838958740234375, 30.463417053222656, -9.191892623901367, -6.4266815185546875, 30.80670166015625, -16.584861755371094, -3.2474842071533203, 18.905445098876953, 0.8131370544433594, 18.276718139648438, 8.077896118164062, -4.351875305175781, 18.000198364257812, 8.011100769042969, -13.155654907226562, 17.049728393554688, 12.797119140625, 5.170953750610352, -3.444875717163086, -9.088813781738281, 7.731346130371094, -0.2462921142578125, -0.5974311828613281, 30.328842163085938, 20.88654327392578, -0.8994598388671875, 2.4630126953125, 26.250320434570312, 2.4577808380126953, -1.9344711303710938, -11.436969757080078, 2.5367889404296875, 8.56967544555664, 20.063579559326172, 14.408538818359375, 6.231803894042969, 2.6829872131347656, -9.61669921875, -5.3398284912109375, 24.420303344726562, 12.60919189453125, 33.50025177001953, 3.7114334106445312, 19.97449493408203, 11.18642807006836, 9.950515747070312, 20.234888076782227, 8.252033233642578, 14.886444091796875, 1.0298004150390625, 17.472618103027344, 10.931957244873047, -5.4190521240234375, -11.140981674194336, -0.5475730895996094, 16.3761043548584, -12.45880126953125, 7.339260101318359, 16.716873168945312, 10.581077575683594, 1.9792957305908203, -8.842178344726562, 16.616302490234375, 2.128917694091797, 15.950857162475586, 0.46575164794921875, 15.138717651367188, 3.7947654724121094, -0.1318359375, 17.82910919189453, 4.2356719970703125, 1.0167236328125, 12.444717407226562, 20.471633911132812, -4.177558898925781, 4.428050994873047, 1.54052734375, 17.047149658203125, 19.75583839416504, 22.707120895385742, 33.420188903808594, 18.73523712158203, 8.942352294921875, -4.251861572265625, 2.9507980346679688, -9.585334777832031, 3.843963623046875, 16.023433685302734, -14.440940856933594, 12.615303039550781, -4.099822998046875, 5.726284027099609, 26.93157196044922, 17.50897979736328, -6.953857421875, -2.446563720703125, 15.042531967163086, 36.500396728515625, 1.81634521484375, 0.3285789489746094, 10.132408142089844, -3.421966552734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000325.npy"}
{"epoch": 0.9848484848484849, "step": 326, "batch_size": 128, "mean": 6.1351189613342285, "std": 10.499245643615723, "min": -17.61737060546875, "p10": -5.467852783203125, "median": 3.8794021606445312, "p90": 22.206340789794922, "max": 32.10267639160156, "pos_frac": 0.6796875, "sample": [0.4052276611328125, -1.6148147583007812, 4.2587890625, -1.8544235229492188, 7.87261962890625, -12.622142791748047, -5.2618408203125, 6.589900970458984, 0.5261821746826172, 12.530288696289062, 8.081901550292969, 14.978748321533203, 17.72464370727539, 3.0862197875976562, -0.388214111328125, 8.752464294433594, 5.882923126220703, 1.3890609741210938, 2.8839874267578125, -0.17729759216308594, 5.954811096191406, 7.197052001953125, -13.794288635253906, 8.818695068359375, -4.554176330566406, 17.46364402770996, 3.8651580810546875, -2.1425094604492188, 1.265420913696289, 14.79425048828125, 15.426528930664062, 12.353233337402344, -3.5222930908203125, 22.32770538330078, -0.9918289184570312, -0.5849609375, -2.4139862060546875, -1.2793045043945312, 20.85803985595703, -7.1661834716796875, 4.484062194824219, 4.202434539794922, 15.867080688476562, 4.830345153808594, -4.5244293212890625, 25.37335205078125, 6.3292999267578125, 24.603591918945312, 27.129722595214844, -1.1666030883789062, 23.807884216308594, 0.6432380676269531, -1.3146286010742188, 32.10267639160156, 31.205413818359375, 19.977684020996094, 15.642690658569336, 4.909601211547852, 0.6668701171875, -1.564849853515625, 5.803485870361328, 3.893646240234375, 27.523239135742188, -5.429176330566406, -0.8134422302246094, 16.12110710144043, 21.096107482910156, 1.2852134704589844, 3.2422866821289062, 21.661930084228516, -1.9464912414550781, 9.858604431152344, 3.7167320251464844, 21.04216194152832, 5.580177307128906, -5.579517364501953, -8.529460906982422, 10.07000732421875, 3.8349456787109375, 0.6731395721435547, 5.861354827880859, -1.3202362060546875, 8.118606567382812, 0.17212677001953125, 27.395309448242188, 2.4113616943359375, -5.822071075439453, -5.352821350097656, -0.7128486633300781, 15.290084838867188, 24.45953369140625, -5.558097839355469, 20.52440643310547, -3.2603225708007812, -6.7895355224609375, 0.6549148559570312, 18.638526916503906, -1.8731002807617188, 11.588325500488281, -8.735908508300781, 3.5499649047851562, 3.556865692138672, 9.020599365234375, 11.253944396972656, 5.7716217041015625, 3.5658721923828125, -17.61737060546875, -3.7072677612304688, 22.154327392578125, 10.762222290039062, 6.712352752685547, -1.9802894592285156, -4.52189826965332, 27.205467224121094, 1.8043060302734375, 7.731645584106445, 2.6961517333984375, 2.5326614379882812, 22.597572326660156, 16.091033935546875, -9.987579345703125, 5.56181526184082, -5.613456726074219, 28.61421012878418, 7.175422668457031, -8.883575439453125, -2.2548446655273438, 12.58447265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000326.npy"}
{"epoch": 0.9878787878787879, "step": 327, "batch_size": 128, "mean": 7.178014755249023, "std": 11.791072845458984, "min": -22.726055145263672, "p10": -6.930901718139648, "median": 5.744791030883789, "p90": 22.748145675659178, "max": 39.31951904296875, "pos_frac": 0.65625, "sample": [5.962184906005859, 21.990760803222656, -6.392051696777344, 1.7008323669433594, 8.194128036499023, 19.077190399169922, 10.135082244873047, 9.940536499023438, 3.5820083618164062, 28.249290466308594, 7.855766296386719, 24.147079467773438, 21.716285705566406, 22.352066040039062, -7.724231719970703, -4.362949371337891, -2.3551177978515625, -0.07310867309570312, 12.35263442993164, -7.66162109375, -1.6241912841796875, 15.264289855957031, 18.89263916015625, -2.4778213500976562, 22.845993041992188, 12.390243530273438, -4.058841705322266, 26.694679260253906, -16.42535400390625, 3.1168594360351562, 3.4353561401367188, 4.555227279663086, 22.89080810546875, 5.2226409912109375, -7.315595626831055, -1.996673583984375, 16.500825881958008, -4.94012451171875, 24.967132568359375, 18.788345336914062, 9.099187850952148, 1.5467300415039062, 15.334403991699219, 2.186981201171875, 3.5079593658447266, 9.550445556640625, 27.433609008789062, -2.0697021484375, 4.29547119140625, -0.0848846435546875, 21.138809204101562, 15.451446533203125, 19.99756622314453, -22.726055145263672, 19.500629425048828, -3.33251953125, 16.514057159423828, 11.332054138183594, 10.821029663085938, 16.608917236328125, 14.624675750732422, -0.6624107360839844, 1.9649429321289062, 13.060722351074219, 1.7873306274414062, 26.540557861328125, -12.382186889648438, 14.005517959594727, -2.4703140258789062, -7.938438415527344, 23.449691772460938, 7.336874008178711, 15.849151611328125, -10.127758026123047, 21.617355346679688, -2.366668701171875, 4.6117706298828125, 0.7621879577636719, 20.395187377929688, 29.0693359375, 39.31951904296875, 3.787261962890625, 18.397109985351562, 17.285232543945312, 17.66352081298828, -0.8120155334472656, 4.60870361328125, 6.4374847412109375, 7.093162536621094, -0.2524528503417969, -0.5252838134765625, 21.27838134765625, 5.527397155761719, 6.613288879394531, 22.70621109008789, -6.809810638427734, 7.757862091064453, -1.1715850830078125, 6.898292541503906, -15.010009765625, 32.74981689453125, -2.2016143798828125, -3.8754425048828125, 7.743154525756836, -0.6374359130859375, -1.2137832641601562, -1.1356487274169922, -7.213447570800781, -11.541046142578125, 9.484039306640625, -3.427976608276367, 8.30535888671875, 19.561649322509766, -10.607330322265625, -5.442138671875, -0.31072235107421875, 3.6226444244384766, 1.7175712585449219, 29.668750762939453, 20.54726219177246, 7.448741912841797, -3.610107421875, -16.277069091796875, 15.579212188720703, 8.80459976196289, -0.694244384765625, 0.3798675537109375, -0.07794189453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000327.npy"}
{"epoch": 0.990909090909091, "step": 328, "batch_size": 128, "mean": 8.072820663452148, "std": 11.075932502746582, "min": -17.52454948425293, "p10": -4.701250076293945, "median": 6.826849937438965, "p90": 21.774157524108883, "max": 47.25291442871094, "pos_frac": 0.734375, "sample": [18.580135345458984, -1.1472434997558594, -8.262222290039062, 2.7673778533935547, 4.6560821533203125, 23.013870239257812, 16.225967407226562, 4.4377288818359375, 11.927589416503906, 17.115615844726562, 19.89727783203125, 3.6992645263671875, 23.526779174804688, -1.48712158203125, 12.247869491577148, 20.009002685546875, 18.070880889892578, 6.19683837890625, 11.274505615234375, 2.7348899841308594, 0.33170127868652344, 14.476547241210938, 12.318862915039062, 47.25291442871094, 17.549047470092773, 23.076522827148438, -3.8320579528808594, 5.023750305175781, 20.209121704101562, 9.051458358764648, -0.6834259033203125, 3.7624244689941406, 6.3402099609375, 9.151031494140625, -0.41071319580078125, 18.99567413330078, 3.8416595458984375, 6.3243865966796875, -1.484588623046875, -4.800010681152344, 16.986257553100586, 0.9062957763671875, 12.014640808105469, -9.29867935180664, 15.13739013671875, -15.162239074707031, 13.074491500854492, 3.7687644958496094, 2.2580718994140625, -6.893180847167969, 1.8927764892578125, 18.816146850585938, 7.061971664428711, -14.032970428466797, 6.591728210449219, 11.645401000976562, 19.49686622619629, 1.5843887329101562, 19.11370086669922, -1.2445220947265625, -8.27096176147461, 10.030954360961914, 13.675918579101562, 5.727745056152344, 15.199211120605469, 8.794166564941406, 16.226470947265625, 16.924362182617188, -4.658924102783203, -0.42279052734375, -0.00640869140625, 0.08203887939453125, -15.003616333007812, -4.065574645996094, 19.863956451416016, 21.57621192932129, 27.147735595703125, 8.357864379882812, -17.52454948425293, 20.480918884277344, 7.323892593383789, 4.780616760253906, 11.185470581054688, 31.332809448242188, 10.332855224609375, 2.4062843322753906, 15.389923095703125, 7.88153076171875, -0.3457984924316406, 14.767757415771484, 25.095504760742188, -4.588367462158203, 5.79754638671875, 32.430946350097656, 9.468017578125, 10.751670837402344, 3.13909912109375, -2.5557289123535156, -1.3355579376220703, 15.930427551269531, -0.6895027160644531, -0.054355621337890625, -0.8103923797607422, 22.355804443359375, 23.921051025390625, -9.18071174621582, 17.86517333984375, 10.7567138671875, -8.568618774414062, 1.1971588134765625, 21.169189453125, -4.317512512207031, 5.91033935546875, -5.371856689453125, 8.351921081542969, 32.21585464477539, 7.745475769042969, 22.23603057861328, 1.8421058654785156, 13.667850494384766, -0.03212738037109375, -6.758262634277344, 6.5417633056640625, 4.601627349853516, -3.6338653564453125, 3.9677581787109375, 11.069953918457031, 28.301971435546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000328.npy"}
{"epoch": 0.9939393939393939, "step": 329, "batch_size": 128, "mean": 7.758512496948242, "std": 12.950913429260254, "min": -24.59845542907715, "p10": -7.178224754333496, "median": 5.654890060424805, "p90": 26.078231048583984, "max": 40.751190185546875, "pos_frac": 0.7421875, "sample": [30.028778076171875, 11.00372314453125, -0.449462890625, 18.273681640625, 13.020309448242188, 11.145301818847656, 30.735061645507812, 30.818695068359375, 15.241249084472656, 24.58892059326172, -11.95101547241211, 26.52442169189453, -7.1807708740234375, 9.047163009643555, -16.839252471923828, 2.8066787719726562, 16.27471923828125, 19.982276916503906, 14.118345260620117, 5.7006683349609375, 12.466232299804688, 34.83122253417969, 29.14240264892578, 0.31072998046875, -6.307868957519531, 4.318450927734375, 0.14107513427734375, -18.833602905273438, 18.0963134765625, -0.6979045867919922, -1.7896175384521484, 4.156005859375, 10.329315185546875, -17.38144302368164, -7.234001159667969, -4.548114776611328, 14.95772933959961, 4.76439094543457, 24.718521118164062, 9.11583137512207, 1.9662952423095703, 18.815465927124023, -4.49359130859375, 11.285655975341797, -0.19298553466796875, 8.002487182617188, 13.410476684570312, 11.098018646240234, 17.63885498046875, 0.80487060546875, -2.240032196044922, 18.180923461914062, 0.9985198974609375, -1.358489990234375, 34.87120819091797, 5.351078033447266, 0.5242767333984375, 13.268060684204102, 11.914804458618164, 26.057533264160156, -0.1322193145751953, 18.854049682617188, 3.4207305908203125, 28.07919692993164, 16.45387840270996, 1.4499969482421875, 19.044586181640625, -8.228485107421875, 0.6113357543945312, 5.0110321044921875, -21.111431121826172, 5.7617950439453125, -4.7261199951171875, 10.633811950683594, 23.974151611328125, 3.00640869140625, -0.5312862396240234, 1.55889892578125, 0.7885665893554688, 20.247440338134766, 1.4684715270996094, 13.366741180419922, -0.05598640441894531, 11.623291015625, 20.5238037109375, -11.742599487304688, -1.9651641845703125, 3.2037887573242188, 12.229047775268555, 3.326709747314453, 4.840339660644531, -0.464874267578125, -7.177133560180664, -24.59845542907715, 15.070808410644531, 11.197601318359375, 22.20314598083496, 22.064132690429688, 4.173759460449219, 11.648422241210938, -16.167789459228516, 16.782936096191406, 15.675605773925781, 4.391109466552734, 11.988800048828125, 16.625045776367188, -17.39178466796875, 2.0839366912841797, 14.282360076904297, -3.4925613403320312, 0.434967041015625, -1.3794898986816406, 26.766014099121094, 3.965841293334961, 1.4140777587890625, 29.535987854003906, -0.9094753265380859, 12.172616958618164, -6.114677429199219, 26.12652587890625, 0.5023651123046875, -21.57097625732422, 33.274444580078125, 0.9219322204589844, 40.751190185546875, 10.10775375366211, 12.25299072265625, 5.609111785888672], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000329.npy"}
{"epoch": 0.996969696969697, "step": 330, "batch_size": 128, "mean": 6.829625606536865, "std": 11.239670753479004, "min": -24.026596069335938, "p10": -7.771026992797852, "median": 6.323962211608887, "p90": 22.565741729736324, "max": 31.472694396972656, "pos_frac": 0.75, "sample": [5.85906982421875, 2.1572265625, 25.861644744873047, 9.82524299621582, -7.964807510375977, 26.360031127929688, -0.21050643920898438, 10.33056640625, 18.93560028076172, -12.647430419921875, 3.479635238647461, -5.271675109863281, 21.51471710205078, -6.540971755981445, 4.331840515136719, 12.372390747070312, 20.334274291992188, 2.5625, 12.281639099121094, 13.617889404296875, 28.025951385498047, 11.403755187988281, 27.5682430267334, 3.4044418334960938, 8.251449584960938, -11.617156982421875, 5.297660827636719, 16.204395294189453, -21.246061325073242, -14.880935668945312, 20.80224609375, 11.662452697753906, 3.9968929290771484, 25.695873260498047, 13.01251220703125, -5.993633270263672, 0.4636726379394531, -1.4311103820800781, -1.0514373779296875, 1.3447837829589844, 2.3143558502197266, -11.380126953125, 2.0708065032958984, 5.625816345214844, -1.2729473114013672, 8.115829467773438, 8.189796447753906, -2.3518600463867188, 8.34771728515625, -8.774627685546875, -7.770641326904297, 18.29102897644043, -4.9863128662109375, 3.676311492919922, 3.199281692504883, 4.35333251953125, 1.0818290710449219, -24.026596069335938, 31.472694396972656, 5.154624938964844, -1.3589668273925781, 18.561227798461914, -1.0916595458984375, -6.0752410888671875, 24.55417251586914, 11.243232727050781, 10.736177444458008, -7.196815490722656, 13.19175910949707, -4.495967864990234, 5.1244964599609375, 18.757549285888672, 27.3541259765625, 12.325057983398438, 4.248500823974609, -7.7719268798828125, 12.756752014160156, 0.7860107421875, 15.126907348632812, 4.54005241394043, 23.47039794921875, 11.160049438476562, 0.7133121490478516, -11.616226196289062, 3.4156875610351562, 6.218223571777344, 19.44134521484375, 7.1297760009765625, 10.164518356323242, 26.715087890625, 12.775810241699219, 24.607254028320312, -2.6312789916992188, 3.6923828125, 17.59744644165039, 7.456348419189453, -18.599647521972656, -11.727493286132812, 17.71316909790039, 28.38641357421875, 5.515556335449219, 12.08193588256836, 14.235895156860352, 29.033233642578125, -3.0668792724609375, 22.17803192138672, -3.5630264282226562, 6.507537841796875, 7.022928237915039, -8.068122863769531, 14.436418533325195, 10.832931518554688, 0.884429931640625, 9.082332611083984, 13.415786743164062, 6.289464950561523, 19.903188705444336, 6.35845947265625, 13.747802734375, 3.4079208374023438, 4.747976303100586, 7.309532165527344, 6.8633270263671875, 1.5572986602783203, 10.64295768737793, 10.126541137695312, -0.8097076416015625, 6.687103271484375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-harmless-8xh200-20260410-180850/margin_logs/step_0000330.npy"}