Files
llama-3-8b-base-new-dpo-hh-…/margin_logs/margins.jsonl

662 lines
1.1 MiB
Plaintext
Raw Normal View History

{"epoch": 0.0, "step": 1, "batch_size": 64, "mean": -0.0013527870178222656, "std": 0.2564818859100342, "min": -0.736083984375, "p10": -0.3432229995727539, "median": 0.038166046142578125, "p90": 0.29227676391601565, "max": 0.645111083984375, "pos_frac": 0.578125, "sample": [0.1120758056640625, 0.12518310546875, 0.31621551513671875, 0.13765716552734375, -0.12592506408691406, 0.23141098022460938, -0.21887779235839844, 0.21950721740722656, 0.04480743408203125, 0.020877838134765625, 0.0570220947265625, 0.058269500732421875, -0.4338226318359375, -0.030628204345703125, 0.645111083984375, -0.395477294921875, 0.09050941467285156, 0.0007190704345703125, -0.34615325927734375, 0.016077041625976562, -0.33638572692871094, 0.293853759765625, 0.17610931396484375, 0.22386932373046875, 0.21470260620117188, -0.08536529541015625, 0.0907745361328125, -0.03816986083984375, 0.39190101623535156, 0.16336441040039062, 0.08024787902832031, -0.031158447265625, 0.08477020263671875, 0.002460479736328125, -0.242034912109375, 0.07232666015625, -0.60186767578125, 0.20531463623046875, 0.155731201171875, -0.14299774169921875, -0.25698089599609375, 0.12331962585449219, -0.26497650146484375, 0.15140533447265625, -0.0920257568359375, -0.18599319458007812, 0.19028091430664062, 0.2496490478515625, 0.42162322998046875, 0.17873382568359375, -0.1525421142578125, -0.4972076416015625, 0.32010650634765625, -0.10365867614746094, -0.233795166015625, -0.19828224182128906, -0.4018898010253906, -0.13407135009765625, -0.09596633911132812, 0.031524658203125, 0.28859710693359375, -0.192962646484375, -0.736083984375, 0.3026123046875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000001.npy"}
{"epoch": 0.0015117157974300832, "step": 2, "batch_size": 64, "mean": 0.03744968771934509, "std": 0.2875921130180359, "min": -0.7604827880859375, "p10": -0.2812448501586914, "median": 0.03963661193847656, "p90": 0.3654294967651367, "max": 0.8134727478027344, "pos_frac": 0.5625, "sample": [0.30594635009765625, -0.24289894104003906, -0.11509323120117188, -0.13417816162109375, 0.06942558288574219, 0.36568641662597656, -0.14640045166015625, 0.1497650146484375, 0.30261993408203125, 0.10124588012695312, 0.13028717041015625, -0.0031890869140625, 0.0361480712890625, 0.5662612915039062, 0.09694290161132812, -0.01091766357421875, 0.1128997802734375, 0.0411834716796875, -0.21860504150390625, -0.1236419677734375, -0.08812713623046875, 0.10360527038574219, 0.1790008544921875, -0.5114288330078125, 0.3056755065917969, -0.14553451538085938, 0.28168487548828125, 0.26990509033203125, 0.1686878204345703, 0.038089752197265625, 0.19541168212890625, -0.10783576965332031, -0.2644004821777344, -0.19707489013671875, -0.140472412109375, 0.1349811553955078, 0.19672012329101562, -0.0714111328125, 0.53369140625, 0.1271820068359375, 0.8134727478027344, 0.2990264892578125, -0.7604827880859375, -0.08274078369140625, 0.05890846252441406, 0.029361724853515625, 0.4510040283203125, -0.1599273681640625, -0.29346656799316406, 0.10005569458007812, -0.27509117126464844, -0.1937713623046875, 0.19167327880859375, 0.28173065185546875, -0.09406471252441406, -0.3380699157714844, -0.29186248779296875, 0.36483001708984375, 0.009979248046875, 0.44391632080078125, -0.126708984375, -0.6550216674804688, 0.6160736083984375, -0.28388214111328125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000002.npy"}
{"epoch": 0.0030234315948601664, "step": 3, "batch_size": 64, "mean": 0.004606842994689941, "std": 0.28233811259269714, "min": -0.643951416015625, "p10": -0.352471923828125, "median": 0.0060291290283203125, "p90": 0.382952117919922, "max": 0.6574783325195312, "pos_frac": 0.5, "sample": [-0.1661834716796875, -0.12054634094238281, -0.00199127197265625, -0.5257568359375, -0.31201171875, 0.426666259765625, -0.16051483154296875, 0.05686187744140625, 0.137542724609375, -0.12680816650390625, -0.17875289916992188, 0.136627197265625, 0.144317626953125, 0.08634185791015625, -0.13080596923828125, 0.22462844848632812, -0.212799072265625, 0.1158905029296875, 0.015779495239257812, -0.2654266357421875, 0.5568084716796875, 0.04879951477050781, -0.35916900634765625, 0.14013290405273438, 0.012493133544921875, 0.1903553009033203, 0.080108642578125, 0.224273681640625, -0.07558250427246094, -0.4016571044921875, 0.1376190185546875, -0.643951416015625, -0.5628204345703125, -0.04206085205078125, -0.11207962036132812, -0.13547706604003906, -0.29831695556640625, -0.19793701171875, 0.17755508422851562, 0.4104194641113281, 0.3981475830078125, -0.00043487548828125, 0.6574783325195312, 0.5395355224609375, 0.33553314208984375, -0.33684539794921875, 0.07665252685546875, 0.3120918273925781, 0.23170852661132812, -0.11792755126953125, -0.6022720336914062, -0.11194610595703125, 0.12529754638671875, 0.28409576416015625, -0.4431419372558594, 0.07358551025390625, 0.18742752075195312, 0.34749603271484375, -0.10540771484375, 0.4225006103515625, -0.041431427001953125, -0.03631591796875, -0.178314208984375, -0.015247344970703125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000003.npy"}
{"epoch": 0.0045351473922902496, "step": 4, "batch_size": 64, "mean": 0.03709092736244202, "std": 0.3874465525150299, "min": -0.6391983032226562, "p10": -0.4192924499511719, "median": -0.032672882080078125, "p90": 0.39606857299804693, "max": 1.47393798828125, "pos_frac": 0.453125, "sample": [-0.448486328125, 0.1968536376953125, -0.10394287109375, -0.02542877197265625, 0.23560333251953125, -0.24787139892578125, 0.11277580261230469, 0.33576202392578125, 0.1650390625, 0.463592529296875, 0.12411117553710938, -0.21327590942382812, -0.275054931640625, -0.12078094482421875, 0.013153076171875, 0.009521484375, -0.0380096435546875, -0.18148040771484375, 0.23577880859375, -0.45067596435546875, -0.279754638671875, 0.12878036499023438, 0.37953948974609375, -0.5712127685546875, 0.2532005310058594, 0.618896484375, -0.02899932861328125, 0.364654541015625, -0.22229385375976562, 0.3193511962890625, 1.47393798828125, -0.14839935302734375, 0.3768768310546875, -0.11795806884765625, 0.07209014892578125, 1.394073486328125, 0.657562255859375, -0.402069091796875, -0.0500335693359375, 0.0457763671875, -0.11735153198242188, -0.0077667236328125, 0.2751502990722656, -0.14956092834472656, -0.42667388916015625, 0.6513290405273438, -0.24936294555664062, -0.11396980285644531, -0.6391983032226562, 0.05574798583984375, 0.2105255126953125, -0.08040618896484375, -0.05694389343261719, -0.4897041320800781, 0.1402587890625, -0.0423126220703125, 0.37952423095703125, -0.0873565673828125, 0.4031524658203125, -0.2702770233154297, -0.036346435546875, -0.5089569091796875, -0.1630706787109375, -0.35381317138671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000004.npy"}
{"epoch": 0.006046863189720333, "step": 5, "batch_size": 64, "mean": 0.01996755599975586, "std": 0.30351829528808594, "min": -0.658538818359375, "p10": -0.3285346984863281, "median": 0.0078105926513671875, "p90": 0.43601417541503956, "max": 1.02630615234375, "pos_frac": 0.5, "sample": [-0.35120201110839844, -0.196319580078125, 0.04264640808105469, 0.10369873046875, -0.3373260498046875, -0.6109466552734375, 0.07434844970703125, 0.489532470703125, 0.614044189453125, 0.27257537841796875, 0.09468841552734375, -0.224609375, 0.044971466064453125, 0.045032501220703125, -0.503082275390625, -0.30802154541015625, -0.138763427734375, -0.22652435302734375, -0.658538818359375, -0.003200531005859375, -0.05109405517578125, -0.07802391052246094, 0.12464714050292969, 0.2463836669921875, -0.468048095703125, 1.02630615234375, 0.0962677001953125, 0.21836090087890625, 0.5763702392578125, 0.3111381530761719, -0.13520050048828125, 0.01882171630859375, -0.35855865478515625, 0.23633575439453125, 0.0750732421875, -0.28714752197265625, -0.05681419372558594, -0.12845993041992188, 0.13983535766601562, 0.11102676391601562, -0.027256011962890625, -0.0602569580078125, -0.2110595703125, 0.23693084716796875, 0.610443115234375, -0.03576087951660156, 0.553192138671875, 0.15317535400390625, 0.5679779052734375, -0.21691131591796875, -0.14233779907226562, -0.02634429931640625, -0.05181884765625, 0.1254425048828125, 0.1175537109375, -0.0216217041015625, -0.14873886108398438, -0.20996475219726562, 0.0225067138671875, -0.09722518920898438, 0.10214042663574219, 0.19623565673828125, -0.2282123565673828, 0.22961044311523438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000005.npy"}
{"epoch": 0.007558578987150416, "step": 6, "batch_size": 64, "mean": -0.025151371955871582, "std": 0.30422496795654297, "min": -0.94940185546875, "p10": -0.4840263366699219, "median": 0.01514434814453125, "p90": 0.30699806213378905, "max": 0.6299667358398438, "pos_frac": 0.515625, "sample": [-0.3053131103515625, 0.09221076965332031, 0.1555328369140625, 0.09577178955078125, -0.12187957763671875, 0.5511550903320312, -0.09195327758789062, 0.07794570922851562, -0.3424835205078125, -0.22311973571777344, -0.026430130004882812, 0.0068359375, -0.7701759338378906, 0.43727874755859375, 0.0234527587890625, 0.09820556640625, -0.1431427001953125, 0.6299667358398438, -0.0033721923828125, -0.4844017028808594, 0.20491790771484375, 0.129425048828125, 0.13285446166992188, 0.3808307647705078, -0.0829010009765625, 0.08856201171875, -0.03372955322265625, -0.2603759765625, -0.120758056640625, -0.12329292297363281, 0.0764617919921875, 0.21442794799804688, 0.170440673828125, -0.08732223510742188, -0.6469917297363281, 0.3038177490234375, -0.4831504821777344, -0.08172607421875, -0.374542236328125, 0.1197052001953125, 0.2298583984375, -0.035068511962890625, -0.50177001953125, 0.3083610534667969, -0.3287506103515625, -0.0523223876953125, 0.225128173828125, 0.26807403564453125, -0.2113189697265625, 0.37018775939941406, -0.07889556884765625, 0.29389190673828125, -0.48749542236328125, -0.4971942901611328, 0.04824066162109375, 0.11797142028808594, -0.041980743408203125, 0.028778076171875, -0.94940185546875, -0.26214599609375, 0.1390838623046875, 0.15896224975585938, 0.32483863830566406, 0.14054298400878906], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000006.npy"}
{"epoch": 0.009070294784580499, "step": 7, "batch_size": 64, "mean": 0.008793264627456665, "std": 0.2351851910352707, "min": -0.5226058959960938, "p10": -0.3063945770263672, "median": 0.013639450073242188, "p90": 0.3406044006347658, "max": 0.4787254333496094, "pos_frac": 0.53125, "sample": [0.11524200439453125, -0.3023223876953125, 0.0195465087890625, 0.03081512451171875, 0.2590522766113281, 0.42709922790527344, 0.05034065246582031, 0.033905029296875, 0.36344146728515625, 0.15108871459960938, 0.28143310546875, 0.15412521362304688, 0.058437347412109375, 0.4503059387207031, -0.007720947265625, -0.00763702392578125, -0.07068252563476562, -0.11109161376953125, 0.185882568359375, 0.2365264892578125, -0.08165740966796875, -0.1084136962890625, -0.1031646728515625, 0.0290374755859375, -0.427520751953125, 0.11646080017089844, -0.5226058959960938, 0.4498443603515625, -0.175994873046875, 0.11443138122558594, -0.4291534423828125, 0.2835235595703125, 0.014507293701171875, 0.4787254333496094, -0.04836273193359375, -0.008514404296875, -0.3376617431640625, 0.15995407104492188, 0.027181625366210938, 0.37076568603515625, -0.25585174560546875, -0.11493873596191406, 0.0037384033203125, 0.1277923583984375, -0.36894989013671875, 0.18671417236328125, -0.04448699951171875, -0.14351654052734375, 0.3632659912109375, -0.370361328125, -0.27957916259765625, -0.3081398010253906, 0.07975959777832031, -0.16587066650390625, -0.10132598876953125, -0.052398681640625, -0.07219696044921875, -0.22002410888671875, -0.248687744140625, 0.0127716064453125, 0.28772735595703125, 0.17363739013671875, 0.18512725830078125, -0.2306060791015625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000007.npy"}
{"epoch": 0.010582010582010581, "step": 8, "batch_size": 64, "mean": 0.03777092695236206, "std": 0.35031718015670776, "min": -0.6025009155273438, "p10": -0.3026918411254883, "median": -0.0027933120727539062, "p90": 0.5613216400146485, "max": 1.0086822509765625, "pos_frac": 0.5, "sample": [-0.007844924926757812, 0.837554931640625, -0.125640869140625, -0.13001251220703125, -0.277435302734375, 0.27193450927734375, 0.7220382690429688, 0.7223663330078125, -0.28104209899902344, 0.086090087890625, 0.56658935546875, -0.0947265625, -0.058567047119140625, -0.5969314575195312, 0.9654922485351562, 1.0086822509765625, 0.336151123046875, -0.19989585876464844, -0.1540374755859375, 0.1350860595703125, 0.0065898895263671875, -0.6025009155273438, -0.1990203857421875, 0.00225830078125, 0.0649261474609375, -0.1322498321533203, -0.23034286499023438, -0.09658050537109375, 0.20091629028320312, 0.06083488464355469, 0.4164390563964844, 0.038799285888671875, 0.11659622192382812, 0.07569122314453125, -0.38114166259765625, -0.30083465576171875, 0.030826568603515625, 0.13786697387695312, 0.0582733154296875, -0.29134368896484375, 0.5490303039550781, -0.10371971130371094, -0.14715957641601562, -0.099945068359375, -0.020885467529296875, 0.2904815673828125, -0.07303810119628906, -0.5019454956054688, -0.49069786071777344, -0.20311737060546875, -0.17766761779785156, 0.5674037933349609, -0.4190177917480469, -0.01605987548828125, 0.13818359375, 0.24546051025390625, -0.30348777770996094, -0.135528564453125, 0.14368057250976562, 0.04547119140625, 0.009418487548828125, 0.5390777587890625, -0.15927886962890625, 0.03882598876953125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000008.npy"}
{"epoch": 0.012093726379440665, "step": 9, "batch_size": 64, "mean": -0.004266202449798584, "std": 0.3521674573421478, "min": -0.8134727478027344, "p10": -0.4397251129150391, "median": 0.005555152893066406, "p90": 0.36469573974609376, "max": 1.1850814819335938, "pos_frac": 0.515625, "sample": [-0.11975860595703125, 0.06500244140625, 0.042682647705078125, 0.026092529296875, 0.25055885314941406, -0.4062957763671875, -0.4306488037109375, -0.4436149597167969, -0.209381103515625, -0.259979248046875, -0.16530609130859375, -0.015748977661132812, 1.1850814819335938, 0.0694580078125, -0.170623779296875, -0.4150199890136719, -0.44768524169921875, 0.46484375, -0.041782379150390625, 0.0699462890625, 0.618499755859375, 0.0350494384765625, 0.3955230712890625, 0.2658843994140625, 0.0034351348876953125, -0.21666717529296875, 0.2713298797607422, 0.36481475830078125, 0.07923126220703125, -0.6178627014160156, -0.15592193603515625, -0.6417140960693359, 0.33672332763671875, 0.72802734375, -0.20944786071777344, 0.03956794738769531, 0.02814483642578125, -0.628143310546875, -0.3222503662109375, 0.2942657470703125, 0.46894073486328125, 0.0076751708984375, -0.09722900390625, 0.292999267578125, -0.061100006103515625, -0.06189727783203125, 0.19682884216308594, 0.2398242950439453, -0.14743995666503906, -0.8134727478027344, 0.07155990600585938, -0.14086151123046875, -0.15413475036621094, 0.171630859375, 0.36441802978515625, 0.1953125, 0.22809600830078125, -0.0263671875, -0.021556854248046875, 0.07508277893066406, -0.097991943359375, -0.7283935546875, -0.30687713623046875, 0.3556060791015625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000009.npy"}
{"epoch": 0.013605442176870748, "step": 10, "batch_size": 64, "mean": 0.015347808599472046, "std": 0.3735538423061371, "min": -0.6936874389648438, "p10": -0.37594146728515626, "median": -0.058208465576171875, "p90": 0.48040466308593754, "max": 1.0567626953125, "pos_frac": 0.453125, "sample": [-0.338775634765625, -0.1330585479736328, 0.089324951171875, -0.15668296813964844, -0.1050567626953125, -0.3748626708984375, -0.23838233947753906, -0.047576904296875, -0.486602783203125, -0.06884002685546875, -0.2840423583984375, 0.07926559448242188, -0.37640380859375, 0.601318359375, 0.4705352783203125, 0.014492034912109375, 0.1461925506591797, -0.30718994140625, 1.03826904296875, 0.2786598205566406, -0.016435623168945312, -0.10913467407226562, 0.7684783935546875, 0.4846343994140625, -0.29404640197753906, -0.3085823059082031, 0.26880645751953125, -0.0802764892578125, 0.08904266357421875, -0.23080062866210938, 0.4603118896484375, 0.18424224853515625, -0.24660491943359375, 0.18941497802734375, 0.1893596649169922, 0.3053741455078125, -0.17476272583007812, 0.27081871032714844, -0.1689605712890625, 0.20650672912597656, 0.16228103637695312, -0.23355484008789062, -0.4861907958984375, -0.411834716796875, -0.32707977294921875, -0.0794219970703125, -0.6936874389648438, 0.115478515625, 1.0567626953125, -0.27532958984375, 0.0097198486328125, 0.22151947021484375, -0.21620559692382812, -0.14818382263183594, -0.019786834716796875, -0.27193450927734375, 0.9299468994140625, 0.1489276885986328, -0.49041748046875, 0.21242523193359375, 0.37877655029296875, -0.41214752197265625, 0.5858917236328125, -0.361663818359375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000010.npy"}
{"epoch": 0.015117157974300832, "step": 11, "batch_size": 64, "mean": 0.029834330081939697, "std": 0.30491703748703003, "min": -1.1244125366210938, "p10": -0.2964141845703125, "median": 0.048941612243652344, "p90": 0.34109745025634775, "max": 1.0216140747070312, "pos_frac": 0.578125, "sample": [-0.31993865966796875, 0.13945960998535156, -0.021038055419921875, 0.11516571044921875, -0.4948234558105469, 0.12549209594726562, -0.06185722351074219, -0.09733390808105469, 0.360107421875, 0.17584228515625, 0.13585662841796875, 0.34796905517578125, 0.0788421630859375, 0.14353370666503906, 0.0036067962646484375, 0.188568115234375, 0.44922637939453125, 0.5193939208984375, -0.334808349609375, 0.16640472412109375, 0.7119903564453125, -0.1681671142578125, 1.0216140747070312, 0.0159454345703125, -0.23657608032226562, 0.1071319580078125, -0.3069953918457031, 0.2759227752685547, -0.020427703857421875, -0.2717247009277344, 0.04998588562011719, 0.01219940185546875, -0.13014602661132812, 0.5544281005859375, 0.03420829772949219, -0.18546295166015625, -0.05843353271484375, 0.10013580322265625, 0.249420166015625, -0.0831451416015625, -0.064605712890625, 0.30287933349609375, -0.4308319091796875, -0.013885498046875, -0.226409912109375, -1.1244125366210938, 0.11289596557617188, 0.1400318145751953, 0.220428466796875, 0.0478973388671875, 0.16088485717773438, 0.2044830322265625, -0.1663665771484375, 0.09785079956054688, -0.36504364013671875, -0.26180267333984375, -0.23531723022460938, 0.32506370544433594, -0.16782760620117188, -0.2310943603515625, 0.0711822509765625, 0.08696746826171875, -0.0668792724609375, 0.2017364501953125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000011.npy"}
{"epoch": 0.016628873771730914, "step": 12, "batch_size": 64, "mean": 0.021361559629440308, "std": 0.3354218006134033, "min": -1.3328857421875, "p10": -0.2878570556640625, "median": 0.05203533172607422, "p90": 0.37149639129638673, "max": 0.7115325927734375, "pos_frac": 0.609375, "sample": [0.19449234008789062, 0.28658294677734375, 0.13024139404296875, 0.07735252380371094, 0.08687019348144531, 0.5964126586914062, -0.025110244750976562, 0.2997894287109375, 0.1884899139404297, -0.000370025634765625, -0.28256988525390625, 0.08166885375976562, -0.11256790161132812, 0.34442138671875, 0.3867950439453125, -0.2694854736328125, 0.100311279296875, 0.188140869140625, -0.25229644775390625, 0.350433349609375, 0.029445648193359375, 0.037445068359375, -0.2375946044921875, 0.7115325927734375, -0.026393890380859375, -0.29012298583984375, -0.49445343017578125, -0.2153778076171875, -0.0304718017578125, 0.1391887664794922, -0.17783355712890625, -0.25518798828125, -0.27710723876953125, -0.4993896484375, -0.17305946350097656, 0.13396263122558594, -0.492523193359375, 0.4839324951171875, 0.14351654052734375, -0.218963623046875, -0.013031005859375, 0.04442405700683594, 0.1923828125, 0.3187599182128906, 0.0538482666015625, 0.39512062072753906, 0.0205230712890625, -0.13285446166992188, 0.04198455810546875, 0.07340240478515625, 0.3745765686035156, 0.05022239685058594, 0.36430931091308594, -1.3328857421875, 0.58331298828125, 0.24337005615234375, -0.9091320037841797, 0.22081756591796875, -0.18560791015625, -0.3263702392578125, 0.3447265625, 0.06759262084960938, 0.1737384796142578, 0.04376220703125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000012.npy"}
{"epoch": 0.018140589569160998, "step": 13, "batch_size": 64, "mean": -0.04561561346054077, "std": 0.2914823591709137, "min": -0.957977294921875, "p10": -0.39459800720214844, "median": -0.008701324462890625, "p90": 0.27767715454101577, "max": 0.43566131591796875, "pos_frac": 0.484375, "sample": [-0.720306396484375, 0.43566131591796875, -0.957977294921875, -0.020656585693359375, 0.111328125, -0.4611473083496094, 0.17798614501953125, 0.09184074401855469, 0.14420318603515625, -0.10233306884765625, 0.19356918334960938, -0.07869148254394531, 0.29216766357421875, 0.03630828857421875, 0.13041305541992188, 0.21956634521484375, 0.17291259765625, 0.02246856689453125, 0.015899658203125, 0.23293304443359375, -0.1724853515625, -0.380767822265625, 0.4141387939453125, -0.2209625244140625, -0.11192703247070312, 0.30199432373046875, 0.04471588134765625, 0.06133270263671875, -0.0010223388671875, -0.48496246337890625, 0.09547996520996094, -0.01638031005859375, 0.3939323425292969, -0.6936874389648438, 0.04683685302734375, 0.06241416931152344, 0.17645263671875, -0.21905517578125, 0.243865966796875, -0.0168914794921875, 0.09151840209960938, -0.0552215576171875, -0.15367889404296875, -0.2659740447998047, -0.10073089599609375, 0.1395111083984375, -0.1180419921875, 0.1557464599609375, -0.3841438293457031, -0.35042572021484375, -0.12078094482421875, -0.03426361083984375, -0.3166484832763672, -0.06760406494140625, 0.38848876953125, -0.8679733276367188, -0.399078369140625, 0.13870620727539062, -0.111785888671875, 0.33917999267578125, 0.072418212890625, -0.212493896484375, -0.053775787353515625, -0.09151458740234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000013.npy"}
{"epoch": 0.019652305366591082, "step": 14, "batch_size": 64, "mean": 0.06049656867980957, "std": 0.28938019275665283, "min": -0.5569839477539062, "p10": -0.2359485626220703, "median": 0.020934104919433594, "p90": 0.38863048553466817, "max": 1.00146484375, "pos_frac": 0.546875, "sample": [0.035266876220703125, -0.200103759765625, -0.003173828125, 0.27585601806640625, 0.1309967041015625, 0.16921424865722656, 0.187042236328125, 0.34466552734375, -0.15793609619140625, -0.1270580291748047, 0.2693939208984375, 0.19226455688476562, 0.2640495300292969, 0.20479965209960938, 0.22180938720703125, -0.012796401977539062, -0.0023822784423828125, -0.24076080322265625, -0.20185089111328125, -0.0666961669921875, -0.5222911834716797, -0.4398155212402344, 0.0775299072265625, -0.0354156494140625, 0.10553741455078125, -0.27822113037109375, 0.30859375, -0.03206634521484375, 0.040645599365234375, 0.6548690795898438, 0.02033233642578125, 1.00146484375, -0.05794525146484375, 0.750946044921875, 0.04096794128417969, 0.2998046875, 0.41226959228515625, -0.20288848876953125, 0.10829353332519531, 0.46135711669921875, -0.280242919921875, 0.4074726104736328, 0.3140830993652344, 0.774658203125, -0.13077926635742188, -0.06311607360839844, 0.00234222412109375, 0.16083526611328125, 0.159271240234375, 0.011114120483398438, -0.3472480773925781, -0.22472000122070312, 0.10056686401367188, -0.5569839477539062, 0.10286712646484375, -0.193206787109375, 0.22251129150390625, -0.13763999938964844, -0.04114532470703125, -0.06002044677734375, 0.021535873413085938, -0.14642333984375, -0.11742782592773438, -0.10309219360351562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000014.npy"}
{"epoch": 0.021164021164021163, "step": 15, "batch_size": 64, "mean": -0.011380106210708618, "std": 0.3859577775001526, "min": -1.2012481689453125, "p10": -0.4395759582519531, "median": 0.030877113342285156, "p90": 0.41065406799316423, "max": 1.1129608154296875, "pos_frac": 0.53125, "sample": [0.19730567932128906, -0.4391326904296875, 0.07686233520507812, 0.0011615753173828125, -0.3973350524902344, 0.49506378173828125, -0.4850006103515625, -0.14368438720703125, -0.203399658203125, -0.9862518310546875, 0.2729949951171875, -0.2434978485107422, -0.6990203857421875, -0.04146003723144531, 0.013824462890625, 0.20810508728027344, 1.1129608154296875, 0.2430267333984375, 0.21606826782226562, -0.8134403228759766, 0.5568923950195312, -0.08134078979492188, -0.40569305419921875, -0.156097412109375, 0.04792976379394531, 0.4766387939453125, 0.31359100341796875, 0.1900634765625, 0.5720062255859375, -0.1389312744140625, 0.052249908447265625, -0.09261512756347656, -0.1654205322265625, -0.390106201171875, 0.4295310974121094, -0.3335533142089844, 0.1313934326171875, 0.17266845703125, -0.24275779724121094, 0.2524566650390625, -0.0684814453125, -0.1443328857421875, -0.43976593017578125, 0.47908782958984375, 0.12455177307128906, -0.4925537109375, 0.09658050537109375, -0.23095321655273438, 0.14312744140625, -1.2012481689453125, -0.3614349365234375, -0.00676727294921875, 0.10559463500976562, -0.0485992431640625, -0.2533283233642578, 0.29933929443359375, 0.3636322021484375, 0.17746353149414062, 0.12242889404296875, 0.18489456176757812, 0.366607666015625, 0.33058929443359375, 0.28343963623046875, -0.13225555419921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000015.npy"}
{"epoch": 0.022675736961451247, "step": 16, "batch_size": 64, "mean": 0.0010768026113510132, "std": 0.31015869975090027, "min": -0.971099853515625, "p10": -0.3512943267822265, "median": -0.01439666748046875, "p90": 0.2941436767578125, "max": 0.920928955078125, "pos_frac": 0.484375, "sample": [-0.10616302490234375, 0.2949104309082031, -0.27339935302734375, 0.12053489685058594, 0.4454803466796875, 0.045253753662109375, 0.2155437469482422, -0.42458343505859375, 0.254608154296875, -0.1666259765625, -0.21691131591796875, 0.619171142578125, 0.15197372436523438, -0.10618209838867188, -0.09160041809082031, -0.3892021179199219, 0.146820068359375, -0.09868812561035156, -0.21393585205078125, -0.244293212890625, 0.6605987548828125, -0.22881317138671875, 0.1816558837890625, 0.06284332275390625, 0.09799385070800781, -0.42340087890625, 0.16159439086914062, 0.07468795776367188, -0.06599235534667969, -0.06735992431640625, -0.260589599609375, 0.22208404541015625, 0.01995086669921875, -0.0634002685546875, 0.000518798828125, 0.069732666015625, 0.823150634765625, -0.0172882080078125, 0.038120269775390625, -0.20799636840820312, -0.20825767517089844, 0.238067626953125, -0.038990020751953125, 0.2812080383300781, 0.920928955078125, 0.24901390075683594, 0.2923545837402344, -0.26151275634765625, 0.10219573974609375, -0.1661205291748047, 0.04210853576660156, -0.06179618835449219, 0.014016151428222656, -0.3846778869628906, -0.4320411682128906, -0.042293548583984375, -0.593475341796875, -0.971099853515625, -0.0932464599609375, -0.011505126953125, -0.14659690856933594, -0.09503746032714844, 0.3496551513671875, 0.045215606689453125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000016.npy"}
{"epoch": 0.02418745275888133, "step": 17, "batch_size": 64, "mean": -0.009143710136413574, "std": 0.3372185230255127, "min": -1.0370025634765625, "p10": -0.4068038940429687, "median": 0.012842178344726562, "p90": 0.342927360534668, "max": 1.07794189453125, "pos_frac": 0.515625, "sample": [0.09704208374023438, 0.059722900390625, 0.012775421142578125, -0.13074493408203125, -0.310455322265625, 0.267242431640625, -0.7406234741210938, 0.12143135070800781, 0.13008880615234375, -0.21132659912109375, -0.020111083984375, 0.28710174560546875, -0.212738037109375, -0.11383056640625, -0.1782989501953125, 0.220245361328125, -0.22315216064453125, -0.26123809814453125, 0.12171173095703125, -0.21149444580078125, 0.24721527099609375, 0.4344444274902344, -0.67919921875, 0.16378021240234375, 0.0546875, 0.33826637268066406, 0.40781402587890625, -0.03285408020019531, -0.0935516357421875, -0.17052841186523438, -0.3638763427734375, -0.5020980834960938, 0.13875579833984375, 0.07423782348632812, 0.7693405151367188, 0.1549072265625, 0.14165115356445312, 0.28069305419921875, -0.15601348876953125, -0.62750244140625, -0.0172576904296875, 0.04566192626953125, 0.2600593566894531, -0.00864410400390625, 0.11427116394042969, 0.05448150634765625, -0.425201416015625, 0.012908935546875, 0.45510101318359375, 1.07794189453125, -0.14070892333984375, -1.0370025634765625, -0.28881072998046875, -0.0008831024169921875, 0.3449249267578125, -0.3057708740234375, 0.19990158081054688, 0.05034637451171875, -0.4810791015625, -0.03311920166015625, 0.3773231506347656, 0.03033447265625, -0.10246849060058594, -0.051025390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000017.npy"}
{"epoch": 0.025699168556311415, "step": 18, "batch_size": 64, "mean": 0.03949823975563049, "std": 0.27052420377731323, "min": -0.5922431945800781, "p10": -0.3345634460449219, "median": 0.06013298034667969, "p90": 0.38320999145507817, "max": 0.5912551879882812, "pos_frac": 0.59375, "sample": [-0.032855987548828125, 0.3489494323730469, 0.091705322265625, 0.060306549072265625, 0.2509174346923828, -0.32192230224609375, 0.11693954467773438, 0.5896759033203125, 0.5912551879882812, 0.27896881103515625, 0.2956523895263672, -0.053466796875, -0.3065319061279297, 0.16501617431640625, -0.1374359130859375, -0.08557891845703125, 0.1332378387451172, -0.0478668212890625, 0.22343826293945312, 0.2670745849609375, 0.049793243408203125, 0.40770721435546875, 0.05995941162109375, 0.11037445068359375, -0.2299633026123047, -0.10473442077636719, 0.20076751708984375, 0.43395233154296875, 0.3878288269042969, 0.23174285888671875, 0.31867218017578125, -0.37224578857421875, -0.2029571533203125, -0.2155914306640625, -0.3399810791015625, -0.15441131591796875, 0.00826263427734375, -0.2281627655029297, 0.0079345703125, -0.052947998046875, 0.02651214599609375, 0.2064056396484375, -0.47924041748046875, 0.4293212890625, -0.5857658386230469, 0.403778076171875, 0.20843887329101562, -0.08064460754394531, -0.5922431945800781, -0.221832275390625, 0.07051849365234375, -0.011426925659179688, -0.36240577697753906, -0.38498687744140625, 0.21256637573242188, -0.1590728759765625, 0.16231536865234375, 0.1973857879638672, 0.13018798828125, 0.21429061889648438, 0.01239776611328125, -0.18639373779296875, 0.20186996459960938, 0.3724327087402344], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000018.npy"}
{"epoch": 0.027210884353741496, "step": 19, "batch_size": 64, "mean": 0.016053855419158936, "std": 0.3059896230697632, "min": -1.15472412109375, "p10": -0.2466094970703125, "median": 0.002857208251953125, "p90": 0.4140098571777344, "max": 0.7497100830078125, "pos_frac": 0.515625, "sample": [0.6251983642578125, 0.16788482666015625, -0.02944183349609375, -0.11883544921875, 0.002777099609375, 0.2617034912109375, -0.1152801513671875, 0.01906585693359375, -0.5245513916015625, 0.0637969970703125, -0.0596466064453125, 0.18367767333984375, 0.2489013671875, 0.13277244567871094, 0.08426666259765625, -0.2157726287841797, 0.046844482421875, -0.0106048583984375, 0.00293731689453125, 0.027860641479492188, 0.41985321044921875, -0.5110015869140625, -0.1984100341796875, -0.137176513671875, 0.5185394287109375, 0.4408111572265625, 0.04146575927734375, -0.21625518798828125, -0.499053955078125, -0.029895782470703125, 0.3214263916015625, -0.24848175048828125, 0.1166229248046875, 0.18889236450195312, -1.15472412109375, -0.5581207275390625, -0.073699951171875, 0.04221343994140625, 0.623260498046875, -0.061450958251953125, 0.028839111328125, -0.05239677429199219, 0.4003753662109375, -0.038974761962890625, -0.05255889892578125, 0.023752212524414062, -0.1746826171875, -0.1164398193359375, 0.021120071411132812, -0.00434112548828125, 0.7497100830078125, 0.11065483093261719, 0.3225860595703125, -0.02196502685546875, -0.0084686279296875, 0.00763702392578125, -0.3741607666015625, -0.09616851806640625, -0.24224090576171875, 0.2524871826171875, -0.19660377502441406, 0.20322418212890625, 0.5259246826171875, -0.058231353759765625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000019.npy"}
{"epoch": 0.02872260015117158, "step": 20, "batch_size": 64, "mean": 0.027033761143684387, "std": 0.3421015441417694, "min": -0.87542724609375, "p10": -0.4256912231445312, "median": 0.049983978271484375, "p90": 0.4908309936523438, "max": 1.06060791015625, "pos_frac": 0.546875, "sample": [-0.1725006103515625, -0.16119384765625, -0.11578369140625, 0.047576904296875, 0.0731353759765625, -0.57550048828125, 0.16681671142578125, 0.538421630859375, 0.2003173828125, -0.24175262451171875, -0.11540985107421875, -0.031002044677734375, -0.45589447021484375, -0.05378913879394531, 0.256866455078125, -0.264923095703125, -0.29032135009765625, -0.04294586181640625, 0.070648193359375, -0.462677001953125, 0.15477752685546875, -0.14884185791015625, 0.0161590576171875, 0.5605316162109375, 0.18089675903320312, 0.49356842041015625, 0.1472644805908203, -0.2584800720214844, 0.23834228515625, 0.05441570281982422, 0.149749755859375, -0.7887725830078125, -0.06085968017578125, -0.049530029296875, -0.53485107421875, -0.029205322265625, 0.24929428100585938, 0.11669921875, 0.2731819152832031, 0.10357666015625, 0.18982315063476562, -0.1116790771484375, 0.25385284423828125, -0.87542724609375, 0.501678466796875, 1.06060791015625, 0.4076042175292969, -0.456451416015625, 0.26995849609375, 0.31752777099609375, -0.35521697998046875, 0.5606842041015625, 0.55267333984375, 0.48444366455078125, 0.05239105224609375, -0.0927581787109375, 0.1191558837890625, 0.288177490234375, -0.319580078125, 0.19197654724121094, -0.06916999816894531, -0.29277801513671875, -0.1983051300048828, 0.012966156005859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000020.npy"}
{"epoch": 0.030234315948601664, "step": 21, "batch_size": 64, "mean": -0.01569044589996338, "std": 0.32274094223976135, "min": -1.086212158203125, "p10": -0.35239181518554685, "median": -0.05365276336669922, "p90": 0.39322052001953134, "max": 0.8892974853515625, "pos_frac": 0.421875, "sample": [0.030248641967773438, 0.1945629119873047, 0.2158184051513672, -0.473724365234375, 0.4012908935546875, -0.223236083984375, 0.4144096374511719, -0.1545886993408203, 0.06174468994140625, -0.1355419158935547, 0.3145294189453125, -0.0213470458984375, -0.2881317138671875, 0.0880279541015625, -0.11843490600585938, 0.23280715942382812, -0.2581329345703125, 0.6845474243164062, -0.30693817138671875, -0.1353626251220703, -0.36438751220703125, 0.15001487731933594, -0.13673973083496094, 0.297393798828125, -0.2032012939453125, -1.086212158203125, -0.10031509399414062, 0.26737213134765625, 0.7810745239257812, 0.11626815795898438, -0.3819580078125, -0.22333908081054688, -0.08782958984375, -0.06106758117675781, -0.5200653076171875, 0.3743896484375, 0.8892974853515625, -0.061737060546875, -0.4802398681640625, -0.14228248596191406, 0.4953765869140625, -0.3899078369140625, -0.175445556640625, 0.08621978759765625, 0.1269683837890625, -0.1813640594482422, 0.1191253662109375, 0.4610099792480469, -0.310302734375, -0.036502838134765625, 0.007114410400390625, 0.245513916015625, -0.030637741088867188, -0.046237945556640625, -0.135650634765625, 0.18926239013671875, 0.016448974609375, -0.14329910278320312, -0.025056838989257812, -0.32440185546875, -0.16681671142578125, 0.0936737060546875, -0.25774383544921875, -0.1705188751220703], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000021.npy"}
{"epoch": 0.031746031746031744, "step": 22, "batch_size": 64, "mean": 0.03929010033607483, "std": 0.2886139750480652, "min": -0.90948486328125, "p10": -0.3433761596679687, "median": 0.0678243637084961, "p90": 0.372943115234375, "max": 0.7183837890625, "pos_frac": 0.59375, "sample": [-0.053375244140625, -0.36228179931640625, -0.29926300048828125, 0.24182510375976562, 0.16965293884277344, 0.1762237548828125, -0.04651451110839844, -0.3781890869140625, 0.06669044494628906, 0.1595172882080078, 0.4859580993652344, -0.1261444091796875, -0.11676979064941406, 0.058910369873046875, -0.20584487915039062, -0.26851654052734375, -0.069793701171875, 0.049121856689453125, -0.11110687255859375, -0.5592727661132812, 0.2726593017578125, 0.7183837890625, 0.022796630859375, 0.07493782043457031, -0.16829299926757812, -0.0061798095703125, 0.23608016967773438, 0.0573577880859375, -0.23224639892578125, 0.06895828247070312, 0.6958770751953125, -0.4134521484375, 0.07675933837890625, -0.038639068603515625, 0.1711292266845703, -0.142425537109375, 0.005107879638671875, 0.1262054443359375, -0.405181884765625, -0.2046356201171875, 0.3691139221191406, -0.90948486328125, -0.18195343017578125, 0.17376708984375, -0.13808441162109375, 0.23284912109375, 0.3745841979980469, 0.493499755859375, 0.4486846923828125, 0.467529296875, -0.4140625, 0.346832275390625, 0.18658447265625, 0.107421875, 0.2198028564453125, 0.15625762939453125, 0.11093711853027344, 0.1878070831298828, 0.1045074462890625, 0.221099853515625, 0.2671356201171875, -0.001674652099609375, 0.09682464599609375, -0.131439208984375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000022.npy"}
{"epoch": 0.03325774754346183, "step": 23, "batch_size": 64, "mean": -0.004443138837814331, "std": 0.3037569224834442, "min": -0.967620849609375, "p10": -0.3233142852783203, "median": 0.0031061172485351562, "p90": 0.26126708984375013, "max": 0.894012451171875, "pos_frac": 0.515625, "sample": [-0.967620849609375, 0.21051025390625, 0.0495452880859375, 0.18426513671875, -0.14286041259765625, 0.09112167358398438, 0.13350677490234375, -0.580078125, 0.09850311279296875, 0.02093505859375, 0.1197662353515625, -0.1641254425048828, 0.04041290283203125, 0.06464004516601562, -0.05751991271972656, 0.050994873046875, -0.3479461669921875, 0.18751907348632812, -0.13304901123046875, 0.4358673095703125, 0.2742767333984375, 0.03168678283691406, -0.2705574035644531, -0.3411293029785156, -0.3922767639160156, -0.1598663330078125, 0.00318145751953125, 0.0030307769775390625, 0.894012451171875, -0.17547607421875, -0.2680816650390625, -0.09547805786132812, 0.05811309814453125, -0.03779792785644531, 0.59808349609375, -0.1664581298828125, 0.0540618896484375, 0.6912841796875, 0.01451873779296875, -0.28174591064453125, 0.026641845703125, -0.0475311279296875, -0.14653396606445312, 0.0305633544921875, 0.10048675537109375, -0.1173553466796875, 0.039012908935546875, -0.014072418212890625, -0.16127777099609375, 0.1288299560546875, -0.455230712890625, -0.07451820373535156, -0.12453842163085938, -0.015027999877929688, 0.6683464050292969, -0.050922393798828125, 0.2309112548828125, -0.5456809997558594, 0.031665802001953125, -0.09804534912109375, 0.753265380859375, -0.21303939819335938, -0.09236526489257812, 0.13428497314453125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000023.npy"}
{"epoch": 0.03476946334089191, "step": 24, "batch_size": 64, "mean": -0.04989221692085266, "std": 0.27880898118019104, "min": -0.6769561767578125, "p10": -0.35269622802734374, "median": -0.0250244140625, "p90": 0.2799591064453126, "max": 0.958282470703125, "pos_frac": 0.453125, "sample": [-0.433380126953125, -0.075958251953125, -0.39391326904296875, 0.10304832458496094, 0.11495590209960938, -0.16057586669921875, 0.04259490966796875, 0.0062408447265625, -0.21840667724609375, -0.6769561767578125, -0.011827468872070312, -0.2340087890625, -0.17845726013183594, 0.3997039794921875, 0.33383941650390625, 0.085357666015625, 0.14225387573242188, -0.1867542266845703, -0.0739288330078125, 0.025056838989257812, -0.33379364013671875, 0.5284652709960938, 0.062835693359375, -0.2755317687988281, 0.958282470703125, -0.2761039733886719, 0.2625541687011719, -0.16894149780273438, -0.63916015625, 0.1329803466796875, -0.06347846984863281, -0.20183753967285156, -0.5218238830566406, -0.04288673400878906, 0.16217803955078125, 0.1265869140625, 0.046794891357421875, -0.2071857452392578, -0.10567474365234375, -0.03360748291015625, 0.384124755859375, 0.002964019775390625, 0.2874183654785156, 0.09068489074707031, -0.0118408203125, -0.0948486328125, 0.1516265869140625, -0.29156494140625, -0.01644134521484375, 0.09429931640625, -0.16039276123046875, 0.082733154296875, 0.03170013427734375, -0.24699020385742188, 0.005218505859375, 0.00179290771484375, 0.050266265869140625, -0.348541259765625, -0.3544769287109375, -0.2808494567871094, -0.12892913818359375, -0.6554946899414062, 0.363067626953125, -0.1681652069091797], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000024.npy"}
{"epoch": 0.036281179138321996, "step": 25, "batch_size": 64, "mean": 0.036393433809280396, "std": 0.29313018918037415, "min": -0.79095458984375, "p10": -0.2517902374267578, "median": 0.02452850341796875, "p90": 0.39915008544921887, "max": 0.73663330078125, "pos_frac": 0.546875, "sample": [0.0885772705078125, -0.6114501953125, 0.24055099487304688, -0.13794708251953125, 0.08360862731933594, -0.4142951965332031, 0.323394775390625, 0.10990333557128906, 0.412567138671875, 0.18074417114257812, -0.12030792236328125, -0.06442070007324219, -0.1713104248046875, 0.0268707275390625, 0.0501251220703125, -0.01522064208984375, -0.1878204345703125, 0.27970123291015625, -0.13062667846679688, 0.4328765869140625, -0.2459716796875, 0.059490203857421875, -0.34659576416015625, 0.3678436279296875, 0.3095703125, -0.10898017883300781, -0.11408615112304688, 0.603668212890625, -0.12377548217773438, 0.03826141357421875, 0.12590789794921875, -0.030416488647460938, 0.3236846923828125, 0.11077499389648438, 0.73663330078125, -0.06665420532226562, -0.1708526611328125, -0.2542839050292969, 0.10686492919921875, 0.1820220947265625, -0.79095458984375, 0.12493515014648438, 0.12502479553222656, -0.07271003723144531, 0.22110366821289062, 0.01691436767578125, 0.6826133728027344, -0.21490859985351562, 0.022186279296875, -0.050140380859375, -0.20001983642578125, -0.08066749572753906, 0.7010421752929688, 0.05994415283203125, -0.37964630126953125, 0.007925033569335938, 0.5868148803710938, 0.24930572509765625, 0.0606842041015625, -0.21390151977539062, 0.21710968017578125, -0.424285888671875, -0.1322479248046875, -0.0655670166015625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000025.npy"}
{"epoch": 0.03779289493575208, "step": 26, "batch_size": 64, "mean": 0.009161293506622314, "std": 0.30446335673332214, "min": -0.5668487548828125, "p10": -0.34180526733398436, "median": -0.01815032958984375, "p90": 0.38213958740234377, "max": 0.8609619140625, "pos_frac": 0.453125, "sample": [0.20812606811523438, -0.49053955078125, -0.1519622802734375, -0.03167724609375, 0.4215240478515625, -0.07061958312988281, 0.3502044677734375, 0.38320159912109375, -0.22834014892578125, 0.3342552185058594, 0.038364410400390625, 0.8609619140625, -0.21990966796875, -0.17724609375, -0.2493133544921875, -0.17385101318359375, 0.31046295166015625, 0.466827392578125, 0.34095001220703125, -0.017879486083984375, 0.0918426513671875, -0.29813194274902344, -0.12529373168945312, -0.3741340637207031, -0.4734039306640625, -0.22695159912109375, 0.141265869140625, -0.15053367614746094, -0.053493499755859375, -0.2453765869140625, -0.018421173095703125, 0.8433570861816406, -0.34444427490234375, 0.37966156005859375, -0.5668487548828125, 0.1174774169921875, 0.21334075927734375, 0.25583839416503906, 0.047943115234375, -0.133697509765625, 0.07668685913085938, 0.00873565673828125, -0.3188304901123047, -0.3558807373046875, 0.623321533203125, -0.20305633544921875, -0.3723602294921875, 0.15911865234375, -0.3356475830078125, 0.17078208923339844, 0.49109649658203125, -0.04443359375, 0.15647125244140625, -0.005279541015625, -0.1792469024658203, -0.2524833679199219, 0.0008087158203125, -0.2194347381591797, 0.35540008544921875, -0.08681488037109375, -0.22919464111328125, -0.010011672973632812, 0.014696121215820312, 0.15834426879882812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000026.npy"}
{"epoch": 0.039304610733182165, "step": 27, "batch_size": 64, "mean": -0.011037617921829224, "std": 0.3380962312221527, "min": -1.022735595703125, "p10": -0.3150550842285156, "median": -0.001239776611328125, "p90": 0.3688827514648438, "max": 1.0026397705078125, "pos_frac": 0.5, "sample": [0.099151611328125, 0.21967315673828125, 0.0204010009765625, -0.06823348999023438, 0.09225845336914062, 0.1939544677734375, -0.26621246337890625, -0.0284271240234375, -0.19045257568359375, 0.0692901611328125, -0.2331390380859375, -1.022735595703125, 0.21559715270996094, 0.3671417236328125, -0.11895942687988281, 0.09379196166992188, 0.1318340301513672, -0.5625, -0.3248443603515625, -0.46307373046875, 0.458282470703125, 0.36962890625, -0.8669586181640625, -0.27921295166015625, 0.42943572998046875, 0.3562164306640625, -0.0814971923828125, -0.040744781494140625, 0.6326828002929688, -0.1520709991455078, -0.29221343994140625, 0.22621726989746094, -0.009145736694335938, 0.13657379150390625, 0.3907299041748047, -0.8478775024414062, 0.3437042236328125, 1.0026397705078125, 0.35335540771484375, -0.12049484252929688, 0.1692962646484375, -0.5083160400390625, 0.1972637176513672, 0.11712646484375, -0.18408584594726562, 0.06018829345703125, -0.09618377685546875, 0.4250450134277344, 0.11274337768554688, 0.10085296630859375, -0.07709503173828125, -0.0582733154296875, 0.0066661834716796875, -0.1256256103515625, 0.135833740234375, -0.16274070739746094, -0.141448974609375, -0.27674293518066406, -0.063568115234375, -0.2819175720214844, 0.017330169677734375, 0.0709686279296875, -0.09512710571289062, -0.2823638916015625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000027.npy"}
{"epoch": 0.04081632653061224, "step": 28, "batch_size": 64, "mean": -0.004308879375457764, "std": 0.3192167282104492, "min": -0.782562255859375, "p10": -0.4066864013671875, "median": 0.026132583618164062, "p90": 0.4701057434082032, "max": 0.6660232543945312, "pos_frac": 0.515625, "sample": [0.04566192626953125, -0.782562255859375, -0.16766357421875, -0.3477325439453125, -0.1732940673828125, -0.009174346923828125, 0.2249298095703125, 0.30843353271484375, 0.2767333984375, 0.60211181640625, -0.14214706420898438, 0.4798583984375, -0.5790634155273438, -0.48412322998046875, -0.4503173828125, -0.1728363037109375, -0.2508506774902344, -0.41697120666503906, -0.0148773193359375, 0.14220428466796875, 0.4929046630859375, -0.14763259887695312, 0.03582191467285156, -0.245849609375, 0.15372848510742188, 0.11016082763671875, -0.25576019287109375, -0.011875152587890625, -0.6761207580566406, -0.01782989501953125, -0.24445343017578125, -0.3458213806152344, 0.17681503295898438, 0.6660232543945312, -0.27561187744140625, 0.12356185913085938, 0.045352935791015625, -0.08684539794921875, -0.3005638122558594, 0.20062255859375, 0.0877227783203125, -0.39800262451171875, -0.087677001953125, 0.5033664703369141, 0.0755157470703125, 0.016443252563476562, 0.10876655578613281, -0.2832794189453125, 0.04976463317871094, 0.22880935668945312, 0.26509857177734375, 0.44734954833984375, 0.29858970642089844, 0.19818115234375, 0.523193359375, 0.3827667236328125, -0.15143203735351562, -0.41040802001953125, 0.22553253173828125, -0.2983112335205078, 0.132537841796875, 0.49059104919433594, 0.09173393249511719, -0.257568359375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000028.npy"}
{"epoch": 0.042328042328042326, "step": 29, "batch_size": 64, "mean": 0.02456343173980713, "std": 0.3357960283756256, "min": -0.8926811218261719, "p10": -0.37100677490234374, "median": 0.03179454803466797, "p90": 0.37614688873291013, "max": 0.6882781982421875, "pos_frac": 0.515625, "sample": [-0.016422271728515625, -0.3518257141113281, 0.6332569122314453, -0.45613861083984375, 0.6882781982421875, 0.026319503784179688, -0.08837890625, 0.1541004180908203, -0.7481460571289062, 0.08953857421875, 0.10653305053710938, 0.3752403259277344, 0.32276153564453125, -0.051631927490234375, 0.22955703735351562, 0.34261322021484375, -0.2894287109375, -0.18672561645507812, -0.3690948486328125, 0.17047882080078125, -0.11652374267578125, -0.060577392578125, 0.2769012451171875, -0.1259479522705078, -0.6032943725585938, 0.1188812255859375, -0.0031585693359375, -0.8926811218261719, 0.6677398681640625, 0.33922576904296875, 0.27680206298828125, -0.3649139404296875, 0.27392578125, 0.3203125, -0.0501708984375, 0.12202644348144531, 0.19488525390625, 0.1037445068359375, 0.37653541564941406, 0.2109375, -0.1212158203125, 0.052768707275390625, -0.07043647766113281, -0.015903472900390625, -0.011487960815429688, -0.25095367431640625, -0.18820953369140625, -0.42120361328125, -0.07083892822265625, -0.004352569580078125, 0.543792724609375, -0.19020843505859375, 0.4430084228515625, 0.06991767883300781, 0.223663330078125, 0.03726959228515625, -0.5994338989257812, -0.371826171875, -0.23748016357421875, 0.2484130859375, 0.6766510009765625, -0.3561553955078125, 0.18358421325683594, 0.3571624755859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000029.npy"}
{"epoch": 0.04383975812547241, "step": 30, "batch_size": 64, "mean": 0.007722735404968262, "std": 0.32882797718048096, "min": -1.696044921875, "p10": -0.3387298583984375, "median": 0.060466766357421875, "p90": 0.29201412200927734, "max": 0.853118896484375, "pos_frac": 0.59375, "sample": [-0.04209327697753906, 0.12079620361328125, 0.06540679931640625, -0.14459991455078125, 0.05222892761230469, 0.19966888427734375, -0.0017757415771484375, 0.216583251953125, -0.6003189086914062, 0.1153106689453125, -0.17541885375976562, 0.027740478515625, -0.27327728271484375, 0.07083892822265625, -0.19699859619140625, 0.2942657470703125, -0.155914306640625, 0.853118896484375, -0.42116546630859375, 0.3271636962890625, 0.11437225341796875, -0.013645172119140625, 0.13295745849609375, 0.04215812683105469, 0.4086761474609375, 0.114990234375, -0.3098487854003906, -0.3400917053222656, 0.1710357666015625, -0.08661651611328125, 0.1790924072265625, -0.10599899291992188, 0.18958282470703125, 0.21865081787109375, 0.17547607421875, 0.089324951171875, 0.43587493896484375, -0.010713577270507812, 0.34873199462890625, -0.12239646911621094, -0.13322067260742188, 0.2673301696777344, 0.24676513671875, 0.16976165771484375, 0.052532196044921875, 0.24637603759765625, -0.3624725341796875, -0.08557510375976562, 0.32039642333984375, -0.566070556640625, -0.44292449951171875, 0.17010498046875, 0.11680221557617188, 0.17554473876953125, -0.05558013916015625, 0.06482696533203125, -0.0849151611328125, 0.0561065673828125, -1.696044921875, -0.06530189514160156, 0.1555938720703125, -0.3355522155761719, 0.02983856201171875, 0.2867603302001953], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000030.npy"}
{"epoch": 0.045351473922902494, "step": 31, "batch_size": 64, "mean": 0.08262395858764648, "std": 0.3422168791294098, "min": -0.780059814453125, "p10": -0.2664875030517578, "median": 0.021015167236328125, "p90": 0.5803369522094728, "max": 1.1230621337890625, "pos_frac": 0.515625, "sample": [-0.0176849365234375, 0.4427947998046875, -0.053924560546875, 0.25252532958984375, -0.11986732482910156, -0.053741455078125, 0.0685272216796875, -0.02324676513671875, -0.3311309814453125, 0.08786773681640625, 0.3170604705810547, -0.048980712890625, 0.5899105072021484, 0.90472412109375, 0.14398956298828125, 0.441558837890625, -0.09109306335449219, 0.21051025390625, 0.0828857421875, -0.11737060546875, 1.1230621337890625, 0.041751861572265625, 0.14629364013671875, -0.05098152160644531, 0.7840251922607422, 0.0440673828125, 0.1823577880859375, -0.2702789306640625, -0.24662017822265625, 0.31215667724609375, -0.0028228759765625, -0.21730804443359375, -0.1039886474609375, -0.16815185546875, 0.023160934448242188, -0.2032623291015625, -0.30089569091796875, -0.146514892578125, -0.23513412475585938, -0.15753936767578125, 0.33233642578125, -0.3275146484375, -0.017452239990234375, 0.546417236328125, 0.626007080078125, 0.4019279479980469, -0.2576408386230469, 0.03322029113769531, 0.10383987426757812, -0.12546539306640625, -0.780059814453125, -0.19799041748046875, -0.35626220703125, 0.018869400024414062, 0.7650146484375, -0.15217208862304688, 0.21899986267089844, 0.672882080078125, 0.024324417114257812, 0.09371185302734375, 0.5579986572265625, 0.18691444396972656, -0.041667938232421875, -0.2769966125488281], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000031.npy"}
{"epoch": 0.04686318972033258, "step": 32, "batch_size": 64, "mean": 0.06868937611579895, "std": 0.32006749510765076, "min": -0.6210174560546875, "p10": -0.3243619918823242, "median": 0.07061767578125, "p90": 0.4965656280517581, "max": 0.8154983520507812, "pos_frac": 0.609375, "sample": [0.20035552978515625, 0.43784332275390625, -0.5482711791992188, 0.4360809326171875, -0.5877838134765625, 0.14923095703125, 0.2855224609375, 0.196319580078125, 0.551971435546875, 0.3752288818359375, 0.637481689453125, 0.5217323303222656, 0.3525047302246094, 0.14270401000976562, 0.5909061431884766, -0.181396484375, 0.0051116943359375, 0.055629730224609375, -0.37923431396484375, -0.005916595458984375, -0.09811592102050781, 0.010402679443359375, 0.3416748046875, 0.36176300048828125, -0.070343017578125, -0.05490303039550781, 0.536865234375, -0.103485107421875, -0.168548583984375, 0.1507568359375, 0.04641914367675781, 0.078369140625, -0.14327621459960938, 0.2000274658203125, 0.2044219970703125, -0.6210174560546875, -0.13439178466796875, -0.02922821044921875, 0.5576171875, 0.8154983520507812, 0.3960723876953125, 0.19148635864257812, -0.26071929931640625, 0.14383697509765625, 0.01387786865234375, 0.15625762939453125, -0.125701904296875, -0.3412666320800781, -0.5616989135742188, -0.23745155334472656, -0.24444580078125, 0.2107696533203125, -0.2651939392089844, -0.08210945129394531, 0.01326751708984375, -0.20895004272460938, -0.493988037109375, 0.37920188903808594, -0.28491783142089844, 0.12119865417480469, 0.09502792358398438, 0.422698974609375, 0.179473876953125, 0.0628662109375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000032.npy"}
{"epoch": 0.04837490551776266, "step": 33, "batch_size": 64, "mean": 0.030031487345695496, "std": 0.3293270170688629, "min": -1.223541259765625, "p10": -0.32339019775390626, "median": 0.019992828369140625, "p90": 0.4032112121582032, "max": 0.8622207641601562, "pos_frac": 0.515625, "sample": [-0.0588226318359375, -0.323944091796875, -0.357818603515625, 0.1676177978515625, 0.10860633850097656, -0.15053558349609375, -0.5274734497070312, 0.21794891357421875, 0.6111907958984375, 0.2124786376953125, -0.10784912109375, 0.17205429077148438, -0.10575008392333984, 0.16523361206054688, 0.8622207641601562, -0.3731880187988281, -0.11767578125, 0.18900299072265625, 0.1399383544921875, 0.49359130859375, 0.38344573974609375, 0.37960052490234375, 0.3508453369140625, -0.05193328857421875, -0.6327972412109375, 0.437744140625, 0.00365447998046875, -0.014209747314453125, -0.2399444580078125, 0.168701171875, 0.2406597137451172, -0.06826972961425781, 0.4594459533691406, -0.015941619873046875, -0.27754783630371094, 0.15056610107421875, 0.18272972106933594, -0.2027587890625, 0.2162322998046875, -0.2040882110595703, 0.1038360595703125, -0.2081451416015625, -0.16202735900878906, -0.5253753662109375, 0.2493438720703125, 0.1222381591796875, 0.33699798583984375, -0.018444061279296875, -0.17522430419921875, -0.095001220703125, -0.14556121826171875, 0.2806549072265625, 0.21875572204589844, -0.100006103515625, -0.22117233276367188, -0.0677490234375, -0.10393524169921875, 0.18314361572265625, -1.223541259765625, -0.3220977783203125, 0.0363311767578125, 0.6062583923339844, 0.25809288024902344, 0.41168212890625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000033.npy"}
{"epoch": 0.049886621315192746, "step": 34, "batch_size": 64, "mean": 0.020989298820495605, "std": 0.2988686263561249, "min": -0.6394424438476562, "p10": -0.28893585205078126, "median": 0.035137176513671875, "p90": 0.33001480102539066, "max": 0.7303466796875, "pos_frac": 0.515625, "sample": [-0.5044822692871094, 0.4367523193359375, 0.17413330078125, -0.18202972412109375, 0.3930320739746094, 0.19207000732421875, 0.08133506774902344, -0.20105361938476562, 0.11327934265136719, 0.19214820861816406, 0.30217742919921875, -0.141998291015625, -0.025909423828125, 0.332855224609375, -0.6124267578125, -0.03868865966796875, 0.7303466796875, -0.23882293701171875, 0.1283893585205078, 0.27927589416503906, 0.07003021240234375, -0.018037796020507812, 0.17217445373535156, -0.2733306884765625, -0.13335037231445312, -0.07928466796875, 0.32338714599609375, 0.7225837707519531, 0.20269775390625, -0.09899139404296875, -0.031673431396484375, 0.2288360595703125, -0.14264869689941406, 0.17641639709472656, -0.1944732666015625, -0.1414642333984375, -0.05065155029296875, 0.23221588134765625, -0.068389892578125, 0.5032310485839844, 0.17311859130859375, -0.2091827392578125, -0.10555076599121094, 0.095489501953125, 0.1463165283203125, 0.215576171875, -0.49266815185546875, -0.12356185913085938, 0.23390960693359375, 0.2301483154296875, 0.21332740783691406, -0.5353317260742188, 0.568450927734375, -0.6103363037109375, -0.23525238037109375, -0.295623779296875, 0.000244140625, -0.2146148681640625, -0.2406158447265625, 0.19754981994628906, 0.24025726318359375, -0.21889495849609375, 0.14034271240234375, -0.6394424438476562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000034.npy"}
{"epoch": 0.05139833711262283, "step": 35, "batch_size": 64, "mean": -0.023406386375427246, "std": 0.32984301447868347, "min": -0.5768566131591797, "p10": -0.42888412475585935, "median": -0.07332229614257812, "p90": 0.4520471572875977, "max": 1.2677001953125, "pos_frac": 0.375, "sample": [-0.0775299072265625, -0.3811683654785156, 0.45558738708496094, 0.17044830322265625, -0.43346405029296875, 0.7438201904296875, -0.1585235595703125, -0.18193817138671875, 0.1781158447265625, -0.5768566131591797, -0.1881103515625, -0.03551483154296875, 0.0072174072265625, -0.04054450988769531, -0.140869140625, 0.1917400360107422, 1.2677001953125, -0.15740966796875, -0.1283416748046875, 0.032093048095703125, 0.049854278564453125, 0.46157073974609375, 0.5182247161865234, -0.06911468505859375, -0.08258819580078125, -0.1827850341796875, -0.453826904296875, -0.10740280151367188, -0.0364227294921875, 0.10741424560546875, -0.4181976318359375, 0.45769500732421875, -0.440032958984375, -0.21559906005859375, -0.20584869384765625, -0.3661346435546875, -0.027631759643554688, -0.0865020751953125, 0.044010162353515625, -0.14207839965820312, -0.20331192016601562, 0.08530044555664062, 0.44378662109375, 0.2399749755859375, 0.08594894409179688, -0.0583648681640625, 0.35129547119140625, -0.23960113525390625, 0.0363006591796875, 0.013593673706054688, -0.1837139129638672, -0.2882881164550781, -0.557373046875, -0.16126251220703125, -0.1335601806640625, -0.12598037719726562, -0.31195068359375, -0.529998779296875, -0.038928985595703125, -0.0147857666015625, 0.4286651611328125, 0.5789947509765625, -0.4439048767089844, 0.1781005859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000035.npy"}
{"epoch": 0.05291005291005291, "step": 36, "batch_size": 64, "mean": 0.009759783744812012, "std": 0.3675188720226288, "min": -1.03826904296875, "p10": -0.4509994506835937, "median": 0.015218734741210938, "p90": 0.428424072265625, "max": 0.8246612548828125, "pos_frac": 0.546875, "sample": [-0.0808258056640625, 0.427825927734375, -0.2580718994140625, -0.07606315612792969, -0.28047752380371094, -0.24724197387695312, 0.6562728881835938, 0.3400115966796875, 0.1457080841064453, -0.43160247802734375, 0.22733306884765625, 0.1289825439453125, 0.428680419921875, -0.9069366455078125, 0.023458480834960938, -0.17556381225585938, 0.41538238525390625, -0.08139419555664062, 0.08603286743164062, 0.4147911071777344, 0.2520332336425781, 0.18310546875, -0.23128890991210938, 0.002086639404296875, -0.5887107849121094, -0.34177398681640625, -0.030353546142578125, 0.8237075805664062, 0.2723426818847656, -0.5896453857421875, -0.11854171752929688, -0.06359100341796875, -0.0922088623046875, 0.2086029052734375, -0.45931243896484375, 0.017436981201171875, -0.540618896484375, 0.01300048828125, -0.3887615203857422, -0.2688941955566406, 0.0272369384765625, 0.3267669677734375, 0.6245574951171875, -0.03164863586425781, 0.004344940185546875, 0.1290569305419922, 0.31340789794921875, 0.4658470153808594, 0.2371978759765625, 0.29405975341796875, 0.1800384521484375, -0.13191986083984375, -0.052364349365234375, 0.03818511962890625, -0.2989616394042969, -0.6134872436523438, 0.141082763671875, 0.4446563720703125, -1.03826904296875, -0.14203262329101562, 0.020944595336914062, 0.8246612548828125, -0.09421730041503906, 0.1405658721923828], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000036.npy"}
{"epoch": 0.05442176870748299, "step": 37, "batch_size": 64, "mean": 0.09463286399841309, "std": 0.4485906660556793, "min": -1.42431640625, "p10": -0.4093341827392578, "median": 0.036972999572753906, "p90": 0.6363967895507813, "max": 1.67047119140625, "pos_frac": 0.53125, "sample": [0.3223876953125, 0.7772674560546875, 0.0487518310546875, 0.02545928955078125, 0.64984130859375, 0.2982215881347656, -0.040454864501953125, 0.033100128173828125, 0.1341533660888672, 0.6109046936035156, 0.63665771484375, 0.1347503662109375, -0.27252960205078125, 0.6357879638671875, -0.07193756103515625, -0.44500732421875, 0.3329601287841797, 0.3394927978515625, 0.444061279296875, 0.2783203125, -0.4363555908203125, -0.1856822967529297, -1.42431640625, -0.1448516845703125, -0.4143409729003906, 0.050144195556640625, -0.034969329833984375, -0.14110374450683594, -0.233978271484375, 0.06603240966796875, -0.1020050048828125, -0.39765167236328125, -0.06168365478515625, 0.310516357421875, 1.67047119140625, 0.75787353515625, 0.34348297119140625, -0.04914665222167969, 0.08855438232421875, -0.21941375732421875, 1.121246337890625, -0.05234527587890625, -0.3751564025878906, 0.12601470947265625, -0.46279144287109375, -0.2045440673828125, 0.061279296875, 0.40191650390625, -0.11175537109375, -0.55560302734375, 0.23082351684570312, -0.1244354248046875, -0.0020751953125, 0.04084587097167969, 0.6741714477539062, 0.2588844299316406, -0.107513427734375, 0.33527565002441406, -0.45654296875, -0.09363555908203125, 0.6136093139648438, -0.016571044921875, -0.18192481994628906, 0.623565673828125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000037.npy"}
{"epoch": 0.055933484504913075, "step": 38, "batch_size": 64, "mean": 0.043681979179382324, "std": 0.4401227831840515, "min": -0.8385772705078125, "p10": -0.509844970703125, "median": 0.05363655090332031, "p90": 0.49779090881347665, "max": 1.307952880859375, "pos_frac": 0.578125, "sample": [-0.12404251098632812, 0.3489818572998047, 0.00730133056640625, 1.3041229248046875, -0.18024444580078125, 0.09782028198242188, 0.16610145568847656, 0.5048942565917969, -0.5357666015625, 0.4812164306640625, -0.13358688354492188, -0.004085540771484375, 0.26908302307128906, 0.3114280700683594, -0.4382171630859375, -0.32817840576171875, 0.03339385986328125, 0.26579856872558594, -0.26377105712890625, -0.06217193603515625, -0.8385772705078125, 0.66619873046875, -0.7483596801757812, -0.5434341430664062, 0.24298477172851562, 0.295745849609375, 0.376007080078125, -0.8288459777832031, 0.43029022216796875, -0.30818939208984375, -0.46962928771972656, -0.48016357421875, 0.06227874755859375, 0.31244659423828125, 0.7213973999023438, 0.42844390869140625, 0.13004302978515625, -0.5025177001953125, -0.07053756713867188, 0.00775909423828125, 0.4015159606933594, -0.17315673828125, -0.24088287353515625, -0.15767288208007812, 0.18693923950195312, -0.22708892822265625, -0.31903076171875, 0.1500701904296875, 0.5972366333007812, 1.307952880859375, 0.265716552734375, 0.3480987548828125, 0.7400054931640625, -0.5536422729492188, 0.06847000122070312, 0.24286651611328125, -0.439849853515625, -0.30144500732421875, -0.5129852294921875, 0.044994354248046875, 0.3065071105957031, 0.02178955078125, 0.3096122741699219, 0.1262073516845703], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000038.npy"}
{"epoch": 0.05744520030234316, "step": 39, "batch_size": 64, "mean": 0.10873657464981079, "std": 0.2994993031024933, "min": -0.62359619140625, "p10": -0.20323028564453124, "median": 0.07625770568847656, "p90": 0.4976850509643555, "max": 0.77301025390625, "pos_frac": 0.65625, "sample": [0.5669021606445312, -0.3548774719238281, -0.063873291015625, 0.075469970703125, 0.02392578125, 0.2266082763671875, 0.44789886474609375, 0.24358558654785156, 0.44525146484375, 0.271209716796875, 0.10865020751953125, -0.10425376892089844, 0.49042320251464844, -0.022129058837890625, -0.1609954833984375, 0.1642608642578125, 0.05411529541015625, 0.4674205780029297, 0.20633888244628906, 0.2460784912109375, -0.5672683715820312, -0.05169677734375, -0.0661773681640625, -0.2034454345703125, 0.77301025390625, -0.1434764862060547, -0.34716796875, 0.05091667175292969, -0.11606597900390625, 0.030948638916015625, 0.1084442138671875, 0.434295654296875, -0.10749053955078125, -0.14553070068359375, 0.3179283142089844, 0.06781005859375, -0.202728271484375, 0.44777679443359375, 0.14282989501953125, -0.62359619140625, 0.1009674072265625, -0.5720672607421875, -0.06145668029785156, 0.2813262939453125, 0.52813720703125, -0.02071380615234375, 0.3227691650390625, 0.7313232421875, 0.5007972717285156, 0.03214836120605469, 0.7555389404296875, -0.2938995361328125, 0.07704544067382812, -0.028993606567382812, 0.2387542724609375, 0.022003173828125, 0.16399765014648438, 0.5026702880859375, 0.18801116943359375, 0.13317489624023438, 0.01656341552734375, 0.21276473999023438, 0.0684051513671875, -0.07145309448242188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000039.npy"}
{"epoch": 0.05895691609977324, "step": 40, "batch_size": 64, "mean": -0.05382627248764038, "std": 0.32383590936660767, "min": -0.7530632019042969, "p10": -0.47242603302001945, "median": -0.00409698486328125, "p90": 0.3672763824462891, "max": 0.5657272338867188, "pos_frac": 0.484375, "sample": [-0.5484161376953125, 0.5657272338867188, 0.0875091552734375, 0.3034534454345703, -0.06661605834960938, 0.36209869384765625, 0.22304916381835938, 0.2544746398925781, -0.1617889404296875, -0.11690139770507812, 0.18949508666992188, 0.1341838836669922, -0.6079025268554688, -0.41457557678222656, -0.2765846252441406, 0.07761001586914062, -0.014373779296875, 0.1842193603515625, 0.43994903564453125, 0.07575607299804688, -0.10978317260742188, 0.143218994140625, 0.4644813537597656, 0.4181861877441406, 0.1640777587890625, -0.3513031005859375, 0.02208709716796875, 0.0244293212890625, 0.40069580078125, 0.05957221984863281, 0.36756134033203125, -0.6221389770507812, 0.018707275390625, -0.3701057434082031, 0.022169113159179688, -0.00778961181640625, -0.012847900390625, 0.34877586364746094, -0.3212432861328125, 0.0804901123046875, -0.2852191925048828, 0.28369140625, 0.4295654296875, -0.671661376953125, -0.40303802490234375, -0.4972190856933594, -0.0371856689453125, -0.3895988464355469, -0.14310836791992188, 0.00872802734375, -0.3121223449707031, -0.294219970703125, -0.3538780212402344, 0.1935882568359375, -0.00040435791015625, -0.7530632019042969, -0.344940185546875, -0.27117156982421875, -0.3095550537109375, -0.6076889038085938, 0.3666114807128906, -0.3836841583251953, 0.1810760498046875, -0.27999114990234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000040.npy"}
{"epoch": 0.06046863189720333, "step": 41, "batch_size": 64, "mean": 0.04054197669029236, "std": 0.40444427728652954, "min": -1.086761474609375, "p10": -0.36331863403320314, "median": 0.03194999694824219, "p90": 0.5352113723754885, "max": 0.8571090698242188, "pos_frac": 0.53125, "sample": [0.39593505859375, 0.3913993835449219, 0.025552749633789062, -0.2627105712890625, -0.202728271484375, 0.066925048828125, -0.3287506103515625, 0.4566612243652344, 0.11647796630859375, -0.747802734375, -0.27280426025390625, 0.2132415771484375, -0.01293182373046875, 0.6382598876953125, -0.3321685791015625, 0.320159912109375, 0.053020477294921875, 0.4054718017578125, 0.15554428100585938, -0.07589530944824219, 0.7003402709960938, -0.29547119140625, -0.06937026977539062, 0.233551025390625, 0.46509361267089844, 0.34308624267578125, 0.34930419921875, 0.20175743103027344, -0.08882522583007812, -0.2779998779296875, -0.07626914978027344, 0.7888641357421875, 0.5652618408203125, -0.0139923095703125, 0.03834724426269531, -0.5447769165039062, 0.3590850830078125, 0.019756317138671875, 0.7147750854492188, -0.034698486328125, -0.22169113159179688, -0.4336509704589844, -0.19036865234375, -0.09507369995117188, 0.38907623291015625, -0.2001934051513672, 0.27080535888671875, 0.14249420166015625, 0.8571090698242188, 0.05165863037109375, -0.17330169677734375, -1.08624267578125, -0.25011444091796875, 0.06836700439453125, -0.35019683837890625, -1.086761474609375, 0.455780029296875, -0.16390228271484375, 0.4120044708251953, -0.5452499389648438, 0.1489105224609375, 0.6786880493164062, -0.09519386291503906, -0.3689422607421875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000041.npy"}
{"epoch": 0.06198034769463341, "step": 42, "batch_size": 64, "mean": 0.013764426112174988, "std": 0.36762717366218567, "min": -1.2149887084960938, "p10": -0.37761535644531247, "median": 0.014832496643066406, "p90": 0.43815860748291036, "max": 0.899566650390625, "pos_frac": 0.515625, "sample": [0.63031005859375, 0.899566650390625, -0.18344879150390625, -0.009124755859375, -0.11587715148925781, -0.8272171020507812, 0.09411048889160156, -0.07226085662841797, 0.24968719482421875, 0.0263214111328125, 0.03440093994140625, -0.23911285400390625, -0.22346878051757812, 0.05947113037109375, 0.06307220458984375, 0.054798126220703125, 0.3581695556640625, 0.3005218505859375, -0.11802864074707031, -0.0753936767578125, 0.15398788452148438, 0.5494232177734375, 0.61798095703125, -0.0984649658203125, -0.49127197265625, 0.04354095458984375, 0.028064727783203125, -0.248138427734375, -0.12214469909667969, -0.3443756103515625, -0.0232391357421875, 0.39179039001464844, -0.06139373779296875, -0.1967010498046875, 0.36052703857421875, 0.45803070068359375, 0.46891021728515625, -0.0855865478515625, -0.7779998779296875, 0.36687469482421875, -0.3213539123535156, -0.6931076049804688, -1.2149887084960938, -0.01584625244140625, -0.3918609619140625, 0.1290740966796875, 0.27190399169921875, 0.0466766357421875, -0.15028762817382812, -0.1390838623046875, -0.025533676147460938, 0.3529090881347656, 0.19277191162109375, -0.0063323974609375, 0.3139228820800781, -0.153717041015625, -0.5666122436523438, 0.1448192596435547, 0.5984725952148438, 0.36455535888671875, 0.0033435821533203125, 0.3669414520263672, -0.16545867919921875, 0.04340362548828125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000042.npy"}
{"epoch": 0.06349206349206349, "step": 43, "batch_size": 64, "mean": 0.09020435810089111, "std": 0.42384761571884155, "min": -1.1931724548339844, "p10": -0.45667037963867185, "median": 0.08189773559570312, "p90": 0.5641838073730469, "max": 1.4004058837890625, "pos_frac": 0.609375, "sample": [0.084991455078125, 0.0312652587890625, -0.54962158203125, 0.4623088836669922, 0.040813446044921875, -0.20263099670410156, -0.06407546997070312, 0.11548233032226562, 0.3087005615234375, -0.679718017578125, 0.4208793640136719, 0.089813232421875, 0.2536773681640625, -0.054271697998046875, -0.193878173828125, -0.0117340087890625, 0.03582763671875, -0.1002044677734375, 0.19295120239257812, -0.0349884033203125, 0.2048625946044922, 0.3609466552734375, -0.5721435546875, -0.1684417724609375, 0.763153076171875, 0.07880401611328125, 0.2211475372314453, 0.8709182739257812, -0.0478363037109375, 0.567596435546875, -0.10693359375, 0.2193145751953125, 0.5190658569335938, -0.170166015625, 0.012582778930664062, 0.5072479248046875, 0.62115478515625, -0.2172985076904297, 0.29296112060546875, 0.37973785400390625, 0.40325164794921875, -0.1556854248046875, -0.04746246337890625, -1.1931724548339844, 0.08959388732910156, 1.4004058837890625, 0.04611968994140625, 0.1616230010986328, -0.40174293518066406, 0.3417625427246094, 0.19574546813964844, 0.8924560546875, -0.20015716552734375, -0.19339752197265625, 0.2735786437988281, 0.5562210083007812, -0.7518272399902344, 0.32024383544921875, 0.5860595703125, 0.35382080078125, -0.42547607421875, -0.47003936767578125, 0.0424346923828125, -0.533538818359375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000043.npy"}
{"epoch": 0.06500377928949358, "step": 44, "batch_size": 64, "mean": -0.009241342544555664, "std": 0.3956943154335022, "min": -0.9864273071289062, "p10": -0.4572406768798828, "median": -0.06754875183105469, "p90": 0.4795724868774416, "max": 1.060638427734375, "pos_frac": 0.4375, "sample": [-0.658233642578125, -0.16099929809570312, -0.00334930419921875, 0.11799812316894531, 0.309783935546875, -0.634674072265625, 0.187713623046875, -0.24951934814453125, -0.3464393615722656, -0.21181106567382812, 0.23114013671875, 0.761199951171875, -0.1243896484375, -0.023471832275390625, 0.26511383056640625, -0.2292938232421875, -0.03972816467285156, 0.035907745361328125, 0.42974090576171875, 0.30019187927246094, -0.07280349731445312, 0.6457710266113281, 0.5325164794921875, -0.1760711669921875, -0.4135093688964844, -0.475982666015625, 0.8069534301757812, 0.23392486572265625, -0.553955078125, 0.206817626953125, 0.2166595458984375, -0.13891220092773438, 0.159881591796875, -0.1430511474609375, -0.11131477355957031, 0.18949508666992188, 0.9451828002929688, -0.1975421905517578, -0.2952423095703125, -0.4026031494140625, -0.11796188354492188, 0.0601043701171875, -0.281341552734375, -0.2720794677734375, 0.05329132080078125, 0.23720550537109375, -0.14847564697265625, 0.3909912109375, -0.06229400634765625, -0.07942581176757812, -0.8322906494140625, 0.2125396728515625, 0.205963134765625, 0.1959075927734375, 0.00191497802734375, -0.2296295166015625, -0.329864501953125, 1.060638427734375, 0.5009288787841797, -0.20721435546875, -0.113372802734375, -0.1380157470703125, -0.6256332397460938, -0.9864273071289062], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000044.npy"}
{"epoch": 0.06651549508692366, "step": 45, "batch_size": 64, "mean": 0.13970479369163513, "std": 0.3883092403411865, "min": -0.797393798828125, "p10": -0.30268077850341796, "median": 0.14820575714111328, "p90": 0.6365058898925782, "max": 1.0496826171875, "pos_frac": 0.703125, "sample": [0.11959075927734375, 0.0048370361328125, 0.5962867736816406, 0.15108489990234375, 0.1453266143798828, 0.38958740234375, 0.0045604705810546875, 0.362060546875, -0.780517578125, -0.7850799560546875, 0.6413230895996094, 0.27573394775390625, 0.061824798583984375, 0.40746307373046875, 0.014707565307617188, 0.29895782470703125, 0.09464645385742188, -0.413177490234375, 0.40316009521484375, 0.454498291015625, 0.319671630859375, -0.37369537353515625, 0.27756690979003906, -0.12408828735351562, 0.3558998107910156, 0.73974609375, 0.1650390625, 0.17864608764648438, 0.6309890747070312, 0.3285179138183594, 0.19303131103515625, 0.301422119140625, -0.29886627197265625, 0.4969215393066406, 0.2996940612792969, 0.37493896484375, -0.24286651611328125, -0.15590667724609375, -0.6093521118164062, -0.15655517578125, -0.797393798828125, 1.0496826171875, -0.011814117431640625, -0.18783950805664062, 0.13849639892578125, -0.0326385498046875, 0.000579833984375, -0.092926025390625, 0.06011962890625, 0.7711257934570312, 0.6388702392578125, 0.4680824279785156, 0.7892990112304688, 0.308868408203125, -0.30431556701660156, 0.09133148193359375, 0.8760833740234375, 0.058231353759765625, 0.24143028259277344, 0.018178939819335938, -0.25399017333984375, -0.297119140625, 0.318939208984375, -0.057804107666015625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000045.npy"}
{"epoch": 0.06802721088435375, "step": 46, "batch_size": 64, "mean": 0.1856483519077301, "std": 0.37959519028663635, "min": -0.5473155975341797, "p10": -0.2956554412841797, "median": 0.2070178985595703, "p90": 0.6306053161621095, "max": 1.1212577819824219, "pos_frac": 0.703125, "sample": [0.023895263671875, -0.5084495544433594, -0.13729095458984375, 0.34322357177734375, 0.515899658203125, 0.2642822265625, 0.019472122192382812, -0.2503929138183594, 0.594696044921875, 0.01190185546875, 0.5320777893066406, 0.1680755615234375, 0.3052825927734375, 0.3917388916015625, -0.5196380615234375, 0.22697830200195312, 0.0264739990234375, 0.2505149841308594, 0.32822418212890625, 0.46924781799316406, 0.33172035217285156, 0.96343994140625, 0.021276473999023438, 0.00616455078125, 0.335113525390625, -0.1758575439453125, 0.6178359985351562, 1.00628662109375, 0.6873626708984375, 0.549896240234375, -0.28997039794921875, 0.5992965698242188, 0.16167831420898438, 0.1313629150390625, -0.15930938720703125, 0.516693115234375, -0.0046253204345703125, 0.28469085693359375, -0.5473155975341797, 1.1212577819824219, 0.3598365783691406, -0.39306640625, 0.29769134521484375, -0.23157501220703125, 0.7201499938964844, 0.1870574951171875, -0.08594512939453125, 0.02161407470703125, -0.312469482421875, -0.2980918884277344, 0.5213985443115234, -0.27344322204589844, 0.146728515625, 0.23192214965820312, 0.636077880859375, 0.44525146484375, 0.029815673828125, -0.30811309814453125, -0.23716354370117188, 0.7195587158203125, -0.01470184326171875, 0.27411651611328125, -0.25103187561035156, 0.482666015625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000046.npy"}
{"epoch": 0.06953892668178382, "step": 47, "batch_size": 64, "mean": 0.13316544890403748, "std": 0.3228130340576172, "min": -0.733978271484375, "p10": -0.2563385009765625, "median": 0.12782669067382812, "p90": 0.5144729614257812, "max": 0.8109951019287109, "pos_frac": 0.640625, "sample": [0.2587890625, 0.45575714111328125, -0.14331436157226562, 0.135498046875, 0.48714447021484375, 0.10274887084960938, 0.08872413635253906, 0.21311187744140625, -0.04414176940917969, 0.12015533447265625, -0.1929473876953125, -0.22452926635742188, 0.21343994140625, -0.01885223388671875, -0.1128387451171875, 0.22039031982421875, 0.6198101043701172, -0.29070281982421875, 0.4854278564453125, 0.30426025390625, -0.3549003601074219, -0.2536468505859375, -0.1657257080078125, 0.10541343688964844, 0.06764030456542969, 0.3645477294921875, 0.15976333618164062, 0.03352546691894531, -0.07069015502929688, 0.3591480255126953, -0.13586044311523438, 0.8109951019287109, -0.25838470458984375, 0.27420806884765625, -0.17704010009765625, 0.14736557006835938, 0.81036376953125, 0.5077667236328125, 0.2432689666748047, 0.7684326171875, -0.0690765380859375, 0.08173370361328125, 0.19255828857421875, 0.513885498046875, 0.23145675659179688, -0.2574920654296875, 0.5652313232421875, 0.4852752685546875, 0.42000579833984375, 0.023923873901367188, -0.13408660888671875, 0.5446853637695312, -0.733978271484375, -0.08226776123046875, -0.47226524353027344, 0.5147247314453125, -0.10279273986816406, 0.24956130981445312, 0.44466400146484375, -0.12378692626953125, 0.0871734619140625, 0.4777069091796875, 0.153228759765625, -0.40160179138183594], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000047.npy"}
{"epoch": 0.0710506424792139, "step": 48, "batch_size": 64, "mean": 0.017589718103408813, "std": 0.39497604966163635, "min": -0.91729736328125, "p10": -0.4892875671386719, "median": 0.014756202697753906, "p90": 0.49834213256835946, "max": 1.011993408203125, "pos_frac": 0.53125, "sample": [-0.0059986114501953125, -0.5345306396484375, 0.17058563232421875, 0.16459274291992188, -0.27542877197265625, 0.9659843444824219, -0.2420196533203125, -0.136199951171875, -0.22409629821777344, 0.0043544769287109375, 0.47766876220703125, 0.122589111328125, -0.28151702880859375, -0.5071258544921875, 0.40901947021484375, 1.011993408203125, 0.30789947509765625, 0.015459060668945312, -0.91729736328125, 0.5072021484375, 0.513885498046875, -0.0524444580078125, 0.1925811767578125, 0.0954742431640625, 0.24615859985351562, -0.306671142578125, 0.257080078125, 0.5333786010742188, -0.381439208984375, 0.16701507568359375, 0.32936859130859375, -0.4742431640625, -0.2293376922607422, -0.4396629333496094, 0.26526641845703125, -0.04109954833984375, -0.06119537353515625, -0.08109283447265625, 0.6773262023925781, -0.543212890625, -0.4061622619628906, 0.3148345947265625, 0.19026947021484375, 0.3173828125, -0.7476577758789062, -0.28106689453125, 0.46781158447265625, 0.20569229125976562, -0.3941192626953125, -0.16875648498535156, 0.0140533447265625, -0.24200439453125, -0.3801689147949219, 0.12381744384765625, 0.02960205078125, 0.27666282653808594, 0.7960739135742188, -0.14522361755371094, 0.1214599609375, -0.5360298156738281, -0.1122283935546875, 0.3883628845214844, 0.08860206604003906, -0.49573516845703125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000048.npy"}
{"epoch": 0.07256235827664399, "step": 49, "batch_size": 64, "mean": 0.019293993711471558, "std": 0.3757753372192383, "min": -1.14599609375, "p10": -0.4433540344238281, "median": 0.03557586669921875, "p90": 0.3942987442016603, "max": 1.085296630859375, "pos_frac": 0.59375, "sample": [0.02541351318359375, -0.041553497314453125, 0.06174468994140625, 0.30231475830078125, 0.0961151123046875, -0.23669815063476562, -0.14330101013183594, 0.24147605895996094, 0.2422313690185547, -0.2906494140625, -0.040775299072265625, -0.042407989501953125, 0.26586151123046875, 0.07879638671875, -0.40270042419433594, -0.04695892333984375, -0.46852684020996094, -0.619293212890625, 0.2704658508300781, -0.901885986328125, 0.2017841339111328, -0.102447509765625, -0.03885078430175781, 0.31000518798828125, 0.2528495788574219, -0.12921142578125, 0.33837890625, 0.22490310668945312, 0.40732383728027344, -0.27105712890625, 0.034046173095703125, 0.25701904296875, 0.01029205322265625, 0.46910667419433594, 0.22958755493164062, 0.034088134765625, 0.03594970703125, -0.1879730224609375, 0.1013336181640625, 0.634735107421875, 0.0352020263671875, 0.052318572998046875, 0.130584716796875, 0.623992919921875, -0.2440948486328125, 0.049816131591796875, -0.18048095703125, 0.48127174377441406, -0.031253814697265625, 0.019153594970703125, 0.29679107666015625, 0.31613922119140625, -1.14599609375, -0.35223388671875, -0.42749786376953125, -0.8176307678222656, 0.5863418579101562, 1.085296630859375, 0.3639068603515625, 0.17980003356933594, -0.468109130859375, 0.29622650146484375, -0.4501495361328125, -0.32611083984375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000049.npy"}
{"epoch": 0.07407407407407407, "step": 50, "batch_size": 64, "mean": 0.0764555037021637, "std": 0.35630232095718384, "min": -0.9310531616210938, "p10": -0.32314224243164064, "median": 0.07302474975585938, "p90": 0.49225387573242196, "max": 1.1904296875, "pos_frac": 0.609375, "sample": [0.14678955078125, 0.5131683349609375, -0.1844940185546875, 0.14372634887695312, 0.306793212890625, -0.31749725341796875, 0.4696044921875, -0.27019500732421875, -0.10324859619140625, 0.3130340576171875, 0.26140403747558594, 0.411346435546875, -0.9310531616210938, 0.14958572387695312, -0.2489013671875, -0.2772960662841797, 0.09589385986328125, 0.4668312072753906, 0.2100658416748047, -0.39948272705078125, 0.40460205078125, 0.195037841796875, -0.3255615234375, 0.25659942626953125, 0.0122222900390625, 1.1904296875, -0.6284294128417969, 0.5856094360351562, 0.11523818969726562, -0.16363525390625, 0.5555648803710938, 0.19699668884277344, 0.2925701141357422, 0.23184776306152344, -0.3399810791015625, -0.245941162109375, 0.05495452880859375, 1.0462417602539062, -0.03736114501953125, -0.16187286376953125, -0.11128997802734375, 0.6044158935546875, -0.24084854125976562, 0.5019607543945312, -0.03450965881347656, 0.28737831115722656, 0.0716094970703125, -0.2160186767578125, -0.3699188232421875, 0.03878021240234375, 0.07455253601074219, 0.021879196166992188, 0.01567840576171875, 0.29760169982910156, 0.29431724548339844, 0.2781715393066406, -0.346343994140625, -0.23437118530273438, -0.10271453857421875, -0.05045318603515625, -0.2084178924560547, 0.22489166259765625, 0.031154632568359375, 0.07444000244140625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000050.npy"}
{"epoch": 0.07558578987150416, "step": 51, "batch_size": 64, "mean": 0.1322861909866333, "std": 0.4378786087036133, "min": -0.7030792236328125, "p10": -0.3324592590332031, "median": 0.04151725769042969, "p90": 0.6656700134277345, "max": 1.8316192626953125, "pos_frac": 0.5625, "sample": [0.2880859375, -0.022371292114257812, -0.20447921752929688, 0.29010772705078125, -0.167816162109375, -0.1508941650390625, 0.36836814880371094, 0.26212120056152344, -0.057403564453125, 0.33465576171875, 0.044445037841796875, 0.4415779113769531, -0.3497657775878906, 0.6808242797851562, -0.13123321533203125, -0.4753875732421875, 0.23492431640625, 0.428375244140625, -0.0010833740234375, 0.63031005859375, -0.03871917724609375, 0.11122512817382812, 0.25624656677246094, 0.5413665771484375, -0.20855712890625, -0.3153533935546875, -0.33228302001953125, 0.021343231201171875, 0.14044761657714844, 0.2675495147705078, 0.38127899169921875, -0.3325347900390625, -0.1988506317138672, 1.3018798828125, -0.6679763793945312, 0.024898529052734375, -0.24749374389648438, -0.23852157592773438, -0.4486274719238281, 0.05648040771484375, -0.08271408081054688, -0.7030792236328125, 0.2568511962890625, 1.0184478759765625, 0.0385894775390625, 0.02658843994140625, -0.14052581787109375, 1.8316192626953125, 0.227508544921875, 0.716796875, -0.059856414794921875, 0.8988876342773438, 0.26160430908203125, -0.03032684326171875, 0.10843658447265625, -0.0099334716796875, 0.43033599853515625, 0.7014007568359375, -0.12096595764160156, -0.05623626708984375, 0.17633056640625, 0.297943115234375, 0.5302810668945312, -0.36882781982421875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000051.npy"}
{"epoch": 0.07709750566893424, "step": 52, "batch_size": 64, "mean": 0.12421198189258575, "std": 0.3972077965736389, "min": -0.802276611328125, "p10": -0.3075996398925781, "median": 0.058991432189941406, "p90": 0.6361175537109376, "max": 1.49755859375, "pos_frac": 0.59375, "sample": [0.7188491821289062, -0.035003662109375, 0.3278350830078125, 0.6939239501953125, 0.8707962036132812, -0.0088348388671875, 0.3257255554199219, -0.14138031005859375, -0.5495796203613281, 0.5105743408203125, 0.3026123046875, 0.26201629638671875, -0.5341949462890625, 0.55792236328125, 0.949462890625, 0.227264404296875, 0.5524826049804688, -0.31160736083984375, 0.02462005615234375, -0.27754783630371094, -0.298248291015625, -0.12159538269042969, 0.21128463745117188, 0.1753101348876953, 0.6488800048828125, -0.013336181640625, -0.15961456298828125, 0.30890464782714844, -0.081817626953125, 0.162689208984375, -0.08680534362792969, -0.463623046875, 0.01430511474609375, -0.2583770751953125, 0.3125133514404297, 0.07042694091796875, 0.0959930419921875, -0.042865753173828125, 0.3794708251953125, -0.802276611328125, -0.09091949462890625, 0.53564453125, 0.06804847717285156, 0.25464630126953125, -0.06924247741699219, 0.04476165771484375, -0.13811779022216797, -0.17101287841796875, 1.49755859375, 0.117950439453125, 0.48937225341796875, 0.1209259033203125, -0.46477508544921875, 0.2091217041015625, 0.02993011474609375, 0.719329833984375, 0.6063385009765625, 0.014255523681640625, -0.37664794921875, 0.3531360626220703, -0.10712432861328125, -0.17911720275878906, -0.08158493041992188, 0.04993438720703125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000052.npy"}
{"epoch": 0.07860922146636433, "step": 53, "batch_size": 64, "mean": 0.05677822232246399, "std": 0.3571418821811676, "min": -0.7627334594726562, "p10": -0.4034713745117187, "median": 0.0601806640625, "p90": 0.4904773712158206, "max": 0.932373046875, "pos_frac": 0.5625, "sample": [0.24317550659179688, 0.15974044799804688, 0.6136627197265625, -0.19170379638671875, 0.027811050415039062, 0.12140083312988281, 0.023250579833984375, 0.050079345703125, 0.2371063232421875, -0.21248626708984375, 0.169189453125, -0.252288818359375, -0.26990509033203125, 0.41037559509277344, -0.4096832275390625, -0.38897705078125, 0.10887527465820312, 0.08238983154296875, -0.35228729248046875, -0.7627334594726562, 0.11720848083496094, 0.270294189453125, 0.1049957275390625, 0.3861045837402344, -0.48462677001953125, 0.839874267578125, -0.4939308166503906, 0.3934974670410156, 0.3185081481933594, 0.135223388671875, -0.033435821533203125, -0.19769287109375, -0.1352996826171875, -0.10552215576171875, 0.4312744140625, 0.0521392822265625, -0.018018722534179688, 0.7317733764648438, 0.38811302185058594, -0.3094196319580078, -0.4561424255371094, -0.5842132568359375, 0.4083385467529297, -0.12835693359375, 0.32743072509765625, -0.089935302734375, 0.0682220458984375, 0.21465301513671875, 0.20473098754882812, 0.932373046875, -0.37299346923828125, 0.5158500671386719, -0.5050449371337891, 0.4284210205078125, -0.23754119873046875, 0.3114280700683594, 0.5977325439453125, -0.2214202880859375, -0.10105133056640625, -0.0042324066162109375, -0.093292236328125, 0.6090469360351562, 0.11734199523925781, -0.1055908203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000053.npy"}
{"epoch": 0.0801209372637944, "step": 54, "batch_size": 64, "mean": 0.09712427854537964, "std": 0.33811426162719727, "min": -0.7705535888671875, "p10": -0.34377136230468747, "median": 0.15488433837890625, "p90": 0.5309913635253908, "max": 0.7847747802734375, "pos_frac": 0.609375, "sample": [0.343231201171875, 0.6930313110351562, 0.013671875, 0.27459716796875, -0.0384368896484375, 0.15695953369140625, 0.4712810516357422, -0.0263214111328125, 0.2939109802246094, -0.59503173828125, -0.42938995361328125, -0.038555145263671875, 0.7847747802734375, -0.2925834655761719, 0.22648239135742188, 0.34707069396972656, 0.361602783203125, 0.11780929565429688, 0.26598358154296875, -0.49523162841796875, 0.3055267333984375, 0.6948089599609375, 0.15833091735839844, 0.5435638427734375, -0.7705535888671875, 0.2715797424316406, 0.15280914306640625, 0.002429962158203125, -0.12958526611328125, 0.047489166259765625, -0.1895599365234375, 0.03009796142578125, -0.25414276123046875, -0.22199249267578125, 0.2881507873535156, -0.30487823486328125, 0.16109466552734375, 0.2997589111328125, 0.2522621154785156, 0.1862945556640625, 0.4157428741455078, -0.3840370178222656, 0.5016555786132812, 0.05461883544921875, -0.19383621215820312, 0.5542259216308594, -0.350372314453125, -0.04626274108886719, -0.0310516357421875, 0.41753387451171875, 0.24153900146484375, -0.2559661865234375, 0.2691192626953125, -0.09572219848632812, 0.6061782836914062, -0.030595779418945312, 0.317352294921875, -0.0830841064453125, 0.29564666748046875, -0.328369140625, 0.4550628662109375, -0.1202239990234375, 0.5584640502929688, -0.5100059509277344], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000054.npy"}
{"epoch": 0.08163265306122448, "step": 55, "batch_size": 64, "mean": 0.22266672551631927, "std": 0.49250420928001404, "min": -1.1053543090820312, "p10": -0.3361629486083984, "median": 0.1279735565185547, "p90": 0.7894462585449219, "max": 1.708526611328125, "pos_frac": 0.65625, "sample": [1.2411727905273438, 0.15233612060546875, -0.3236045837402344, -0.0794219970703125, 0.07391166687011719, -0.03839111328125, 0.8478622436523438, 0.055877685546875, -0.12552833557128906, -0.34154510498046875, 0.2880821228027344, 0.230987548828125, -0.03737640380859375, -0.1243438720703125, 0.0006999969482421875, 0.7940216064453125, 1.1647109985351562, 0.659271240234375, 0.0309295654296875, -0.181976318359375, 0.5355110168457031, -0.60540771484375, 0.7351837158203125, 0.29653358459472656, 1.313507080078125, 0.11899948120117188, 0.12258529663085938, 0.6253814697265625, -0.029083251953125, 0.179107666015625, 0.3742504119873047, -0.39251708984375, 0.6287078857421875, -0.1793079376220703, 0.114044189453125, -0.085968017578125, 0.38062000274658203, 0.33518409729003906, 0.27680206298828125, -0.3693504333496094, -0.060176849365234375, 0.7787704467773438, -0.14880752563476562, 0.05426025390625, 0.1063232421875, -0.090362548828125, 0.9917449951171875, -0.0079803466796875, -1.1053543090820312, 0.1372222900390625, 0.642822265625, 0.543975830078125, 0.31775665283203125, -0.18614578247070312, -0.3733711242675781, 0.7720756530761719, 0.70928955078125, 0.24049949645996094, 0.2453289031982422, 1.708526611328125, 0.13336181640625, 0.062530517578125, 0.7105598449707031, -0.5946388244628906], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000055.npy"}
{"epoch": 0.08314436885865457, "step": 56, "batch_size": 64, "mean": 0.10503232479095459, "std": 0.3755733072757721, "min": -1.628570556640625, "p10": -0.23568725585937497, "median": 0.0648355484008789, "p90": 0.5780181884765625, "max": 1.0638923645019531, "pos_frac": 0.609375, "sample": [0.1337738037109375, 0.18097305297851562, 0.2155017852783203, -0.2857666015625, -0.081817626953125, 0.45766448974609375, 0.666595458984375, 0.44189453125, 0.2848052978515625, 0.07842826843261719, -0.19095611572265625, 0.521209716796875, -0.1784210205078125, 0.15810394287109375, 0.4290771484375, 0.23668289184570312, -0.2502899169921875, -0.1602001190185547, 0.25890350341796875, 0.5881767272949219, -0.15699005126953125, -0.2016143798828125, 0.04811286926269531, -0.1004180908203125, 0.017812728881835938, 0.5028305053710938, 0.048542022705078125, 0.570648193359375, -0.1904449462890625, 0.11461067199707031, 0.3294181823730469, 0.6996841430664062, 0.7841339111328125, 0.23976898193359375, -0.10402679443359375, -0.17273712158203125, -1.628570556640625, 0.09911537170410156, -0.05590248107910156, 0.39897918701171875, -0.1421489715576172, -0.25798797607421875, 0.5811767578125, -0.27571868896484375, 0.1617717742919922, 0.04582977294921875, 0.0619354248046875, -0.0727996826171875, 0.06773567199707031, 0.35234832763671875, 1.0638923645019531, -0.10622024536132812, 0.12744522094726562, -0.2950782775878906, 0.354278564453125, 0.04458808898925781, 0.058971405029296875, -0.04932403564453125, 0.1390228271484375, -0.00803375244140625, -0.07331275939941406, 0.66290283203125, -0.08813667297363281, -0.3783607482910156], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000056.npy"}
{"epoch": 0.08465608465608465, "step": 57, "batch_size": 64, "mean": 0.24903348088264465, "std": 0.45004981756210327, "min": -0.736907958984375, "p10": -0.2402736663818359, "median": 0.2669086456298828, "p90": 0.7709152221679689, "max": 1.568878173828125, "pos_frac": 0.671875, "sample": [-0.1261138916015625, -0.07024383544921875, 0.37430572509765625, 1.330718994140625, 0.0517730712890625, 0.4191131591796875, 0.6183319091796875, -0.6384773254394531, 0.46288299560546875, 0.6156158447265625, 0.055389404296875, 0.8210067749023438, -0.4392509460449219, -0.009336471557617188, 0.03099822998046875, -0.2099285125732422, 0.5526275634765625, 0.3360176086425781, 1.568878173828125, 0.2557525634765625, 0.6504669189453125, -0.29412841796875, 0.5387802124023438, 0.005126953125, 0.048824310302734375, 0.1710529327392578, -0.05733489990234375, 0.33065032958984375, -0.2532787322998047, -0.6481170654296875, 0.1761627197265625, 0.39157867431640625, 0.3977088928222656, 0.32691192626953125, -0.050994873046875, 0.21025848388671875, -0.736907958984375, 0.4916725158691406, 0.6146392822265625, -0.104583740234375, -0.1328582763671875, -0.14007568359375, -0.10744476318359375, 0.6395664215087891, 0.492431640625, 0.818603515625, 0.7888946533203125, 0.403045654296875, 0.73138427734375, 0.3576164245605469, 0.6893310546875, -0.01441192626953125, -0.0520477294921875, 1.2416229248046875, 0.10382080078125, 0.7878570556640625, 0.59765625, 0.3732128143310547, 0.24519920349121094, 0.33037567138671875, -0.13322067260742188, -0.0744171142578125, -0.4946136474609375, 0.2780647277832031], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000057.npy"}
{"epoch": 0.08616780045351474, "step": 58, "batch_size": 64, "mean": 0.11425450444221497, "std": 0.3957008123397827, "min": -0.680145263671875, "p10": -0.4479804992675781, "median": 0.15990066528320312, "p90": 0.5659242630004883, "max": 1.0406494140625, "pos_frac": 0.640625, "sample": [-0.5829353332519531, 0.6815528869628906, 0.5649776458740234, -0.4506378173828125, -0.0277557373046875, 0.254150390625, -0.14112091064453125, 0.22075653076171875, 0.029815673828125, -0.587615966796875, 0.4163818359375, 0.12086105346679688, 0.367828369140625, -0.680145263671875, -0.3520393371582031, -0.20429611206054688, 0.27188873291015625, 0.2170257568359375, 0.176116943359375, 0.3021392822265625, 0.322998046875, 0.0753021240234375, 0.3375091552734375, -0.19427490234375, -0.4514923095703125, 0.232696533203125, 0.5425605773925781, 0.46390533447265625, -0.2559337615966797, 0.5956649780273438, -0.5417709350585938, -0.44178009033203125, 0.058681488037109375, 0.35861968994140625, 1.020355224609375, -0.5645332336425781, 0.560394287109375, 1.0406494140625, 0.31348419189453125, 0.315460205078125, -0.086212158203125, 0.031993865966796875, 0.14368438720703125, 0.2398681640625, -0.2044677734375, 1.02825927734375, -0.10032081604003906, 0.2451171875, 0.045196533203125, 0.28144073486328125, 0.5695571899414062, 0.20003509521484375, -0.43720245361328125, -0.3031425476074219, 0.4119110107421875, 0.40723419189453125, -0.027971267700195312, -0.10849761962890625, -0.2821502685546875, 0.127105712890625, 0.5663299560546875, -0.110595703125, 0.2601966857910156, 0.029474258422851562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000058.npy"}
{"epoch": 0.08767951625094482, "step": 59, "batch_size": 64, "mean": 0.16999676823616028, "std": 0.38334041833877563, "min": -0.7077598571777344, "p10": -0.27742938995361327, "median": 0.18939971923828125, "p90": 0.656148719787598, "max": 1.3639373779296875, "pos_frac": 0.640625, "sample": [0.23686599731445312, -0.29357147216796875, -0.26042938232421875, -0.17574310302734375, -0.3118553161621094, -0.24973297119140625, 0.410430908203125, -0.23917388916015625, 0.4167327880859375, 0.174591064453125, 0.913177490234375, 0.23968887329101562, 0.9534454345703125, 0.7968826293945312, -0.21313095092773438, 0.6988277435302734, 0.736053466796875, -0.27490997314453125, 0.40447998046875, 0.27362060546875, 0.17995452880859375, 0.21751785278320312, 0.5565643310546875, 0.19884490966796875, -0.2690277099609375, 0.22252655029296875, -0.058441162109375, 0.8526382446289062, 0.38446044921875, 0.2314453125, 0.13299179077148438, -0.2579765319824219, 0.3676643371582031, -0.2197265625, -0.56536865234375, -0.1293487548828125, -0.2846641540527344, 0.08690071105957031, -0.7077598571777344, 0.09539985656738281, 1.3639373779296875, 0.3588409423828125, -0.0153350830078125, 0.44976043701171875, 0.2372760772705078, 0.4375724792480469, -0.14741134643554688, 0.45438385009765625, 0.1575450897216797, -0.0204925537109375, -0.041416168212890625, 0.2593040466308594, 0.4899024963378906, -0.2799835205078125, 0.15169525146484375, 0.5142631530761719, -0.00461578369140625, 0.35916900634765625, 0.3060188293457031, 0.11179351806640625, 0.09589958190917969, 0.2437286376953125, 0.4056205749511719, -0.27850914001464844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000059.npy"}
{"epoch": 0.08919123204837491, "step": 60, "batch_size": 64, "mean": 0.05685010552406311, "std": 0.46785256266593933, "min": -2.2037353515625, "p10": -0.3473167419433594, "median": 0.07059097290039062, "p90": 0.6182361602783205, "max": 1.1643714904785156, "pos_frac": 0.59375, "sample": [0.24136734008789062, -0.32875823974609375, 0.16357040405273438, 0.31563568115234375, 0.08441925048828125, 0.37364959716796875, 0.4892120361328125, -0.08385467529296875, 0.014209747314453125, 0.1043853759765625, 0.36820220947265625, 0.6768035888671875, -0.09164619445800781, 0.3884754180908203, -0.22803497314453125, 0.8546333312988281, -0.26871681213378906, 0.062435150146484375, 1.1643714904785156, 0.47231292724609375, -0.3552703857421875, -0.29917144775390625, 0.0681304931640625, -0.20148468017578125, 0.4476203918457031, 0.094268798828125, -0.0894012451171875, 0.09592819213867188, 0.6551437377929688, -0.21088027954101562, 0.07472419738769531, -0.185272216796875, 0.63665771484375, -0.04061126708984375, -0.3256187438964844, 0.1061553955078125, 0.2482738494873047, -0.36600494384765625, 0.03942108154296875, -0.3933868408203125, 0.08388519287109375, 0.6801090240478516, 0.49555206298828125, -0.6344070434570312, 0.19802474975585938, 0.078399658203125, 0.4510841369628906, 0.044403076171875, -0.6445541381835938, -0.11772918701171875, -0.206329345703125, 0.5752525329589844, -0.015764236450195312, 0.07305145263671875, 0.0043792724609375, -0.12877273559570312, 0.23931121826171875, -0.27230072021484375, 0.4744415283203125, -0.06211662292480469, -2.2037353515625, -0.7839107513427734, 0.6443138122558594, -0.10607528686523438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000060.npy"}
{"epoch": 0.09070294784580499, "step": 61, "batch_size": 64, "mean": 0.07799449563026428, "std": 0.4740542769432068, "min": -0.8686065673828125, "p10": -0.4960681915283203, "median": 0.05225849151611328, "p90": 0.6707160949707032, "max": 1.1519088745117188, "pos_frac": 0.5625, "sample": [-0.7618408203125, -0.23065185546875, 0.94866943359375, 0.3531951904296875, -0.3273162841796875, 0.14762115478515625, 0.3690948486328125, 0.5083847045898438, 0.28347015380859375, 0.07001876831054688, 0.20421600341796875, -0.13675689697265625, -0.047332763671875, 0.3549041748046875, 0.6650924682617188, -0.23277664184570312, 0.739898681640625, -0.03127479553222656, 0.31627655029296875, 0.42226600646972656, 0.012826919555664062, 0.5394287109375, 0.273895263671875, 0.04479217529296875, 0.008470535278320312, -0.12088394165039062, -0.2440185546875, -0.7659912109375, 0.326324462890625, -0.7109184265136719, 0.5828380584716797, -0.06494903564453125, -0.3574371337890625, 0.4010772705078125, -0.1701202392578125, 0.03668975830078125, -0.2471771240234375, -0.8303775787353516, 0.4200630187988281, 1.101287841796875, 0.6634063720703125, -0.8686065673828125, 1.1519088745117188, -0.4675445556640625, -0.48415374755859375, 0.7173614501953125, -0.07061576843261719, -0.13787269592285156, -0.0540618896484375, -0.05855560302734375, 0.7626266479492188, -0.30217742919921875, 0.05972480773925781, 0.24420928955078125, 0.4048614501953125, -0.49062347412109375, 0.1678009033203125, 0.25563812255859375, -0.7943115234375, -0.4094085693359375, 0.5205841064453125, 0.673126220703125, -0.4984016418457031, 0.15575408935546875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000061.npy"}
{"epoch": 0.09221466364323508, "step": 62, "batch_size": 64, "mean": 0.1725986897945404, "std": 0.4413338005542755, "min": -1.006988525390625, "p10": -0.3522365570068359, "median": 0.1315288543701172, "p90": 0.7365467071533206, "max": 1.1283111572265625, "pos_frac": 0.65625, "sample": [0.8757781982421875, -0.1352977752685547, -0.7268791198730469, 0.2218799591064453, 0.1729736328125, -0.2902679443359375, 1.1283111572265625, -0.10416030883789062, 0.6602783203125, 0.08770751953125, 0.8693885803222656, 0.010162353515625, 0.34619903564453125, -0.456085205078125, -0.23191070556640625, 0.04557037353515625, -0.101959228515625, -0.1416015625, -0.1688556671142578, 0.7672042846679688, 0.49886322021484375, -0.36053466796875, -0.4471435546875, 0.5432510375976562, 0.3128242492675781, -0.280303955078125, -0.229034423828125, 0.1775665283203125, 0.0885009765625, 0.414276123046875, 0.29938507080078125, 0.07734489440917969, -0.08851242065429688, 0.002655029296875, 0.48931121826171875, -1.006988525390625, 0.0443115234375, -0.15330886840820312, -0.5136318206787109, 0.12372589111328125, -0.056270599365234375, 0.5466690063476562, 0.6269378662109375, 0.24768829345703125, 0.4536094665527344, -0.2328338623046875, 0.6360321044921875, -0.4998283386230469, 0.2526206970214844, 1.0730361938476562, 0.3227691650390625, 0.6049365997314453, 0.3499431610107422, 0.13933181762695312, -0.3328742980957031, 0.0966033935546875, 0.8299560546875, 0.603607177734375, -0.07812690734863281, 0.03678131103515625, 0.6650123596191406, 0.7871437072753906, 0.5876922607421875, 0.5648860931396484], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000062.npy"}
{"epoch": 0.09372637944066516, "step": 63, "batch_size": 64, "mean": 0.2199820578098297, "std": 0.4450734555721283, "min": -0.9024925231933594, "p10": -0.20951251983642574, "median": 0.2509145736694336, "p90": 0.7738113403320317, "max": 1.4111557006835938, "pos_frac": 0.765625, "sample": [0.02077484130859375, 0.4503936767578125, -0.22782135009765625, 0.09874725341796875, 0.02764892578125, -0.1667919158935547, 0.0305023193359375, 1.348419189453125, 1.4111557006835938, -0.0381927490234375, 0.06591796875, -0.15972137451171875, -0.10219573974609375, -0.8934173583984375, 0.8202056884765625, 0.05034637451171875, 0.0149078369140625, -0.8079910278320312, -0.08658027648925781, 0.2615947723388672, -0.791839599609375, -0.3213176727294922, 0.9104042053222656, 0.3684539794921875, -0.2731781005859375, 0.4748039245605469, 0.0872344970703125, 0.04568290710449219, 0.1655101776123047, 0.39151763916015625, 0.4727020263671875, 0.48926544189453125, 0.5751838684082031, 0.319305419921875, 0.240234375, -0.06650543212890625, 0.4985008239746094, 0.4933929443359375, 0.05056953430175781, 0.0718231201171875, 0.3330059051513672, 0.29376220703125, 0.20732879638671875, 0.3829612731933594, 0.32946205139160156, 0.2738323211669922, 0.665557861328125, 0.9160614013671875, 0.21965789794921875, 0.17090606689453125, 0.27606773376464844, 0.4363422393798828, 0.4570274353027344, -0.9024925231933594, 0.9644546508789062, -0.0908355712890625, 0.5362930297851562, 0.33442115783691406, -0.029689788818359375, 0.08206558227539062, 0.269317626953125, 0.9558563232421875, 0.374664306640625, 0.3031768798828125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000063.npy"}
{"epoch": 0.09523809523809523, "step": 64, "batch_size": 64, "mean": 0.06814375519752502, "std": 0.41975778341293335, "min": -1.2229766845703125, "p10": -0.4008819580078125, "median": 0.072662353515625, "p90": 0.5250892639160157, "max": 1.2003936767578125, "pos_frac": 0.578125, "sample": [0.45632171630859375, 1.2003936767578125, -0.5516986846923828, 0.5103225708007812, 0.3260765075683594, 0.068328857421875, 0.7185077667236328, 0.03845977783203125, 0.2275390625, -0.07944488525390625, -0.08069038391113281, 0.16839981079101562, -0.016473770141601562, -0.327880859375, 0.07570648193359375, 0.09058380126953125, -0.142578125, 0.2605743408203125, 0.4631462097167969, 0.6692924499511719, 0.399017333984375, 0.35047149658203125, -0.11606979370117188, -0.15227890014648438, -0.2821502685546875, -0.10040664672851562, 0.4567298889160156, -1.2229766845703125, -0.24421310424804688, 0.36545372009277344, 0.04851531982421875, -0.6441192626953125, 0.06961822509765625, -0.389495849609375, -0.04304313659667969, 0.736724853515625, 0.1898059844970703, -1.1412200927734375, 0.21341323852539062, 0.3215904235839844, 0.3665008544921875, -0.24161529541015625, 0.6810989379882812, 0.26749229431152344, 0.19536781311035156, -0.2372894287109375, -0.21206092834472656, 0.390045166015625, -0.40576171875, 0.5314178466796875, 0.2773475646972656, -0.046588897705078125, 0.5976181030273438, -0.12075042724609375, -0.04544639587402344, 0.18393707275390625, 0.18620681762695312, -0.23211288452148438, -0.38763427734375, 0.16463851928710938, -0.42925262451171875, -0.49909210205078125, 0.040004730224609375, 0.44687652587890625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000064.npy"}
{"epoch": 0.09674981103552532, "step": 65, "batch_size": 64, "mean": 0.32481849193573, "std": 0.4678734242916107, "min": -1.2567863464355469, "p10": -0.18375778198242188, "median": 0.3766965866088867, "p90": 0.8443798065185549, "max": 1.763214111328125, "pos_frac": 0.796875, "sample": [0.4966554641723633, 0.31476593017578125, 0.41207122802734375, 0.37175750732421875, -0.17438888549804688, 0.28485870361328125, 0.276641845703125, 0.900604248046875, 0.27594566345214844, 0.6340103149414062, 0.50604248046875, 0.3946876525878906, 0.6819515228271484, 0.6283187866210938, 0.7146987915039062, 0.3816356658935547, 0.6197853088378906, 0.39988136291503906, 0.636962890625, 0.69970703125, 0.2727813720703125, 0.6095733642578125, -0.118988037109375, 1.160430908203125, 0.9100055694580078, 0.16520309448242188, -1.2567863464355469, -0.3722381591796875, -0.2789459228515625, 0.41253662109375, 0.024627685546875, 0.6910476684570312, -0.6534042358398438, -0.37598419189453125, 0.5120315551757812, 0.3397216796875, -0.042537689208984375, 0.405487060546875, 0.09329986572265625, 0.8725929260253906, 0.7259407043457031, 0.12368202209472656, 0.501312255859375, 0.046497344970703125, 0.603607177734375, -0.18486785888671875, 0.24741744995117188, 0.05877685546875, -0.7116909027099609, -0.1811676025390625, 0.7785491943359375, 0.147918701171875, 0.32639312744140625, 0.9096584320068359, -0.14520263671875, 0.36507415771484375, 0.41761016845703125, 0.1306285858154297, 0.4996185302734375, 0.5548629760742188, 0.9363555908203125, -0.0878753662109375, 1.763214111328125, 0.13502120971679688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000065.npy"}
{"epoch": 0.0982615268329554, "step": 66, "batch_size": 64, "mean": 0.10960283875465393, "std": 0.4260352551937103, "min": -1.066925048828125, "p10": -0.42662200927734373, "median": 0.18377399444580078, "p90": 0.598064422607422, "max": 0.8488082885742188, "pos_frac": 0.609375, "sample": [0.18402671813964844, 0.241790771484375, 0.1491985321044922, 0.5193843841552734, 0.24789047241210938, 0.10437774658203125, 0.18352127075195312, 0.2646064758300781, -0.2541618347167969, 0.5612678527832031, 0.4903106689453125, 0.8488082885742188, 0.7017364501953125, 0.4095497131347656, -0.58380126953125, 0.39703369140625, -0.394439697265625, 0.22085189819335938, 0.438018798828125, 0.814422607421875, 0.4963226318359375, 0.16593551635742188, -0.7704849243164062, -0.12692642211914062, -0.2324066162109375, 0.33841514587402344, 0.4016895294189453, 0.343780517578125, 0.41564178466796875, -0.01764678955078125, 0.42299652099609375, -0.2189769744873047, -0.00783538818359375, 0.47216033935546875, 0.296112060546875, -0.005832672119140625, 0.1993541717529297, -0.1638031005859375, 0.3684272766113281, -0.19704437255859375, -0.873626708984375, -1.066925048828125, 0.2801361083984375, -0.69677734375, -0.21672821044921875, 0.00864410400390625, -0.03376007080078125, -0.36209869384765625, -0.019500732421875, 0.15487098693847656, 0.05277252197265625, -0.1266326904296875, 0.6138343811035156, 0.808990478515625, 0.6314239501953125, -0.19476318359375, -0.4404144287109375, -0.769561767578125, 0.377349853515625, 0.7059173583984375, 0.4877967834472656, -0.1851177215576172, 0.3177947998046875, -0.1633148193359375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000066.npy"}
{"epoch": 0.09977324263038549, "step": 67, "batch_size": 64, "mean": 0.20993369817733765, "std": 0.4622207283973694, "min": -0.821319580078125, "p10": -0.30223617553710935, "median": 0.23087787628173828, "p90": 0.6918643951416016, "max": 1.8378143310546875, "pos_frac": 0.625, "sample": [-0.14544677734375, -0.821319580078125, -0.26257896423339844, -0.049411773681640625, 0.06851959228515625, 0.5374927520751953, 0.7177028656005859, 0.6536464691162109, 0.6940536499023438, -0.31299591064453125, 0.6502151489257812, -0.20347023010253906, 0.420501708984375, 0.35079193115234375, -0.16725921630859375, 0.5902976989746094, -0.2320842742919922, 0.6867561340332031, -0.24522781372070312, 0.430267333984375, 0.8390426635742188, 0.38065338134765625, -0.2915496826171875, 0.5817031860351562, -0.082244873046875, -0.46588134765625, 0.506988525390625, 0.37647247314453125, 0.49100494384765625, -0.4337158203125, 0.12964630126953125, 0.66387939453125, -0.1425323486328125, -0.4876251220703125, 0.256256103515625, 0.05665397644042969, -0.10561370849609375, 0.23874855041503906, -0.2650794982910156, 0.486846923828125, 1.8378143310546875, -0.1714324951171875, 0.7670516967773438, 0.43524169921875, -0.073516845703125, -0.1988677978515625, 0.3714141845703125, 0.2230072021484375, 0.3682403564453125, -0.061004638671875, 0.055217742919921875, 1.46026611328125, 0.3133392333984375, 0.01990509033203125, 0.21462249755859375, 0.37979888916015625, 0.321807861328125, 0.2815055847167969, -0.08734130859375, -0.30681610107421875, 0.5044403076171875, 0.8953704833984375, 0.189605712890625, -0.39801788330078125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000067.npy"}
{"epoch": 0.10128495842781557, "step": 68, "batch_size": 64, "mean": 0.2649560570716858, "std": 0.5160690546035767, "min": -1.213775634765625, "p10": -0.3220481872558594, "median": 0.2619209289550781, "p90": 0.8234611511230471, "max": 1.8237762451171875, "pos_frac": 0.703125, "sample": [1.8237762451171875, 0.304351806640625, 0.5037708282470703, 0.44342041015625, -0.25159454345703125, 0.7420578002929688, 0.42415618896484375, 0.45456695556640625, 0.011194229125976562, 0.044158935546875, -0.32759857177734375, -0.322174072265625, 0.7448310852050781, 0.8413162231445312, -0.105804443359375, 0.59710693359375, 0.2783203125, 0.3726806640625, -0.5421218872070312, -0.26294708251953125, 0.1544647216796875, 0.0007457733154296875, 1.00262451171875, 0.22824859619140625, 0.002712249755859375, 0.26206207275390625, 0.5559005737304688, -0.2949676513671875, 1.2459487915039062, -0.046112060546875, 0.2572193145751953, 0.09399032592773438, 0.27069091796875, 0.37952423095703125, -0.32175445556640625, 0.6807174682617188, 1.5246124267578125, -0.13531494140625, -0.082672119140625, 0.1454143524169922, 0.2227325439453125, -0.032047271728515625, 0.3424224853515625, 0.7075538635253906, -0.5932464599609375, -0.5300140380859375, -0.086456298828125, -0.17295455932617188, 0.1139068603515625, 0.5035781860351562, 0.68902587890625, 0.4395751953125, -0.096435546875, 0.78179931640625, 0.26177978515625, 0.5289993286132812, -0.40493011474609375, 0.8865814208984375, 1.0932159423828125, 0.7033405303955078, 0.6588821411132812, 0.331695556640625, -1.213775634765625, 0.1244354248046875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000068.npy"}
{"epoch": 0.10279667422524566, "step": 69, "batch_size": 64, "mean": 0.17647448182106018, "std": 0.4222490191459656, "min": -0.7174720764160156, "p10": -0.34674663543701173, "median": 0.23356151580810547, "p90": 0.5499771118164063, "max": 2.0377197265625, "pos_frac": 0.71875, "sample": [0.3940410614013672, -0.44057464599609375, 0.6493568420410156, 0.275238037109375, 0.79095458984375, -0.015598297119140625, -0.2863121032714844, 0.31109619140625, 0.82318115234375, 0.0033416748046875, 0.0836944580078125, 0.2605552673339844, 0.01666259765625, 0.5257949829101562, 0.5270767211914062, 0.13094329833984375, 0.0056781768798828125, 0.21881103515625, 0.3989410400390625, -0.16405868530273438, 2.0377197265625, -0.668548583984375, 0.4393768310546875, 0.47900390625, 0.398284912109375, 0.38840484619140625, -0.39423370361328125, 0.5597915649414062, 0.233856201171875, 0.34488677978515625, 0.23996353149414062, 0.79205322265625, 0.23326683044433594, -0.309112548828125, 0.3107147216796875, -0.3461151123046875, 0.23908615112304688, 0.3872489929199219, -0.0545196533203125, 0.13266754150390625, 0.27813720703125, 0.452850341796875, -0.2313385009765625, -0.03375244140625, 0.2438812255859375, 0.19980621337890625, 0.3911399841308594, 0.3333473205566406, -0.3470172882080078, -0.5617599487304688, -0.6386947631835938, 0.1368846893310547, 0.12713241577148438, -0.058780670166015625, 0.22537994384765625, -0.7174720764160156, -0.1674346923828125, 0.2872886657714844, 0.13268661499023438, 0.008413314819335938, 0.342559814453125, 0.2638225555419922, 0.6912841796875, -0.0166168212890625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000069.npy"}
{"epoch": 0.10430839002267574, "step": 70, "batch_size": 64, "mean": 0.14442339539527893, "std": 0.4034799635410309, "min": -0.8839263916015625, "p10": -0.44020080566406244, "median": 0.15054035186767578, "p90": 0.6042821884155274, "max": 0.8481903076171875, "pos_frac": 0.71875, "sample": [0.406036376953125, -0.6061496734619141, 0.840362548828125, 0.3348388671875, 0.5356407165527344, -0.25084495544433594, 0.008392333984375, 0.11798858642578125, 0.04021453857421875, -0.48199462890625, 0.0739898681640625, -0.24908065795898438, -0.3970489501953125, 0.7567596435546875, 0.07775306701660156, -0.260284423828125, -0.21753692626953125, -0.4586944580078125, -0.25411224365234375, 0.15535736083984375, 0.6981658935546875, 0.18369293212890625, 0.7509288787841797, 0.4084434509277344, 0.0613555908203125, 0.56890869140625, 0.12367630004882812, 0.0373687744140625, -0.7411270141601562, 0.527008056640625, 0.1457233428955078, 0.4836273193359375, 0.05565643310546875, 0.3152351379394531, 0.07953643798828125, 0.32450103759765625, 0.44412994384765625, 0.555419921875, 0.22800254821777344, 0.26467132568359375, 0.15753555297851562, 0.6091098785400391, 0.12514114379882812, 0.28002166748046875, -0.1043853759765625, 0.4843406677246094, -0.7586135864257812, 0.593017578125, 0.3552570343017578, 0.5274124145507812, 0.5654373168945312, 0.612579345703125, -0.000823974609375, -0.8839263916015625, -0.2148303985595703, 0.14421844482421875, -0.068389892578125, -0.48299217224121094, 0.487060546875, 0.3351097106933594, 0.0389251708984375, -0.3151893615722656, 0.2223796844482422, 0.8481903076171875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000070.npy"}
{"epoch": 0.10582010582010581, "step": 71, "batch_size": 64, "mean": 0.04047642648220062, "std": 0.4723221957683563, "min": -1.375091552734375, "p10": -0.529718017578125, "median": 0.06914329528808594, "p90": 0.6077144622802736, "max": 0.9857177734375, "pos_frac": 0.5625, "sample": [0.06779098510742188, 0.5798606872558594, -0.021997451782226562, 0.2842426300048828, 0.0712432861328125, 0.1731109619140625, 0.8035068511962891, -0.6553573608398438, -0.7275390625, 0.250213623046875, 0.13576889038085938, -0.032135009765625, 0.26480865478515625, -0.21787261962890625, -0.32862091064453125, 0.04746818542480469, 0.334075927734375, 0.7835311889648438, -0.09982109069824219, 0.5123291015625, -1.078948974609375, 0.9857177734375, 0.49358367919921875, -0.07574081420898438, 0.8506317138671875, 0.3461151123046875, 0.07049560546875, -0.06055450439453125, 0.8527812957763672, -0.2175140380859375, -0.6119384765625, 0.1019744873046875, -0.76361083984375, -0.44535255432128906, 0.32262229919433594, 0.13530731201171875, 0.27831077575683594, -0.1863861083984375, 0.22689437866210938, -0.11107635498046875, 0.27907562255859375, -1.375091552734375, 0.40464019775390625, 0.0595550537109375, -0.36971282958984375, -0.016526222229003906, 0.0091705322265625, 0.6196517944335938, -0.2844085693359375, 0.19825172424316406, 0.09164047241210938, 0.3892784118652344, 0.922607421875, 0.53204345703125, -0.04720306396484375, -0.23262786865234375, -0.4886894226074219, 0.356842041015625, -0.03623199462890625, -0.5013217926025391, -0.5311508178710938, 0.2241382598876953, -0.5263748168945312, -0.4249839782714844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000071.npy"}
{"epoch": 0.1073318216175359, "step": 72, "batch_size": 64, "mean": 0.26161882281303406, "std": 0.5118357539176941, "min": -0.8241424560546875, "p10": -0.3685148239135742, "median": 0.17777061462402344, "p90": 0.844425392150879, "max": 1.820037841796875, "pos_frac": 0.75, "sample": [-0.44507598876953125, -0.0772552490234375, 0.01659393310546875, 0.14548110961914062, 0.3763580322265625, 0.3419036865234375, -0.07981300354003906, 0.9070930480957031, -0.7838611602783203, -0.2742156982421875, 0.8360500335693359, 0.9874649047851562, 0.2659454345703125, 0.6963462829589844, 0.3591156005859375, 0.03754997253417969, -0.8241424560546875, 0.4940967559814453, -0.05092620849609375, -0.47954559326171875, 0.8132476806640625, 0.13623619079589844, 0.6571559906005859, 0.05613517761230469, 0.09856414794921875, 0.7177734375, 0.0495147705078125, -0.37685585021972656, -0.265960693359375, 0.03833770751953125, 1.820037841796875, 0.563690185546875, 0.7912826538085938, 1.19256591796875, -0.47711944580078125, -0.3048591613769531, 0.6878910064697266, -0.18289947509765625, 0.08243179321289062, 0.2994270324707031, 0.8480148315429688, 0.5159721374511719, 0.05107879638671875, -0.1092071533203125, 0.147003173828125, 0.08331680297851562, 0.933868408203125, 0.745758056640625, 0.15014266967773438, 0.23662185668945312, 0.4792938232421875, 0.79656982421875, 1.2566375732421875, 0.002941131591796875, 0.2053985595703125, 0.2296123504638672, 0.5389442443847656, 0.08780479431152344, 0.5887603759765625, 0.1418743133544922, -0.7686920166015625, 0.7967681884765625, 0.28841400146484375, -0.34905242919921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000072.npy"}
{"epoch": 0.10884353741496598, "step": 73, "batch_size": 64, "mean": 0.1334337592124939, "std": 0.504127025604248, "min": -0.949554443359375, "p10": -0.5243415832519531, "median": 0.10036277770996094, "p90": 0.6742485046386719, "max": 1.5302276611328125, "pos_frac": 0.59375, "sample": [0.2440509796142578, -0.18974685668945312, -0.8928146362304688, -0.48746490478515625, -0.6552314758300781, 0.3284149169921875, -0.1342315673828125, -0.8351898193359375, -0.6840057373046875, 0.4579486846923828, 0.5333938598632812, -0.16211700439453125, -0.03769683837890625, -0.4531974792480469, -0.24452781677246094, -0.040782928466796875, -0.16156005859375, 1.0864753723144531, 0.5585098266601562, 0.50274658203125, -0.949554443359375, -0.2338409423828125, -0.17751502990722656, 0.31456756591796875, 0.2135009765625, 0.60455322265625, 0.08998680114746094, -0.21846961975097656, -0.049251556396484375, -0.090087890625, 0.5113067626953125, -0.07306289672851562, 0.3856525421142578, 0.20934295654296875, 0.0184326171875, 0.10941696166992188, 0.4662284851074219, 1.42755126953125, -0.7614021301269531, -0.5401458740234375, 0.187591552734375, 0.6961212158203125, 0.048614501953125, 0.20853424072265625, 0.15761184692382812, 0.06682586669921875, 0.6618423461914062, 0.37491416931152344, 0.3709259033203125, 0.8212203979492188, -0.11590576171875, 0.5419120788574219, 1.0255012512207031, 0.5693893432617188, -0.0075225830078125, 0.20066070556640625, 0.06743240356445312, 0.453033447265625, 0.6795654296875, -0.1245269775390625, 0.2275218963623047, 1.5302276611328125, 0.09130859375, -0.18322181701660156], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000073.npy"}
{"epoch": 0.11035525321239607, "step": 74, "batch_size": 64, "mean": 0.2982656955718994, "std": 0.4957234263420105, "min": -0.5774097442626953, "p10": -0.22521915435791012, "median": 0.2147541046142578, "p90": 1.009471130371094, "max": 1.9498748779296875, "pos_frac": 0.734375, "sample": [0.04852294921875, -0.38143157958984375, 0.412139892578125, 0.06549835205078125, 0.6200942993164062, 0.25909423828125, 0.4954376220703125, 1.9498748779296875, 1.0820083618164062, 1.1212482452392578, 0.7213897705078125, 1.5414237976074219, 0.154144287109375, 0.23525238037109375, -0.048122406005859375, 0.3943901062011719, 1.260589599609375, -0.160430908203125, -0.15743255615234375, 0.3235015869140625, -0.05960845947265625, 0.16229248046875, 0.11316299438476562, 0.38074493408203125, 0.4019660949707031, 0.14496421813964844, -0.20593833923339844, -0.4143524169921875, 1.0363693237304688, -0.5208511352539062, 0.345062255859375, 0.4268913269042969, 0.3692054748535156, 0.16938400268554688, -0.252166748046875, 0.37403106689453125, -0.07657432556152344, 0.2823028564453125, -0.087921142578125, -0.2576770782470703, 0.6892833709716797, -0.5774097442626953, -0.20524215698242188, 0.19711685180664062, 0.33188629150390625, 0.072296142578125, 1.5095291137695312, 0.03133201599121094, 0.7115097045898438, 0.6498336791992188, 0.0643310546875, -0.01287841796875, 0.22824859619140625, 0.4745597839355469, 0.4853057861328125, 0.17714691162109375, -0.23348236083984375, 0.023040771484375, 0.9467086791992188, 0.6691131591796875, 0.550323486328125, 0.20125961303710938, -0.17634963989257812, 0.0130615234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000074.npy"}
{"epoch": 0.11186696900982615, "step": 75, "batch_size": 64, "mean": 0.26300370693206787, "std": 0.46396082639694214, "min": -0.6538734436035156, "p10": -0.30838832855224607, "median": 0.25290918350219727, "p90": 0.890841865539551, "max": 1.5629425048828125, "pos_frac": 0.65625, "sample": [-0.0712890625, -0.16002655029296875, 0.3061199188232422, 0.5623245239257812, 1.288330078125, -0.06719779968261719, 0.14287567138671875, 0.32917022705078125, 0.48432159423828125, 0.4419670104980469, -0.1107635498046875, -0.02596282958984375, -0.29912376403808594, 0.7140731811523438, -0.33049774169921875, 1.0138225555419922, -0.02477264404296875, -0.08265113830566406, 0.26541614532470703, 0.4416542053222656, 1.03900146484375, 0.8261795043945312, 1.5629425048828125, 0.01747894287109375, 0.40994834899902344, 0.633026123046875, 0.7453765869140625, -0.2013397216796875, 0.2404022216796875, -0.18625259399414062, 0.10281753540039062, 0.9185543060302734, 0.62738037109375, 0.7899112701416016, -0.0457305908203125, -0.09432220458984375, 0.364776611328125, 0.1700897216796875, 0.9719924926757812, 0.7744407653808594, -0.26255035400390625, 0.365631103515625, -0.6538734436035156, -0.021177291870117188, 0.6480598449707031, 0.38100433349609375, 0.20584869384765625, 0.5521354675292969, 0.27683258056640625, -0.40233612060546875, 0.3255577087402344, 0.3119392395019531, -0.32265663146972656, 0.011486053466796875, -0.3178901672363281, -0.3123588562011719, 0.155517578125, -0.21330642700195312, -0.5380058288574219, 0.2814903259277344, 0.47099876403808594, 0.025384902954101562, 1.2214813232421875, 0.15856170654296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000075.npy"}
{"epoch": 0.11337868480725624, "step": 76, "batch_size": 64, "mean": 0.16686102747917175, "std": 0.4853253960609436, "min": -0.9426498413085938, "p10": -0.3748435974121093, "median": 0.15789222717285156, "p90": 0.7579277038574219, "max": 1.4074859619140625, "pos_frac": 0.640625, "sample": [0.3436126708984375, 0.9598159790039062, -0.9426498413085938, 0.41033935546875, 0.6051406860351562, -0.49436378479003906, 0.1977252960205078, 0.3244743347167969, 0.20050621032714844, 0.7616653442382812, 0.7322597503662109, 1.1691665649414062, -0.10002899169921875, -0.9021224975585938, 0.2164306640625, -0.38927459716796875, 0.5455780029296875, 0.3730583190917969, -0.1028594970703125, 0.17461013793945312, 0.9141387939453125, 0.432769775390625, -0.3411712646484375, 0.74920654296875, 0.524810791015625, 0.032989501953125, 0.02527618408203125, 0.6570510864257812, -0.8637104034423828, -0.06092262268066406, 0.1313037872314453, 0.023204803466796875, -0.2518310546875, 0.5720443725585938, -0.13136863708496094, 0.298797607421875, 0.5201644897460938, 0.14117431640625, 0.12645339965820312, -0.11130523681640625, 0.4647789001464844, -0.6624221801757812, -0.2691535949707031, 0.024379730224609375, 0.0468902587890625, 0.5101032257080078, -0.127288818359375, -0.26711082458496094, 0.2837677001953125, 1.4074859619140625, -0.23142242431640625, -0.6464691162109375, -0.23957061767578125, 0.0610809326171875, 0.7701873779296875, 0.8363513946533203, -0.07373237609863281, -0.21191978454589844, -0.2184906005859375, 0.2834320068359375, 0.7150077819824219, 0.24804115295410156, 0.5219573974609375, -0.018938064575195312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000076.npy"}
{"epoch": 0.11489040060468632, "step": 77, "batch_size": 64, "mean": 0.17436623573303223, "std": 0.45174530148506165, "min": -1.1253814697265625, "p10": -0.4157356262207031, "median": 0.18951797485351562, "p90": 0.7893053054809573, "max": 1.2065200805664062, "pos_frac": 0.640625, "sample": [0.181427001953125, -0.021053314208984375, -0.09019851684570312, -0.44010162353515625, -0.1998157501220703, 1.2065200805664062, 0.47815704345703125, 0.333709716796875, -0.4923858642578125, -0.020572662353515625, 0.675048828125, 0.4569244384765625, 0.3207817077636719, -0.27652740478515625, 0.4463233947753906, 0.7133769989013672, 0.5164947509765625, 0.8218460083007812, 0.5130157470703125, 0.3301849365234375, -0.10839080810546875, -0.024034500122070312, 1.0662078857421875, 0.5035133361816406, 0.203094482421875, -0.020488739013671875, -0.11210250854492188, -0.41902923583984375, 0.23587989807128906, 0.030162811279296875, -0.6306514739990234, -0.6242446899414062, 0.18487167358398438, 0.02260589599609375, 0.3002166748046875, 0.17218017578125, -0.08055877685546875, 0.4717063903808594, 0.21147918701171875, 0.9134407043457031, -0.13043212890625, 0.133026123046875, 0.08902740478515625, 0.3956775665283203, 0.8826332092285156, 0.8983001708984375, 0.09573936462402344, -0.0332183837890625, 0.32379150390625, 0.2949943542480469, 0.26917266845703125, -0.32727813720703125, 0.19416427612304688, 0.6503067016601562, 1.0188369750976562, 0.2782173156738281, -0.408050537109375, -0.5567626953125, -1.1253814697265625, -0.068878173828125, 0.210113525390625, 0.1183929443359375, 0.6125049591064453, -0.40447235107421875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000077.npy"}
{"epoch": 0.1164021164021164, "step": 78, "batch_size": 64, "mean": 0.10266757011413574, "std": 0.45668914914131165, "min": -1.405303955078125, "p10": -0.526880645751953, "median": 0.15784454345703125, "p90": 0.5388168334960938, "max": 1.0884723663330078, "pos_frac": 0.703125, "sample": [0.09140586853027344, -1.405303955078125, 0.03813934326171875, 0.4234161376953125, 0.22017669677734375, 1.0884723663330078, 0.2688179016113281, 0.0303955078125, 0.07590675354003906, -0.2119293212890625, 0.080047607421875, -0.5430145263671875, -0.035907745361328125, 0.29383087158203125, 0.33087158203125, 0.00139617919921875, -0.4694366455078125, 0.8368988037109375, 0.21474456787109375, 0.16393280029296875, -0.7489700317382812, -0.6752853393554688, 0.5611572265625, 0.5171318054199219, -0.0360260009765625, 0.42012786865234375, 0.44358062744140625, 0.0513153076171875, 0.3735198974609375, -0.2850494384765625, 0.9263515472412109, 0.5458984375, 0.5222930908203125, 1.0305442810058594, 0.20255279541015625, -0.91650390625, -0.6087188720703125, 0.110870361328125, 0.27965354919433594, 0.4826946258544922, 0.3751182556152344, 0.3988914489746094, 0.04851531982421875, -0.2913398742675781, -0.48923492431640625, 0.7966270446777344, -0.2628898620605469, 0.021595001220703125, 0.33917999267578125, 0.3182525634765625, -0.3748607635498047, 0.1118011474609375, -0.25921630859375, -0.07857513427734375, 0.02166748046875, -0.6060981750488281, 0.17313575744628906, 0.230499267578125, 0.36487579345703125, -0.09918975830078125, 0.454132080078125, 0.3233451843261719, 0.15175628662109375, 0.212738037109375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000078.npy"}
{"epoch": 0.11791383219954649, "step": 79, "batch_size": 64, "mean": 0.1862274706363678, "std": 0.416649729013443, "min": -1.1121139526367188, "p10": -0.232403564453125, "median": 0.15523529052734375, "p90": 0.6599216461181642, "max": 1.4184761047363281, "pos_frac": 0.71875, "sample": [0.05216217041015625, 0.41385650634765625, -0.35132789611816406, 0.29206275939941406, -0.0600738525390625, 0.6697807312011719, -1.1121139526367188, 0.0548095703125, 0.1266937255859375, -0.0142669677734375, -0.013059616088867188, 0.1462249755859375, 0.4709815979003906, 1.4184761047363281, -0.4831390380859375, 0.16670989990234375, 0.2861518859863281, 0.24086380004882812, 0.054523468017578125, 0.5556755065917969, -0.0438690185546875, 0.6971588134765625, 0.0990142822265625, 0.16424560546875, 0.3909454345703125, -0.19131851196289062, 1.0526847839355469, 0.2818756103515625, 0.1373138427734375, 0.1195526123046875, -0.0349273681640625, -0.14456748962402344, 0.45992279052734375, 0.5330886840820312, 0.3454093933105469, 0.03157806396484375, 0.12461090087890625, 0.18146133422851562, -0.23598098754882812, -0.17353057861328125, 0.19981765747070312, 0.32537078857421875, -0.31451416015625, -0.22405624389648438, -0.5923366546630859, 0.24946975708007812, 0.20726776123046875, 0.0009613037109375, 0.4202136993408203, 0.08610916137695312, 0.2995586395263672, 0.20536041259765625, 0.0268707275390625, -0.46783447265625, -0.06275558471679688, 0.84942626953125, 0.48722076416015625, 0.7118453979492188, 0.6369171142578125, 1.4048042297363281, 0.259307861328125, -0.18323898315429688, 0.5581512451171875, 0.12496185302734375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000079.npy"}
{"epoch": 0.11942554799697656, "step": 80, "batch_size": 64, "mean": 0.22545722126960754, "std": 0.4805307984352112, "min": -0.95062255859375, "p10": -0.40911006927490223, "median": 0.21296119689941406, "p90": 0.7812067031860351, "max": 1.8023681640625, "pos_frac": 0.6875, "sample": [-0.25045204162597656, 0.6645011901855469, 0.3206787109375, 0.20406341552734375, 0.4243507385253906, -0.56085205078125, 0.33530426025390625, 0.21490859985351562, -0.072509765625, -0.10223388671875, 0.6845951080322266, -0.12823486328125, 0.5247802734375, -0.5742263793945312, -0.03842353820800781, 1.8023681640625, 0.2110137939453125, 0.07056427001953125, 0.5781097412109375, 0.049190521240234375, 0.3658714294433594, 0.3359527587890625, -0.03798866271972656, 0.03194427490234375, 0.7032623291015625, 1.042684555053711, 0.7750930786132812, -0.1009979248046875, 0.99688720703125, 0.6025238037109375, 0.32207489013671875, 0.6085586547851562, 0.24073028564453125, 0.2432403564453125, -0.01941680908203125, 0.5756988525390625, -0.44417762756347656, -0.1092529296875, 0.38365936279296875, -0.3272857666015625, -0.95062255859375, -0.6284255981445312, -0.23137664794921875, 0.3892402648925781, 0.21560287475585938, -0.4522895812988281, 0.5721282958984375, 0.04832267761230469, 0.1228485107421875, -0.5025062561035156, 1.306671142578125, 0.3005390167236328, 0.706085205078125, -0.15180206298828125, -0.04622840881347656, 0.0849761962890625, 0.08648681640625, 0.2406024932861328, 0.7838268280029297, 0.87158203125, 0.17043304443359375, 0.0616912841796875, 0.083587646484375, 0.80133056640625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000080.npy"}
{"epoch": 0.12093726379440665, "step": 81, "batch_size": 64, "mean": 0.23963311314582825, "std": 0.530830979347229, "min": -1.4478378295898438, "p10": -0.3898445129394531, "median": 0.2031993865966797, "p90": 0.944589996337891, "max": 1.6084747314453125, "pos_frac": 0.6875, "sample": [0.2193603515625, -0.24625205993652344, 0.5224571228027344, -0.0311279296875, 1.2624053955078125, -0.42752838134765625, 0.1792430877685547, 0.03680419921875, -0.12195968627929688, -0.4719734191894531, 0.4947662353515625, 0.2014617919921875, -0.39005279541015625, 0.4611663818359375, 0.1393108367919922, 0.06468963623046875, -0.04155921936035156, 0.27513694763183594, -1.4478378295898438, 0.5691623687744141, 0.2598419189453125, 0.32827186584472656, 0.9979248046875, 0.548370361328125, 0.7215423583984375, 0.23882293701171875, -0.1689300537109375, 0.4511299133300781, -0.11879730224609375, 0.16461181640625, 0.29199981689453125, 0.981048583984375, 0.5939788818359375, 0.032245635986328125, 0.38858795166015625, 0.18231201171875, -0.867828369140625, 1.6084747314453125, 1.084136962890625, 0.620880126953125, -0.1568603515625, 0.20493698120117188, 0.8595199584960938, -0.21638870239257812, 0.75543212890625, 0.06875801086425781, 0.49387168884277344, -0.13672637939453125, -0.47370147705078125, 1.1856842041015625, 1.545257568359375, 0.66552734375, 0.2500267028808594, -0.3893585205078125, 0.3863353729248047, -0.48622894287109375, 0.09201431274414062, 0.07713127136230469, 0.13714218139648438, 0.3675994873046875, -0.018276214599609375, 0.6126213073730469, -0.04175567626953125, -0.03234100341796875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000081.npy"}
{"epoch": 0.12244897959183673, "step": 82, "batch_size": 64, "mean": 0.3042997121810913, "std": 0.4362401068210602, "min": -0.7844085693359375, "p10": -0.2465044021606445, "median": 0.31821250915527344, "p90": 0.8552364349365235, "max": 1.4595947265625, "pos_frac": 0.75, "sample": [0.3060188293457031, 0.4486961364746094, 0.5144500732421875, 0.0777740478515625, 0.3943328857421875, 0.30373191833496094, 0.2574005126953125, 0.5493240356445312, 0.85784912109375, 0.5092029571533203, 0.5461330413818359, 0.4245166778564453, 0.0014495849609375, 0.6777191162109375, 0.18074417114257812, 0.6869163513183594, -0.15716552734375, 0.45246124267578125, 0.706787109375, 0.2408905029296875, 0.1804962158203125, 0.17891311645507812, 0.3772296905517578, 0.245513916015625, 0.8337554931640625, 0.38809967041015625, -0.02872467041015625, -0.3007965087890625, 0.8491401672363281, 0.6821308135986328, 0.34032440185546875, 0.1751575469970703, 1.4595947265625, -0.0803375244140625, -0.629302978515625, -0.3351287841796875, 0.9347305297851562, -0.550048828125, 0.28021240234375, 0.9058799743652344, 0.33040618896484375, -0.2528266906738281, 0.836883544921875, 0.02864837646484375, 0.5788116455078125, -0.025970458984375, 0.9998645782470703, -0.030447006225585938, 0.388427734375, 0.6655426025390625, -0.7844085693359375, -0.18868446350097656, 0.5732574462890625, 0.17395782470703125, -0.188262939453125, -0.2317523956298828, -0.39522552490234375, 0.27346038818359375, 1.0300025939941406, 0.16121482849121094, 0.3507080078125, 0.433837890625, -0.08805084228515625, 0.9497146606445312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000082.npy"}
{"epoch": 0.12396069538926682, "step": 83, "batch_size": 64, "mean": 0.34642475843429565, "std": 0.5084571242332458, "min": -1.1236953735351562, "p10": -0.24267616271972656, "median": 0.31415271759033203, "p90": 0.9526779174804688, "max": 1.4458999633789062, "pos_frac": 0.78125, "sample": [1.2794647216796875, 0.6859893798828125, 0.29822540283203125, 0.20880126953125, 0.7320346832275391, 0.105804443359375, 0.14893341064453125, -0.13982772827148438, 0.770263671875, 0.773406982421875, -0.24778366088867188, 0.12040138244628906, -0.13575363159179688, 0.18816375732421875, 0.5466995239257812, 0.34498023986816406, -0.5852584838867188, 0.8686676025390625, 0.2805309295654297, -0.011157989501953125, 0.5667839050292969, 0.9329605102539062, 1.1068801879882812, -0.20830726623535156, -1.1236953735351562, 0.41457366943359375, 0.65771484375, 0.21265411376953125, 0.06270027160644531, 0.1569995880126953, 0.5433197021484375, -0.49996185302734375, 1.2162666320800781, -0.8401241302490234, -0.5156612396240234, 0.27842140197753906, 0.38146400451660156, -0.340606689453125, 0.19127273559570312, -0.2307586669921875, 0.1475982666015625, 0.70166015625, 0.4143638610839844, 0.2350139617919922, -0.10495376586914062, 0.3300800323486328, 0.21656036376953125, 1.195159912109375, 0.24434661865234375, 0.8227424621582031, 1.4458999633789062, 0.38277244567871094, 0.9611282348632812, 0.2754631042480469, 0.8262939453125, 0.6021747589111328, 0.6984424591064453, 0.7110099792480469, 0.643524169921875, 1.14154052734375, 0.6101531982421875, 0.35353851318359375, 0.2679100036621094, -0.14672088623046875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000083.npy"}
{"epoch": 0.1254724111866969, "step": 84, "batch_size": 64, "mean": 0.14622774720191956, "std": 0.5230767726898193, "min": -1.07891845703125, "p10": -0.5068536758422851, "median": 0.08040809631347656, "p90": 0.8873247146606446, "max": 1.1678886413574219, "pos_frac": 0.59375, "sample": [0.394287109375, 0.745025634765625, 0.31171607971191406, 0.893798828125, -0.5881881713867188, 0.696563720703125, 0.2724609375, -0.39466094970703125, -0.13671493530273438, 0.0310211181640625, -0.761077880859375, 1.1016826629638672, 0.4127655029296875, 0.525360107421875, 0.7484512329101562, -0.1229095458984375, -0.3424415588378906, 0.241943359375, -0.043975830078125, -0.3016319274902344, -0.3927459716796875, -0.0469970703125, 0.27060508728027344, 0.116973876953125, 0.1774139404296875, 0.36834716796875, -0.239715576171875, 0.2519073486328125, 0.19371795654296875, -0.4613800048828125, 0.7357559204101562, 0.39154052734375, -0.23846054077148438, -0.035491943359375, -0.2605743408203125, 0.45623779296875, 1.1678886413574219, 0.6964378356933594, 0.10558700561523438, -0.2099609375, -0.958984375, 0.827484130859375, -0.172454833984375, 0.21875953674316406, 1.0096664428710938, 0.03483390808105469, -0.06197547912597656, 1.021759033203125, -0.031650543212890625, 0.8759613037109375, 0.04087066650390625, -1.07891845703125, 0.8921947479248047, -0.3225860595703125, 0.05522918701171875, 0.022098541259765625, 1.011688232421875, 0.2848377227783203, -0.6473922729492188, -0.619384765625, 0.7819976806640625, 0.053680419921875, -0.5263423919677734, -0.0833587646484375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000084.npy"}
{"epoch": 0.12698412698412698, "step": 85, "batch_size": 64, "mean": 0.3224252462387085, "std": 0.5566155314445496, "min": -1.0758209228515625, "p10": -0.3933723449707031, "median": 0.3635215759277344, "p90": 1.0535797119140626, "max": 1.649810791015625, "pos_frac": 0.75, "sample": [1.058990478515625, -0.23960494995117188, 0.12079048156738281, 0.08051681518554688, 0.46671295166015625, 0.491546630859375, 0.1055908203125, 0.3918781280517578, -0.3087444305419922, 1.0765724182128906, 1.2828903198242188, -1.0758209228515625, 0.35364532470703125, 1.04095458984375, 0.3624706268310547, 0.6534576416015625, 1.3071403503417969, -0.39809417724609375, 0.7897491455078125, 0.8061981201171875, 0.0293426513671875, 0.445037841796875, -0.382354736328125, 0.8503570556640625, 0.07298088073730469, -0.3798828125, 0.172027587890625, 0.18724441528320312, 0.8408317565917969, -0.0569915771484375, -0.22145843505859375, 0.6723518371582031, -0.7433929443359375, -0.637054443359375, 0.10094642639160156, 0.1268310546875, 1.3092803955078125, 0.6655750274658203, -0.27332305908203125, 0.42629241943359375, -0.7153129577636719, 0.569915771484375, 1.1863021850585938, 0.3413429260253906, 1.649810791015625, 0.9808254241943359, 0.200103759765625, 0.7372837066650391, 0.36432647705078125, 0.5025920867919922, 0.07772064208984375, 0.3472442626953125, -0.036357879638671875, 0.7090072631835938, 0.435699462890625, 0.3627166748046875, -0.39980316162109375, -0.0834197998046875, 0.44954681396484375, -0.6217422485351562, 0.6052780151367188, 0.6655654907226562, 0.370025634765625, 0.3650627136230469], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000085.npy"}
{"epoch": 0.12849584278155707, "step": 86, "batch_size": 64, "mean": 0.2749743163585663, "std": 0.5472034811973572, "min": -0.83935546875, "p10": -0.3107082366943359, "median": 0.19569969177246094, "p90": 1.048346328735352, "max": 1.8776016235351562, "pos_frac": 0.65625, "sample": [-0.10874176025390625, 0.38541221618652344, 0.221343994140625, 0.038875579833984375, 0.9283905029296875, 0.5877208709716797, -0.0962677001953125, 0.6319427490234375, 0.18030548095703125, 0.2440662384033203, -0.339019775390625, -0.3262367248535156, 0.36846923828125, 1.0958938598632812, 0.10355377197265625, 0.5953445434570312, 0.5595531463623047, -0.06839561462402344, -0.19667816162109375, -0.157257080078125, -0.83935546875, -0.6402664184570312, -0.0108642578125, -0.2708110809326172, 0.5373077392578125, 0.1313762664794922, 0.64801025390625, 0.08583831787109375, 1.1900787353515625, 0.21109390258789062, 1.2066841125488281, -0.4488372802734375, 0.307464599609375, 0.6706390380859375, 0.03546142578125, 0.29343414306640625, -0.5559844970703125, -0.172149658203125, 0.7081375122070312, 0.4763641357421875, -0.0815277099609375, 0.28819847106933594, 0.10287094116210938, 0.052288055419921875, 0.9032058715820312, 1.0949363708496094, 0.6281261444091797, 0.4390888214111328, 0.93963623046875, -0.6566619873046875, 1.5106658935546875, -0.11812591552734375, -0.222442626953125, 0.2998180389404297, 0.7078590393066406, 1.8776016235351562, -0.00716400146484375, -0.0768280029296875, 0.16985321044921875, -0.27447509765625, 0.05328369140625, -0.21428298950195312, 1.4646339416503906, 0.5059013366699219], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000086.npy"}
{"epoch": 0.13000755857898716, "step": 87, "batch_size": 64, "mean": 0.31578001379966736, "std": 0.5832809805870056, "min": -1.105987548828125, "p10": -0.4694156646728515, "median": 0.2719459533691406, "p90": 1.031364440917969, "max": 1.582000732421875, "pos_frac": 0.71875, "sample": [0.6056480407714844, 1.486175537109375, 0.338104248046875, 0.11810111999511719, 1.5398178100585938, 0.619171142578125, -0.2093658447265625, -0.5603790283203125, 0.8085708618164062, 0.5223598480224609, -0.7573585510253906, -0.072113037109375, 0.6779689788818359, -0.5015449523925781, -0.58154296875, 0.6696929931640625, 0.438018798828125, 1.0651931762695312, 0.9170017242431641, 0.6044845581054688, -0.2291412353515625, 1.4892349243164062, 0.7862510681152344, 0.157470703125, 0.5740089416503906, -0.947479248046875, 0.043605804443359375, -0.31137657165527344, 0.9524307250976562, -0.0135345458984375, -0.39444732666015625, -0.091400146484375, 0.5886516571044922, -0.6221084594726562, 0.18776702880859375, -0.001171112060546875, 0.1860809326171875, 0.10533332824707031, 0.1755199432373047, 0.7678012847900391, 0.3876991271972656, 0.38777923583984375, 0.5565681457519531, -1.105987548828125, 0.1290130615234375, -9.5367431640625e-05, 0.3068065643310547, 1.1498260498046875, 0.23708534240722656, 0.0732574462890625, 0.4256744384765625, 0.5308837890625, 0.1493377685546875, -0.05571746826171875, 0.6666221618652344, 0.16867828369140625, 0.0828704833984375, -0.17093658447265625, 0.805694580078125, 0.7501144409179688, 1.582000732421875, 0.8202705383300781, 0.0418701171875, 1.1591033935546875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000087.npy"}
{"epoch": 0.13151927437641722, "step": 88, "batch_size": 64, "mean": 0.22902727127075195, "std": 0.6055418848991394, "min": -1.1759796142578125, "p10": -0.5528091430664063, "median": 0.2450103759765625, "p90": 0.9572729110717775, "max": 1.343505859375, "pos_frac": 0.640625, "sample": [0.2873382568359375, 0.432708740234375, -0.40714454650878906, 0.5194625854492188, 1.3058853149414062, 0.212860107421875, 0.7497100830078125, -0.5890274047851562, 0.5606765747070312, 0.12381362915039062, 1.0168113708496094, 0.3735504150390625, 0.4534168243408203, 0.2690448760986328, -0.3250389099121094, 0.41158294677734375, 0.2348480224609375, -0.10704421997070312, -0.5604400634765625, 0.687469482421875, 0.9335193634033203, 0.11925506591796875, 0.15951919555664062, -0.4435768127441406, -0.535003662109375, 1.2975845336914062, 0.8833198547363281, 1.329315185546875, 0.001556396484375, -0.19033050537109375, -1.0847244262695312, -0.9364166259765625, 0.2551727294921875, 0.102142333984375, 0.5547981262207031, -0.2819099426269531, 0.9674530029296875, 0.7826385498046875, 0.7061195373535156, -0.04871177673339844, 0.16036033630371094, 0.484222412109375, 0.7966575622558594, 0.33277130126953125, -1.1759796142578125, 0.6492462158203125, -0.14698028564453125, -0.03802299499511719, 0.8236618041992188, 0.6913681030273438, 0.675537109375, 0.5594940185546875, 1.343505859375, -0.699249267578125, -0.37972259521484375, -0.4115104675292969, -0.17940521240234375, -0.03405570983886719, -0.07395172119140625, 1.253713607788086, -0.4050102233886719, 0.7813873291015625, 0.16509056091308594, -0.7375869750976562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000088.npy"}
{"epoch": 0.1330309901738473, "step": 89, "batch_size": 64, "mean": 0.2088102400302887, "std": 0.6582293510437012, "min": -2.59185791015625, "p10": -0.40202789306640624, "median": 0.2837820053100586, "p90": 0.9852870941162111, "max": 1.4882545471191406, "pos_frac": 0.6875, "sample": [0.6114597320556641, -0.5025138854980469, 0.2654743194580078, 0.318023681640625, 1.0025482177734375, 1.3537940979003906, 0.4258270263671875, 0.3580780029296875, 0.12393760681152344, 0.7177810668945312, 0.466766357421875, 0.2262115478515625, -0.2724609375, 1.4400787353515625, -0.07511138916015625, 0.3757038116455078, 0.262969970703125, 0.306182861328125, 0.19234466552734375, -0.8794403076171875, 0.34003448486328125, -0.08183670043945312, 0.806488037109375, 0.60723876953125, 0.5448722839355469, 0.71942138671875, 0.5481719970703125, 0.15571975708007812, -0.17424583435058594, 0.249359130859375, -0.33278656005859375, 1.0230560302734375, 0.20846176147460938, -0.21274948120117188, 0.3246002197265625, -0.0198974609375, 0.45191192626953125, 1.0604629516601562, -2.59185791015625, -0.6948394775390625, 0.6705856323242188, 0.9450111389160156, 1.4882545471191406, 0.8900146484375, -0.2241382598876953, 0.08293342590332031, 0.3020896911621094, 0.5941619873046875, 1.024017333984375, -1.01861572265625, -0.3688201904296875, 0.1987934112548828, 0.10453414916992188, 0.17490768432617188, 0.3942584991455078, -1.3415374755859375, 0.3260345458984375, 0.6142959594726562, -0.24737548828125, -0.31694793701171875, -0.416259765625, -0.2830238342285156, 0.4044151306152344, -0.2829742431640625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000089.npy"}
{"epoch": 0.1345427059712774, "step": 90, "batch_size": 64, "mean": 0.22840029001235962, "std": 0.6474013328552246, "min": -2.35546875, "p10": -0.3418144226074219, "median": 0.19708538055419922, "p90": 1.03278865814209, "max": 1.4975128173828125, "pos_frac": 0.640625, "sample": [-0.8111915588378906, 1.0371208190917969, -0.19867515563964844, -0.025371551513671875, 0.46238136291503906, 0.17131996154785156, 1.0226802825927734, 0.8415145874023438, -0.3397369384765625, -2.35546875, 0.7503662109375, 0.9641075134277344, 0.6443691253662109, 0.6641979217529297, 1.050445556640625, 0.011241912841796875, 0.0705413818359375, 0.685577392578125, -0.09736251831054688, -0.059619903564453125, 1.179107666015625, 0.19358062744140625, 0.42572021484375, 1.3098602294921875, 1.4975128173828125, 0.2005901336669922, -0.19097137451171875, 1.445343017578125, 0.6671180725097656, 0.527069091796875, 0.6436767578125, 0.528167724609375, -0.1410503387451172, -0.24249267578125, 0.30342864990234375, 0.020416259765625, -0.40777587890625, -0.3341522216796875, 0.30773162841796875, 0.4871482849121094, -0.256134033203125, -0.4482879638671875, 1.2719879150390625, 0.4249725341796875, 0.46080780029296875, 0.016635894775390625, 0.6156730651855469, 0.6071052551269531, -0.08264350891113281, -0.07767105102539062, 0.019039154052734375, 0.3552513122558594, -0.25391387939453125, -0.12807846069335938, 0.022809982299804688, 0.1863861083984375, -0.32550048828125, -0.5257015228271484, 0.3963470458984375, 0.5682296752929688, -0.34270477294921875, -1.3697357177734375, -0.24001121520996094, 0.8142890930175781], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000090.npy"}
{"epoch": 0.1360544217687075, "step": 91, "batch_size": 64, "mean": 0.33689484000205994, "std": 0.6835027933120728, "min": -1.5979461669921875, "p10": -0.39273910522460936, "median": 0.279144287109375, "p90": 1.1830862045288086, "max": 2.4163665771484375, "pos_frac": 0.75, "sample": [0.2279052734375, 0.4717254638671875, 0.8934478759765625, -0.29323768615722656, 0.3477630615234375, 1.1033554077148438, -0.2663421630859375, 0.23443603515625, 1.2673187255859375, 2.4163665771484375, -0.4073486328125, 0.3654937744140625, -0.0003414154052734375, 0.1084136962890625, 0.014600753784179688, -0.45403480529785156, 1.2957305908203125, 0.514984130859375, 0.7035026550292969, -0.4607391357421875, 0.961151123046875, 1.01312255859375, -0.35865020751953125, 0.36010169982910156, 1.2158889770507812, 0.8558483123779297, 0.24210357666015625, 0.1100311279296875, 0.0077972412109375, 1.0550079345703125, 0.10254669189453125, 0.7905998229980469, 1.159271240234375, 0.12170028686523438, 0.8512668609619141, 0.3499031066894531, -0.085418701171875, 0.49886322021484375, 0.07152748107910156, -1.5979461669921875, 0.9980926513671875, -0.9366836547851562, -0.9095535278320312, -1.3358688354492188, 0.04222679138183594, 0.9019737243652344, 0.06591033935546875, 1.4911060333251953, 0.31710052490234375, 0.13759613037109375, 0.00775146484375, -0.3440818786621094, -0.051456451416015625, -0.013319015502929688, 1.2360687255859375, 1.1932926177978516, 0.2572174072265625, -0.01160430908203125, 1.1006393432617188, 0.3010711669921875, 0.01967620849609375, 0.4419841766357422, 0.30956268310546875, 0.53485107421875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000091.npy"}
{"epoch": 0.13756613756613756, "step": 92, "batch_size": 64, "mean": 0.2999285161495209, "std": 0.5971072316169739, "min": -0.982330322265625, "p10": -0.4671672821044922, "median": 0.23065662384033203, "p90": 1.0399887084960937, "max": 1.6079177856445312, "pos_frac": 0.65625, "sample": [-0.20143890380859375, 0.1846294403076172, -0.2601776123046875, 0.028779983520507812, 0.02767181396484375, 0.1935272216796875, -0.06737899780273438, 0.21913909912109375, -0.2264862060546875, -0.32195091247558594, -0.6613883972167969, 0.5904579162597656, 0.8932514190673828, 0.30242156982421875, 1.4627819061279297, 0.9221096038818359, 0.7275657653808594, -0.47402381896972656, 0.9448318481445312, -0.44609832763671875, 1.1441192626953125, -0.2517967224121094, 0.6578521728515625, 0.2568206787109375, 1.41510009765625, 0.23772621154785156, 0.47638702392578125, 0.5154953002929688, -0.29079437255859375, -0.00772857666015625, -0.6152400970458984, 1.0410003662109375, 0.8558273315429688, 0.21143722534179688, 0.2235870361328125, -0.1222686767578125, -0.25792694091796875, 0.5870513916015625, -0.982330322265625, -0.21421051025390625, 1.037628173828125, -0.038730621337890625, 0.4745025634765625, 0.6808242797851562, 0.36687660217285156, -0.4662513732910156, 1.1189422607421875, 1.3344879150390625, 1.6079177856445312, -0.530181884765625, -0.467559814453125, 0.9117889404296875, 0.20375823974609375, 0.022947311401367188, -0.0755615234375, -0.7313480377197266, 0.8757781982421875, 0.25011444091796875, 0.6298236846923828, 0.729766845703125, 0.6574020385742188, 0.10672378540039062, 0.8219375610351562, 0.95550537109375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000092.npy"}
{"epoch": 0.13907785336356765, "step": 93, "batch_size": 64, "mean": 0.3049851953983307, "std": 0.6706203818321228, "min": -1.3579483032226562, "p10": -0.34267730712890615, "median": 0.20646286010742188, "p90": 1.1129673004150393, "max": 2.3497314453125, "pos_frac": 0.703125, "sample": [0.5601673126220703, 0.81402587890625, 1.3354644775390625, 0.23070526123046875, -1.0170135498046875, 0.02068328857421875, 0.28299713134765625, 0.188201904296875, -0.0035839080810546875, 0.8483734130859375, 0.9413909912109375, 1.1528968811035156, 2.3497314453125, -1.0583686828613281, 0.11206626892089844, 0.6509552001953125, 0.48567962646484375, 0.7958335876464844, 0.15093421936035156, -0.17177581787109375, 0.21224212646484375, 0.0326080322265625, 0.13480377197265625, 0.695892333984375, 0.041492462158203125, -0.16542434692382812, 0.2466278076171875, -0.0412445068359375, -0.12508773803710938, 0.16748046875, 0.54815673828125, -0.2426910400390625, 0.6325931549072266, 1.2596282958984375, -1.3579483032226562, 0.1884613037109375, -0.7092742919921875, 0.2940254211425781, -0.16593170166015625, -0.0465240478515625, 0.5991401672363281, -0.0880279541015625, 0.4350605010986328, 0.5944137573242188, 0.20068359375, 0.957000732421875, 0.5002956390380859, 0.04649162292480469, 2.0251312255859375, -0.11203193664550781, 1.4024486541748047, 0.18905067443847656, 1.0197982788085938, 1.0037689208984375, 0.2855796813964844, -0.1571502685546875, 1.3221778869628906, -0.901214599609375, -0.14776611328125, -0.385528564453125, 0.5662841796875, -0.7370147705078125, 0.06320953369140625, 0.5680007934570312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000093.npy"}
{"epoch": 0.14058956916099774, "step": 94, "batch_size": 64, "mean": 0.3475595712661743, "std": 0.7267595529556274, "min": -2.1240692138671875, "p10": -0.404905891418457, "median": 0.3762493133544922, "p90": 1.0886646270751956, "max": 2.8522796630859375, "pos_frac": 0.75, "sample": [0.8430633544921875, 0.4115409851074219, 0.49707794189453125, 1.2449569702148438, 0.5885391235351562, -1.0645065307617188, -0.7463359832763672, 0.33856773376464844, 0.3409576416015625, 0.9866256713867188, 0.11359214782714844, 1.1209564208984375, 0.455535888671875, -2.1240692138671875, 0.4408149719238281, 0.3212547302246094, -0.15864181518554688, 0.05066490173339844, 0.7542877197265625, 0.580322265625, 0.7945194244384766, 1.9743881225585938, 0.6119613647460938, 0.6269798278808594, 1.3941268920898438, 0.009023666381835938, 0.3258171081542969, -0.2770881652832031, -0.42761993408203125, 0.52783203125, 0.42149925231933594, -0.2691059112548828, 0.2123260498046875, 0.2122802734375, 0.5164222717285156, 0.15591049194335938, 0.8358078002929688, 0.7417144775390625, 0.8138618469238281, 2.8522796630859375, 0.9066390991210938, 0.8846511840820312, -0.19648361206054688, 1.154327392578125, 1.0133171081542969, 0.6892929077148438, -1.088165283203125, 0.6639404296875, -0.2348175048828125, 0.2913856506347656, 1.4706573486328125, 0.10395050048828125, 0.1542205810546875, -0.07489776611328125, 0.11542320251464844, -0.019483566284179688, -0.32601165771484375, 0.45854759216308594, -0.40936279296875, 0.5392284393310547, 0.033599853515625, -0.6554489135742188, -0.39450645446777344, 0.11566734313964844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000094.npy"}
{"epoch": 0.1421012849584278, "step": 95, "batch_size": 64, "mean": 0.3784918189048767, "std": 0.6034960746765137, "min": -1.477264404296875, "p10": -0.21324348449707028, "median": 0.3194770812988281, "p90": 1.1794719696044924, "max": 1.9903564453125, "pos_frac": 0.71875, "sample": [1.9903564453125, -0.5966796875, 0.6906280517578125, -0.19865036010742188, -0.34076690673828125, 0.03331756591796875, 0.7559814453125, 0.3997650146484375, 0.8231887817382812, 1.14068603515625, 0.20912933349609375, 0.7118988037109375, 0.043365478515625, -0.19278335571289062, -0.5034561157226562, 0.6940383911132812, 0.869964599609375, 0.453155517578125, 0.38720130920410156, 0.9838752746582031, -0.009002685546875, 1.4718017578125, -0.13232994079589844, 0.7863655090332031, -0.2194976806640625, 0.15155792236328125, 1.58990478515625, -0.08245849609375, 0.419708251953125, -1.477264404296875, 0.168548583984375, 0.33648681640625, -0.24936294555664062, 1.1960945129394531, -0.08764266967773438, 0.22655487060546875, 0.9770278930664062, 0.14421844482421875, 0.0612030029296875, 0.174041748046875, -0.176910400390625, 0.25736236572265625, 0.128509521484375, 0.4756183624267578, 0.4850616455078125, -0.10638427734375, 1.262939453125, 0.04033660888671875, 0.8826522827148438, 1.4302978515625, 0.30246734619140625, 0.599334716796875, -0.5444202423095703, -0.08365249633789062, 0.730377197265625, 1.4187545776367188, -0.18211746215820312, 0.5656204223632812, 0.8917236328125, 0.8447418212890625, 0.1310291290283203, 0.6454601287841797, 0.564208984375, -0.13970565795898438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000095.npy"}
{"epoch": 0.1436130007558579, "step": 96, "batch_size": 64, "mean": 0.4078254997730255, "std": 0.63037109375, "min": -1.3615188598632812, "p10": -0.3379833221435547, "median": 0.41835498809814453, "p90": 1.1593658447265627, "max": 1.7628402709960938, "pos_frac": 0.78125, "sample": [0.5596466064453125, -1.3615188598632812, 0.336578369140625, 0.12000274658203125, 1.6615753173828125, 0.4411163330078125, 0.788970947265625, -0.3401451110839844, 0.4473114013671875, 0.20840072631835938, 0.6089935302734375, -0.01839447021484375, 1.0150985717773438, 1.6336212158203125, 0.5928115844726562, 0.9274444580078125, -0.33293914794921875, 0.4605064392089844, 0.39559364318847656, -0.16655349731445312, 0.9293937683105469, 0.87261962890625, 0.11745452880859375, -1.2680435180664062, 0.13567352294921875, 0.038539886474609375, 1.0320968627929688, 0.485443115234375, 0.8643417358398438, 0.3845634460449219, 1.0808448791503906, 1.0878448486328125, 1.287322998046875, 0.7138748168945312, 0.07134246826171875, 0.3318634033203125, -0.45941925048828125, 0.12462425231933594, -0.052555084228515625, 0.7038841247558594, 1.1900177001953125, -0.6000289916992188, 0.6119327545166016, 0.042613983154296875, 0.951507568359375, 0.136199951171875, 1.2496356964111328, 0.2385711669921875, 1.0400390625, -0.2057647705078125, 0.751007080078125, -0.2362823486328125, 0.5420570373535156, 0.6973724365234375, 1.3562469482421875, 0.5338592529296875, 0.2886924743652344, -0.20264053344726562, 0.24185562133789062, 1.7628402709960938, 0.084442138671875, 0.21335411071777344, -0.5273056030273438, -0.5192222595214844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000096.npy"}
{"epoch": 0.14512471655328799, "step": 97, "batch_size": 64, "mean": 0.33114248514175415, "std": 0.6368626952171326, "min": -0.7954177856445312, "p10": -0.36029205322265623, "median": 0.25428104400634766, "p90": 1.1734411239624025, "max": 2.1299285888671875, "pos_frac": 0.65625, "sample": [-0.3801727294921875, 0.6640853881835938, -0.6557807922363281, 1.8196830749511719, 0.3103199005126953, 1.9720993041992188, -0.30438232421875, -0.27426910400390625, 0.09531402587890625, -0.485565185546875, -0.1329822540283203, -0.3088417053222656, 0.2703094482421875, 0.6885604858398438, 0.188690185546875, -0.7954177856445312, -0.14571571350097656, 0.8745231628417969, 0.5554122924804688, 0.25940895080566406, 0.4667816162109375, -0.32244873046875, 0.11315345764160156, -0.11270904541015625, 0.210479736328125, -0.14905548095703125, 0.08763504028320312, 0.8024826049804688, 0.4522514343261719, 0.15740966796875, 0.3067779541015625, -0.496063232421875, 1.1535568237304688, 0.3358478546142578, 0.6637191772460938, 0.0482025146484375, 1.3212013244628906, -0.005374908447265625, -0.162872314453125, 1.186614990234375, 0.10825347900390625, -0.058681488037109375, 2.1299285888671875, 0.5157852172851562, 0.9278793334960938, -0.19917678833007812, 0.27518272399902344, 0.6549530029296875, 1.959503173828125, 0.24915313720703125, -0.3765106201171875, 0.443389892578125, 0.834014892578125, -0.0975189208984375, 0.7304306030273438, 0.9254684448242188, 1.1819629669189453, 0.360321044921875, 0.1035308837890625, 0.6010208129882812, 0.5550498962402344, -0.20623016357421875, -0.2852439880371094, -0.4122161865234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000097.npy"}
{"epoch": 0.14663643235071808, "step": 98, "batch_size": 64, "mean": 0.3170931935310364, "std": 0.5286184549331665, "min": -0.5500869750976562, "p10": -0.3254034042358398, "median": 0.2739839553833008, "p90": 1.0963340759277345, "max": 1.771942138671875, "pos_frac": 0.6875, "sample": [0.5035552978515625, 0.18634796142578125, 0.5737113952636719, 0.45447540283203125, 0.44528961181640625, 0.149993896484375, 0.5191421508789062, -0.0265960693359375, 0.20366668701171875, 0.8845748901367188, 0.4875335693359375, 0.1917247772216797, 1.0969085693359375, 0.28899383544921875, 1.1976165771484375, 0.8081283569335938, -0.00811767578125, 1.2958641052246094, 0.2589740753173828, 0.07151031494140625, 0.3859367370605469, 1.4773979187011719, 0.335662841796875, -0.125946044921875, 0.13771629333496094, -0.1450786590576172, -0.0452880859375, 0.021343231201171875, -0.14420318603515625, -0.07611083984375, 0.5085372924804688, 0.3713531494140625, 0.5847015380859375, 0.7054443359375, -0.24769210815429688, -0.2449493408203125, -0.33560752868652344, 1.771942138671875, -0.020715713500976562, 0.20336151123046875, 0.47187042236328125, 0.00946044921875, -0.5037765502929688, 0.35417938232421875, -0.5500869750976562, -0.4753303527832031, 0.6812458038330078, 0.13762283325195312, -0.524078369140625, 0.9144515991210938, 0.5553321838378906, 1.3198165893554688, -0.20814895629882812, -0.5377159118652344, 1.0634765625, 0.04694366455078125, 0.3889274597167969, -0.05768775939941406, 0.4451866149902344, 1.1446990966796875, -0.3846473693847656, 1.0949935913085938, 0.507720947265625, -0.3015937805175781], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000098.npy"}
{"epoch": 0.14814814814814814, "step": 99, "batch_size": 64, "mean": 0.3479306697845459, "std": 0.5749596357345581, "min": -0.9476776123046875, "p10": -0.282342529296875, "median": 0.21709060668945312, "p90": 1.242120742797852, "max": 1.654632568359375, "pos_frac": 0.78125, "sample": [1.2872390747070312, 1.3748531341552734, 0.07910919189453125, 1.654632568359375, 0.24277496337890625, 0.00738525390625, 0.07418060302734375, -0.28849029541015625, 0.4525604248046875, 1.4463272094726562, 1.32769775390625, 0.3430900573730469, 0.9571456909179688, 0.7270050048828125, 0.032649993896484375, -0.26799774169921875, 0.04375457763671875, 0.07341575622558594, 0.16346359252929688, 0.8447093963623047, 0.7105636596679688, 0.14014434814453125, 0.10759735107421875, 0.3809967041015625, -0.55987548828125, -0.083343505859375, 0.22808837890625, -0.9476776123046875, -0.2653961181640625, 1.4408683776855469, 0.08525466918945312, -0.10986328125, 0.00136566162109375, 0.20609283447265625, 0.7039375305175781, 1.0864028930664062, 0.4686546325683594, -0.5292587280273438, 0.35111045837402344, 1.1368446350097656, 0.1812572479248047, 0.660491943359375, 0.9090175628662109, 1.5358428955078125, -0.01662445068359375, 0.38472747802734375, 1.1223793029785156, -0.3142261505126953, -0.2409515380859375, 0.4632682800292969, 0.9881782531738281, 0.5105476379394531, 0.16119766235351562, 0.48018646240234375, 0.1245880126953125, 0.24457550048828125, -0.5881004333496094, -0.2322998046875, 0.32779884338378906, 0.05767059326171875, 0.6688575744628906, 0.13666152954101562, -0.5305709838867188, 0.10507583618164062], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000099.npy"}
{"epoch": 0.14965986394557823, "step": 100, "batch_size": 64, "mean": 0.3216173052787781, "std": 0.7350816130638123, "min": -1.3964385986328125, "p10": -0.5645439147949218, "median": 0.19692420959472656, "p90": 1.1841377258300783, "max": 2.5517578125, "pos_frac": 0.65625, "sample": [0.68646240234375, 0.157073974609375, -0.394134521484375, 0.4542999267578125, 1.0472412109375, -0.05420684814453125, 0.18958091735839844, 0.7771244049072266, 1.5016098022460938, 0.26859283447265625, 0.82464599609375, 0.00440216064453125, 0.7833480834960938, 0.5866622924804688, -0.16330718994140625, 2.3148422241210938, -0.24536895751953125, -0.008380889892578125, 0.4017677307128906, -0.0290069580078125, 0.4484672546386719, 0.6424942016601562, -0.5268020629882812, 0.046131134033203125, -0.45468902587890625, 0.40827178955078125, -0.11329841613769531, 0.7071075439453125, -0.04397773742675781, 0.102691650390625, 0.010345458984375, -1.0839080810546875, -0.20220947265625, -0.5922889709472656, -0.34279441833496094, -0.047710418701171875, 0.212310791015625, -0.6117401123046875, -0.7925567626953125, 1.2004318237304688, -0.580718994140625, 1.2168731689453125, -0.04315948486328125, 0.060638427734375, 0.2551002502441406, 1.3896293640136719, 0.8770980834960938, 0.09811973571777344, 0.1621074676513672, -0.9008255004882812, 1.835845947265625, 0.820587158203125, 0.8252410888671875, 0.2042675018310547, 0.1631622314453125, 0.8427009582519531, -0.0570831298828125, 1.1461181640625, 0.8687210083007812, 0.686798095703125, 2.5517578125, 0.829132080078125, -1.3964385986328125, 0.6583099365234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000100.npy"}
{"epoch": 0.15117157974300832, "step": 101, "batch_size": 64, "mean": 0.48820409178733826, "std": 0.6080871820449829, "min": -0.9186439514160156, "p10": -0.2539020538330078, "median": 0.4136343002319336, "p90": 1.1841415405273439, "max": 2.062286376953125, "pos_frac": 0.828125, "sample": [0.3378410339355469, 0.044097900390625, -0.26264190673828125, 0.24990081787109375, 1.306060791015625, 1.8868179321289062, 1.0487747192382812, 0.3604450225830078, 0.0091400146484375, 0.42862701416015625, -0.015918731689453125, 0.32897186279296875, 0.170623779296875, 0.5411376953125, 1.58258056640625, 0.002956390380859375, 0.6626358032226562, 0.46207427978515625, -0.23350906372070312, -0.9186439514160156, 1.0279083251953125, -0.049327850341796875, 0.5480804443359375, 0.24625396728515625, 0.2956886291503906, 1.0816307067871094, 1.1917495727539062, 1.6119384765625, 0.31600379943847656, 1.0831165313720703, 0.6810455322265625, 0.819488525390625, 0.5853710174560547, -0.026103973388671875, -0.7759170532226562, -0.7791748046875, 0.35698699951171875, 0.5673904418945312, 0.3366870880126953, -0.29474639892578125, -0.283905029296875, 0.9426727294921875, 0.4653663635253906, 0.49176788330078125, 2.062286376953125, 0.53570556640625, 0.34769439697265625, 0.1513671875, 0.14910316467285156, 0.1995258331298828, 1.1663894653320312, 0.39864158630371094, 0.9093093872070312, 0.8940696716308594, 0.20548248291015625, 0.47039794921875, 0.32036590576171875, 0.7393341064453125, -0.3062591552734375, 0.3165092468261719, 0.6984405517578125, 0.821136474609375, 2.0284194946289062, 0.7051982879638672], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000101.npy"}
{"epoch": 0.15268329554043839, "step": 102, "batch_size": 64, "mean": 0.3097189664840698, "std": 0.6946539878845215, "min": -1.36444091796875, "p10": -0.5417060852050781, "median": 0.27504730224609375, "p90": 1.0950576782226569, "max": 2.1849517822265625, "pos_frac": 0.75, "sample": [0.9271392822265625, -0.3146820068359375, 0.15343666076660156, 0.6430091857910156, -1.321746826171875, 0.5250034332275391, 0.0875396728515625, 0.7095603942871094, -0.14473724365234375, -0.6145210266113281, 1.2814712524414062, 0.35655975341796875, 0.27591705322265625, 0.06857490539550781, 0.3806915283203125, 0.27417755126953125, -0.551422119140625, 2.009063720703125, -0.5190353393554688, 0.6120147705078125, -0.18819427490234375, 0.2155475616455078, -0.02869415283203125, 0.362335205078125, 0.7337150573730469, 0.5776443481445312, 0.8501510620117188, 0.76092529296875, -0.8712158203125, 0.05535888671875, 0.90570068359375, 1.5376930236816406, -0.3837432861328125, 0.8856468200683594, 0.02892303466796875, -0.456146240234375, 0.45229339599609375, 1.167022705078125, 2.1849517822265625, -0.3389320373535156, 0.0538330078125, 1.4991912841796875, -0.91058349609375, 0.508880615234375, 0.17111968994140625, 0.24309539794921875, 0.4693756103515625, 0.07513046264648438, 0.18091583251953125, 0.6141586303710938, 0.3563365936279297, 0.8407363891601562, 0.21616363525390625, -0.7932376861572266, 0.14126968383789062, -0.08323287963867188, 0.023487091064453125, 1.5537109375, 0.2974853515625, -1.36444091796875, 0.7453460693359375, 0.7674942016601562, 0.7554588317871094, 0.17132186889648438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000102.npy"}
{"epoch": 0.15419501133786848, "step": 103, "batch_size": 64, "mean": 0.30156272649765015, "std": 0.657216489315033, "min": -1.1313095092773438, "p10": -0.45784454345703124, "median": 0.21212387084960938, "p90": 1.1538787841796876, "max": 2.1854934692382812, "pos_frac": 0.640625, "sample": [-0.08322906494140625, 1.1395606994628906, 1.0684967041015625, -1.1313095092773438, 0.8593311309814453, 0.18901824951171875, 0.5902786254882812, 0.2837390899658203, -0.0482330322265625, 0.3122215270996094, -0.22445297241210938, 0.445159912109375, -0.35698699951171875, 0.05744171142578125, -0.311492919921875, 0.7917442321777344, 0.7601222991943359, 0.9720954895019531, -0.57379150390625, 1.0187263488769531, 0.086669921875, -0.5496177673339844, 0.03286933898925781, -0.003292083740234375, 0.5331687927246094, -0.3614463806152344, -0.6475257873535156, 1.479583740234375, -0.43878936767578125, 0.5848674774169922, 0.08182525634765625, 0.6776351928710938, 1.357452392578125, 0.17901039123535156, 0.093994140625, 0.2352294921875, -0.25531768798828125, 0.496337890625, -0.23532867431640625, -0.047466278076171875, 1.1600151062011719, -0.19578933715820312, 0.28293418884277344, 1.1736984252929688, 0.7300567626953125, 0.056976318359375, 0.7538566589355469, 0.3228874206542969, 1.2692699432373047, -0.22188377380371094, 1.927093505859375, 0.45729827880859375, -0.6494598388671875, -0.022342681884765625, -0.4154510498046875, -0.46601104736328125, 0.3160400390625, -0.7996711730957031, -0.10314369201660156, 0.7068328857421875, 0.17193603515625, 0.7123908996582031, 2.1854934692382812, 0.8886871337890625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000103.npy"}
{"epoch": 0.15570672713529857, "step": 104, "batch_size": 64, "mean": 0.23145365715026855, "std": 0.6443175077438354, "min": -1.2103958129882812, "p10": -0.4597517013549804, "median": 0.18278884887695312, "p90": 1.0361251831054692, "max": 1.7322235107421875, "pos_frac": 0.640625, "sample": [0.3177337646484375, 0.7556915283203125, 0.4745330810546875, 0.5063686370849609, -0.9547195434570312, 0.114044189453125, 0.2180652618408203, 0.3065948486328125, 0.3226470947265625, -0.4005126953125, 0.6504669189453125, 0.03568267822265625, 1.2626953125, 0.7464447021484375, -0.6404495239257812, -0.3379974365234375, 0.875244140625, -0.11363792419433594, -0.28704071044921875, -0.05716705322265625, -0.3751373291015625, -0.5961761474609375, 1.08856201171875, -0.1789398193359375, -0.570037841796875, 0.029462814331054688, 0.20952224731445312, -0.96673583984375, 1.7322235107421875, -0.3862018585205078, 0.25377655029296875, -0.4851398468017578, -0.3913555145263672, -0.23729896545410156, -0.0951385498046875, 0.6977825164794922, 0.24724578857421875, -0.28768348693847656, 1.5701522827148438, 0.20085906982421875, 0.3828887939453125, 0.00379180908203125, 1.678192138671875, -0.1423931121826172, 0.023395538330078125, 0.1647186279296875, 0.88946533203125, 0.006389617919921875, -1.2103958129882812, 0.003662109375, 0.7851009368896484, -0.012542724609375, 0.6560020446777344, 0.8586540222167969, 0.1438446044921875, 0.8042526245117188, 0.2594261169433594, 0.317901611328125, 0.33594512939453125, -0.32489013671875, -0.05286407470703125, 1.3673782348632812, 1.7069091796875, 0.9137725830078125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000104.npy"}
{"epoch": 0.15721844293272866, "step": 105, "batch_size": 64, "mean": 0.3727165460586548, "std": 0.659478485584259, "min": -1.6348419189453125, "p10": -0.19124374389648435, "median": 0.35895729064941406, "p90": 1.1290878295898437, "max": 2.0029449462890625, "pos_frac": 0.765625, "sample": [0.3875312805175781, 0.929595947265625, -0.3366546630859375, 0.37599945068359375, 0.20287132263183594, 0.5550899505615234, -0.911163330078125, 1.665374755859375, 0.0062713623046875, -0.015130996704101562, -0.1560516357421875, 0.6079750061035156, 1.0711097717285156, 1.1292572021484375, 0.4463005065917969, 1.3096771240234375, 0.3230266571044922, -1.4682693481445312, 0.1365528106689453, 0.9590644836425781, -0.0329742431640625, -0.0633392333984375, 0.4223747253417969, -0.17281341552734375, 0.22678375244140625, 0.40948486328125, 1.0842781066894531, 1.128692626953125, 0.6372146606445312, 0.171661376953125, -0.01373291015625, 0.6623077392578125, 0.0442352294921875, 0.308380126953125, 0.572113037109375, 0.8811492919921875, 0.4085044860839844, 0.0637054443359375, 0.32635498046875, 0.7008800506591797, 0.17182540893554688, 0.24917984008789062, 0.09227752685546875, 1.11199951171875, 2.0029449462890625, 1.1591377258300781, -1.6348419189453125, 0.3419151306152344, -0.1176605224609375, 0.1629486083984375, 0.3264617919921875, 0.17067337036132812, 0.6276988983154297, 1.3668212890625, 0.6749267578125, -0.1991424560546875, 1.638875961303711, 0.550628662109375, -0.1997833251953125, 0.4911956787109375, -1.2224960327148438, 0.6941375732421875, 0.4303436279296875, -0.0198974609375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000105.npy"}
{"epoch": 0.15873015873015872, "step": 106, "batch_size": 64, "mean": 0.27549615502357483, "std": 0.6000708341598511, "min": -1.619964599609375, "p10": -0.30638160705566403, "median": 0.2872581481933594, "p90": 1.0099170684814454, "max": 1.673431396484375, "pos_frac": 0.671875, "sample": [0.00396728515625, 0.64227294921875, 0.6185302734375, 0.1642303466796875, 0.27745819091796875, -0.07827377319335938, 1.673431396484375, 0.6743869781494141, -0.14988327026367188, 0.15274810791015625, 0.38405609130859375, 0.4153633117675781, 1.00177001953125, -0.12818145751953125, -0.3125419616699219, -0.1417388916015625, 0.6336212158203125, -0.08645248413085938, 0.06226348876953125, -0.423858642578125, -0.21637344360351562, -0.18708038330078125, 0.078826904296875, -0.02176666259765625, -0.0301055908203125, -0.5108184814453125, -0.46765708923339844, -0.514556884765625, 0.2773323059082031, 1.6326904296875, -0.10675811767578125, 0.061023712158203125, -0.20488739013671875, 1.0134086608886719, 0.652069091796875, 0.6090240478515625, 0.7416419982910156, -0.2761859893798828, 1.0817832946777344, 0.29705810546875, 0.4393272399902344, 0.7077178955078125, 1.3775463104248047, 0.3909587860107422, -0.2920074462890625, 0.587432861328125, 0.3337574005126953, 0.023563385009765625, -1.619964599609375, 0.112335205078125, 0.32244873046875, 0.732696533203125, 0.8525390625, 1.3060035705566406, 0.493988037109375, 0.3388824462890625, 0.14548110961914062, 1.3282337188720703, -0.27252960205078125, 0.6427001953125, -1.2952728271484375, 0.3871746063232422, 0.5849761962890625, 0.7119274139404297], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000106.npy"}
{"epoch": 0.1602418745275888, "step": 107, "batch_size": 64, "mean": 0.19795984029769897, "std": 0.7202760577201843, "min": -1.8435440063476562, "p10": -0.4961791992187499, "median": 0.1568603515625, "p90": 1.0505537033081054, "max": 2.295806884765625, "pos_frac": 0.671875, "sample": [0.014142990112304688, -1.8435440063476562, -0.090179443359375, 2.285991668701172, 0.8121795654296875, 0.540863037109375, -0.845977783203125, 0.9507598876953125, -0.23627471923828125, 0.6243972778320312, -1.4127655029296875, 0.5186843872070312, 0.22721481323242188, 0.4929351806640625, 0.05400848388671875, -0.38523292541503906, 0.41846466064453125, -0.2371673583984375, 0.08260536193847656, -0.2664031982421875, -0.348358154296875, 2.295806884765625, 1.3865203857421875, 0.20573806762695312, 1.1169166564941406, -1.314178466796875, 1.0520877838134766, -0.23976898193359375, -0.5240249633789062, -0.10174560546875, 0.4263954162597656, -0.2288055419921875, 0.01639556884765625, 0.3156394958496094, 0.041461944580078125, 0.236968994140625, 1.0305862426757812, 0.15423583984375, 0.20467376708984375, 0.5314407348632812, 1.5112457275390625, 1.2518692016601562, 0.32863616943359375, -0.26453399658203125, 0.2723808288574219, 0.24022674560546875, 0.20012283325195312, 0.109375, -0.146728515625, -0.6016159057617188, 0.8714370727539062, 0.18421173095703125, 0.025205612182617188, -0.1779956817626953, -0.32411766052246094, 0.33261871337890625, 0.15948486328125, 0.1204681396484375, 0.03979682922363281, -0.565216064453125, -0.43120574951171875, 0.40301513671875, 0.12108612060546875, 1.0469741821289062], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000107.npy"}
{"epoch": 0.1617535903250189, "step": 108, "batch_size": 64, "mean": 0.38168424367904663, "std": 0.6766772866249084, "min": -1.406219482421875, "p10": -0.38959598541259766, "median": 0.2365093231201172, "p90": 1.2469305038452148, "max": 2.0320587158203125, "pos_frac": 0.671875, "sample": [1.031005859375, 0.9496307373046875, 0.8416748046875, 0.9643478393554688, -0.2438182830810547, 1.3989486694335938, 0.8064651489257812, 0.3645915985107422, 1.1362457275390625, -0.244232177734375, -0.0947113037109375, 1.6147079467773438, 0.20084381103515625, -0.45661354064941406, 0.2381439208984375, 0.2991790771484375, -0.21759796142578125, 0.013675689697265625, 0.5712928771972656, 0.54815673828125, 1.5076904296875, 0.07722091674804688, -0.3981761932373047, 0.034942626953125, 1.0036544799804688, -0.3280181884765625, 0.9429359436035156, -0.2000732421875, 2.0320587158203125, 0.7491302490234375, 0.155975341796875, 0.14758682250976562, 0.5830535888671875, -0.14123153686523438, 0.23891448974609375, 1.8388137817382812, -0.11161994934082031, 0.23487472534179688, -1.406219482421875, 1.2396011352539062, -0.36957550048828125, 1.1458511352539062, 0.1103668212890625, -0.0268707275390625, -0.00287628173828125, 0.2897529602050781, -0.22925376892089844, 1.2473087310791016, -0.40805816650390625, -0.5043258666992188, -0.5139312744140625, 0.1087493896484375, 0.56768798828125, 0.99365234375, -0.12477874755859375, 0.6399688720703125, 0.6875686645507812, 1.2763671875, 0.19962692260742188, 1.2460479736328125, -0.27691078186035156, 0.93743896484375, -0.5867996215820312, 0.0977325439453125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000108.npy"}
{"epoch": 0.16326530612244897, "step": 109, "batch_size": 64, "mean": 0.292807400226593, "std": 0.6282463073730469, "min": -1.024383544921875, "p10": -0.5229379653930664, "median": 0.2689857482910156, "p90": 1.0501079559326174, "max": 2.023162841796875, "pos_frac": 0.703125, "sample": [0.7382736206054688, 0.5813446044921875, 1.10784912109375, -0.10806655883789062, 0.30673789978027344, 0.19135093688964844, 1.0697517395019531, 0.6918258666992188, 0.3619232177734375, 0.7116203308105469, 0.2173614501953125, 0.35245513916015625, 1.16644287109375, 0.9084854125976562, -0.71038818359375, -0.363494873046875, 0.6399269104003906, 0.11856269836425781, 0.3083457946777344, 0.5521583557128906, 0.6332664489746094, 0.9769821166992188, -0.08770751953125, 0.060047149658203125, 0.1090240478515625, 1.9412994384765625, -0.5593948364257812, 0.2999134063720703, -0.5862522125244141, 2.023162841796875, 0.020368576049804688, 0.442230224609375, -0.19722366333007812, 0.3702354431152344, 0.1912822723388672, 0.18580245971679688, -0.43787193298339844, -1.024383544921875, 0.2028656005859375, 0.6237220764160156, -0.1260833740234375, -0.1292438507080078, -0.3115425109863281, -0.916595458984375, 0.20090866088867188, -0.23372650146484375, 0.29224395751953125, 0.31760597229003906, -0.43743896484375, -0.7115325927734375, 0.019626617431640625, -0.2346649169921875, -0.0539398193359375, 1.5970230102539062, 0.7856941223144531, 1.0042724609375, 1.5063018798828125, 0.6480941772460938, 0.2059478759765625, 0.7070159912109375, 0.29547882080078125, -0.593902587890625, 0.632568359375, 0.2457275390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000109.npy"}
{"epoch": 0.16477702191987906, "step": 110, "batch_size": 64, "mean": 0.24601292610168457, "std": 0.6973888278007507, "min": -1.5277252197265625, "p10": -0.5207000732421874, "median": 0.2303791046142578, "p90": 1.1548019409179693, "max": 2.7044677734375, "pos_frac": 0.65625, "sample": [0.5343189239501953, 0.2851142883300781, -1.5277252197265625, 0.15131759643554688, 0.28296661376953125, -0.258148193359375, 0.5264663696289062, 1.3077888488769531, 0.83319091796875, 1.4577178955078125, 0.8510704040527344, -0.15871047973632812, 0.0132598876953125, 1.0171966552734375, -0.03844451904296875, -0.7676467895507812, -0.018802642822265625, 1.4519500732421875, -0.06805992126464844, -0.9631328582763672, -0.11179161071777344, 0.7914199829101562, 0.3153533935546875, 0.01409912109375, -0.0390625, 0.02978515625, 0.310455322265625, 0.22893905639648438, -0.3099212646484375, -0.21697998046875, 0.2323150634765625, 0.5301055908203125, 0.648834228515625, -0.3905181884765625, -0.06504440307617188, 0.0972137451171875, -0.340423583984375, 2.7044677734375, -0.28266143798828125, 0.4001502990722656, 0.6361083984375, -0.2832183837890625, 0.04821014404296875, 0.21762847900390625, 0.5210990905761719, 0.23181915283203125, 0.33113861083984375, -0.8816604614257812, 0.69134521484375, -1.0555648803710938, 0.5921173095703125, 0.14188385009765625, 0.3117828369140625, -0.5764923095703125, 0.327423095703125, 1.47479248046875, 0.5391654968261719, 1.213775634765625, 0.2719993591308594, 1.7568817138671875, -0.006591796875, 0.1307220458984375, 0.481109619140625, -0.829071044921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000110.npy"}
{"epoch": 0.16628873771730915, "step": 111, "batch_size": 64, "mean": 0.2409285306930542, "std": 0.5866271257400513, "min": -1.249542236328125, "p10": -0.4981330871582031, "median": 0.2937917709350586, "p90": 0.9501716613769532, "max": 1.6070175170898438, "pos_frac": 0.671875, "sample": [-0.08066368103027344, -0.2969322204589844, -0.14759063720703125, 0.849609375, 0.6674575805664062, 0.15341949462890625, -1.249542236328125, 0.37998199462890625, 0.6318244934082031, 0.273345947265625, 0.48905181884765625, 0.8329849243164062, -0.46689605712890625, 0.32239532470703125, 0.13195037841796875, 0.6628646850585938, 0.10095977783203125, 0.3624076843261719, -0.34960174560546875, 1.1268672943115234, -0.21941375732421875, -0.4185791015625, -0.00611114501953125, 1.1175727844238281, 0.5303802490234375, 0.8155460357666016, 0.2119140625, 1.058929443359375, 0.10247230529785156, 0.7997589111328125, 0.12985610961914062, -0.9325199127197266, -1.042633056640625, -0.3078765869140625, 0.30687713623046875, 0.41561126708984375, 0.7698402404785156, 0.7770843505859375, -0.14748382568359375, 0.878753662109375, 0.9602813720703125, 0.5191650390625, 0.9265823364257812, -0.5115203857421875, 0.28070640563964844, 0.10903549194335938, 0.14748191833496094, 1.10809326171875, -0.5713653564453125, 0.5147438049316406, 1.6070175170898438, 0.9908218383789062, 0.4275360107421875, -0.20691871643066406, -1.0996551513671875, 0.3571052551269531, -0.553131103515625, -0.06231689453125, 0.0298614501953125, 0.5406379699707031, 0.56488037109375, 0.339874267578125, -0.19133377075195312, -0.04203033447265625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000111.npy"}
{"epoch": 0.16780045351473924, "step": 112, "batch_size": 64, "mean": 0.3179827332496643, "std": 0.7054463624954224, "min": -1.3549423217773438, "p10": -0.540778350830078, "median": 0.26416015625, "p90": 1.2151363372802735, "max": 1.857696533203125, "pos_frac": 0.671875, "sample": [0.09039306640625, 1.8199615478515625, 1.2189483642578125, 0.6045036315917969, -0.21109580993652344, 0.576141357421875, 1.857696533203125, 0.3118324279785156, 1.6482391357421875, 0.9116363525390625, -0.092559814453125, -0.8634033203125, -0.40396881103515625, 0.7544097900390625, -0.1330718994140625, -0.1289825439453125, 0.65594482421875, 0.12970733642578125, -1.3549423217773438, -0.20211410522460938, 0.33165740966796875, -0.391845703125, 0.9510993957519531, 0.8700103759765625, -0.5994110107421875, 0.4836311340332031, 0.21929931640625, 0.9812774658203125, 0.24407958984375, 0.0138702392578125, 0.910308837890625, 0.02873992919921875, -0.18479537963867188, 0.48300743103027344, 0.5371170043945312, 1.5462646484375, -0.12740516662597656, 0.04692840576171875, 1.3211746215820312, -0.02419281005859375, 0.028045654296875, 0.28424072265625, 0.5220222473144531, 0.22942543029785156, -1.1471328735351562, 0.974090576171875, 0.5847320556640625, 0.7351341247558594, 0.12947845458984375, -0.2653923034667969, -1.1773414611816406, 1.2062416076660156, -0.14820098876953125, 0.7105941772460938, 0.8588409423828125, 0.76971435546875, -0.704254150390625, -0.21070480346679688, 0.8358688354492188, 1.326324462890625, 0.9115943908691406, -0.6448822021484375, 0.05228424072265625, -0.3399200439453125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000112.npy"}
{"epoch": 0.1693121693121693, "step": 113, "batch_size": 64, "mean": 0.2545267343521118, "std": 0.6518250703811646, "min": -1.2592315673828125, "p10": -0.4982032775878906, "median": 0.24757766723632812, "p90": 1.2043254852294927, "max": 1.7338104248046875, "pos_frac": 0.65625, "sample": [-0.745635986328125, -0.02405548095703125, -0.3126373291015625, 0.1422557830810547, -0.40970611572265625, 1.2865409851074219, 0.2145843505859375, 0.6485595703125, 0.6433792114257812, 1.2718639373779297, 0.02039337158203125, 0.22524261474609375, 0.024169921875, 1.00555419921875, -0.5476264953613281, 0.2936553955078125, 0.2785320281982422, 0.8685760498046875, -0.3587188720703125, -0.8758087158203125, 0.173980712890625, 0.04245758056640625, 0.6990814208984375, -1.11370849609375, -0.5086631774902344, -0.1493377685546875, 1.0730514526367188, 0.4529094696044922, -0.041103363037109375, -0.01813507080078125, 0.8972206115722656, 0.33392333984375, -0.17994308471679688, -0.28115081787109375, 0.99688720703125, 1.2605857849121094, -1.2592315673828125, 0.6209716796875, -0.411376953125, 0.3484344482421875, 0.2973766326904297, 1.5688419342041016, 0.599609375, 1.7338104248046875, -0.4737968444824219, -0.388092041015625, 0.19380569458007812, 0.06977462768554688, -0.7594852447509766, 0.7409191131591797, -0.019622802734375, 0.35189056396484375, 0.3200397491455078, 0.2699127197265625, 1.5805511474609375, -0.281097412109375, 0.6785125732421875, -0.19302940368652344, 0.66204833984375, 0.4609527587890625, 0.40964508056640625, 0.3084564208984375, 0.11225509643554688, 1.4604606628417969], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000113.npy"}
{"epoch": 0.1708238851095994, "step": 114, "batch_size": 64, "mean": 0.5088605880737305, "std": 0.6990227699279785, "min": -1.0643692016601562, "p10": -0.3953788757324218, "median": 0.4885978698730469, "p90": 1.3851940155029299, "max": 2.42205810546875, "pos_frac": 0.796875, "sample": [-0.414520263671875, 0.9052200317382812, 0.3110771179199219, 0.19844436645507812, 0.5641899108886719, -0.31290435791015625, 0.8072128295898438, -0.4587860107421875, 0.458343505859375, -0.35071563720703125, 0.14662742614746094, 1.2748527526855469, 0.7806053161621094, 0.9136962890625, 0.443145751953125, -1.0643692016601562, 0.667938232421875, 0.9033565521240234, -0.17688941955566406, 0.6506423950195312, 0.5188140869140625, 1.4172821044921875, 1.7136154174804688, 0.31122779846191406, 0.49343109130859375, 0.74029541015625, 0.13144302368164062, 1.1735248565673828, -0.6472854614257812, 0.608489990234375, 0.44830894470214844, 0.4921760559082031, 1.2395286560058594, 0.5626373291015625, 1.2285499572753906, 0.2380828857421875, 0.8878402709960938, 1.0387344360351562, 1.540802001953125, -0.4449882507324219, 0.170257568359375, 0.9473838806152344, 0.6854248046875, 0.07109642028808594, 0.26454925537109375, -0.2946205139160156, 1.399566650390625, 0.4850196838378906, 0.019687652587890625, -0.1182403564453125, -0.559783935546875, 2.42205810546875, 0.6263713836669922, 1.141510009765625, 1.3516578674316406, 2.15411376953125, 0.29939842224121094, 0.043674468994140625, 0.416778564453125, -0.6819686889648438, 1.82928466796875, -0.3437347412109375, 0.020751953125, 0.2771644592285156], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000114.npy"}
{"epoch": 0.17233560090702948, "step": 115, "batch_size": 64, "mean": 0.27014434337615967, "std": 0.6306560039520264, "min": -1.0684394836425781, "p10": -0.49326019287109374, "median": 0.29395294189453125, "p90": 0.9165439605712892, "max": 2.541961669921875, "pos_frac": 0.703125, "sample": [0.51092529296875, 0.24262237548828125, 0.37918853759765625, -0.6917266845703125, 0.659271240234375, 0.3590087890625, 0.1002197265625, 0.7752685546875, 0.5468597412109375, 2.541961669921875, 0.1365337371826172, -0.055816650390625, 2.1539459228515625, -0.29541015625, 0.295196533203125, 0.3545989990234375, -0.08505821228027344, -0.8534698486328125, 0.0561676025390625, 0.11507415771484375, 1.009796142578125, 0.20993804931640625, -0.5058212280273438, -0.46395111083984375, -0.6504383087158203, 0.3904304504394531, -0.8553619384765625, 0.7619514465332031, 0.2927093505859375, 0.8796806335449219, 0.41497802734375, 0.5086765289306641, 0.1756439208984375, -0.107452392578125, 0.02832794189453125, 0.5102767944335938, -0.21198272705078125, 0.4092845916748047, 0.247100830078125, 0.7194976806640625, 1.2315254211425781, -1.0684394836425781, 1.0021438598632812, -0.037872314453125, 0.932342529296875, 0.6244869232177734, 0.581573486328125, 0.4747810363769531, 0.10977935791015625, -0.652252197265625, -0.1609477996826172, 0.10483169555664062, 1.3565902709960938, 0.024797439575195312, -0.44780540466308594, -0.026365280151367188, 0.37804412841796875, -0.3052406311035156, -0.3497314453125, 0.32273101806640625, 0.6631622314453125, 0.6422786712646484, 0.33803367614746094, 0.542144775390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000115.npy"}
{"epoch": 0.17384731670445955, "step": 116, "batch_size": 64, "mean": 0.37526440620422363, "std": 0.6735005974769592, "min": -1.1293716430664062, "p10": -0.30665893554687496, "median": 0.17713069915771484, "p90": 1.0903160095214843, "max": 2.0172653198242188, "pos_frac": 0.6875, "sample": [0.9137496948242188, -0.40883636474609375, 0.6445960998535156, 0.8736343383789062, 1.0385971069335938, -1.1293716430664062, 0.12162017822265625, 0.10120964050292969, 0.5609149932861328, 0.10784912109375, 0.9387741088867188, 0.019855499267578125, 0.5142135620117188, 1.08563232421875, -0.07315826416015625, 0.1717967987060547, 0.07599449157714844, 0.037448883056640625, 1.0435791015625, -0.4958534240722656, -0.9715461730957031, 0.7879409790039062, 0.4498329162597656, -0.08172988891601562, -0.22792625427246094, 1.3407096862792969, -0.17110443115234375, 1.7427024841308594, 0.182464599609375, 0.452056884765625, 0.0803985595703125, 0.026264190673828125, -0.03503608703613281, 0.0571136474609375, 1.347808837890625, 0.5852890014648438, 0.7184982299804688, 2.0070953369140625, -0.049800872802734375, -0.07688140869140625, 0.051361083984375, -0.075286865234375, 1.0923233032226562, 0.02557373046875, 0.66717529296875, 0.9890556335449219, 1.7046279907226562, 0.47756195068359375, 0.7151718139648438, 0.9221954345703125, -0.20081329345703125, 0.8335647583007812, -0.17061614990234375, -0.31451416015625, 0.4957008361816406, -1.0202865600585938, -0.047328948974609375, 0.867706298828125, -0.25453758239746094, 0.7919464111328125, 2.0172653198242188, -0.43283653259277344, 0.8638458251953125, -0.288330078125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000116.npy"}
{"epoch": 0.17535903250188964, "step": 117, "batch_size": 64, "mean": 0.28429079055786133, "std": 0.6262572407722473, "min": -1.2623138427734375, "p10": -0.42131385803222654, "median": 0.2798805236816406, "p90": 0.9952117919921876, "max": 1.6947250366210938, "pos_frac": 0.671875, "sample": [0.40853118896484375, 0.605926513671875, 0.2482757568359375, 0.35289764404296875, -0.1857452392578125, 0.5817375183105469, 0.4253959655761719, 0.9807891845703125, 0.8474006652832031, 0.5960750579833984, -0.15679931640625, -0.593414306640625, 1.0439453125, -0.087799072265625, 0.05391693115234375, 0.7879104614257812, 0.212860107421875, 0.7688217163085938, 0.8249359130859375, 0.8296699523925781, -0.42151641845703125, 0.09197998046875, 0.21746826171875, 1.404510498046875, -0.07749366760253906, 0.0506439208984375, -1.0828094482421875, 0.2583160400390625, -0.3683891296386719, 1.6947250366210938, 0.1963043212890625, 1.182952880859375, 0.8474655151367188, -0.4208412170410156, 0.973663330078125, 0.32511138916015625, 0.4926490783691406, 0.9996566772460938, 0.33455657958984375, 0.9257583618164062, 0.5131149291992188, -0.17823028564453125, 0.19051361083984375, 0.6939544677734375, -0.08588409423828125, -1.2623138427734375, 0.26621246337890625, -0.19267654418945312, 0.9848403930664062, -0.29457855224609375, -0.2397918701171875, 0.6289138793945312, 1.2195205688476562, -0.5870285034179688, 0.293548583984375, -1.0805511474609375, -0.29900550842285156, -0.3235969543457031, -0.7099456787109375, 0.36159515380859375, 1.5030021667480469, 0.7550735473632812, 0.0266265869140625, -0.15874671936035156], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000117.npy"}
{"epoch": 0.17687074829931973, "step": 118, "batch_size": 64, "mean": 0.4084276854991913, "std": 0.5618333220481873, "min": -0.6974334716796875, "p10": -0.2942668914794922, "median": 0.4041261672973633, "p90": 1.0807769775390625, "max": 1.5815849304199219, "pos_frac": 0.71875, "sample": [1.0088272094726562, 0.7881431579589844, 1.072509765625, 1.0089569091796875, -0.5688457489013672, 0.9344635009765625, 0.37633514404296875, 0.05213165283203125, 0.6772804260253906, -0.20998764038085938, -0.07455825805664062, 0.7859344482421875, 0.2645149230957031, 1.0493354797363281, 0.21352195739746094, -0.29465484619140625, -0.6974334716796875, 0.44940948486328125, -0.2933616638183594, 0.8482265472412109, 0.12895965576171875, -0.11199951171875, 0.11981582641601562, 1.2462310791015625, 0.5125961303710938, 0.46441650390625, -0.180816650390625, -0.31018829345703125, 0.4319171905517578, 0.123779296875, 0.7098388671875, 0.6104412078857422, 1.0070877075195312, -0.02753448486328125, -0.28993988037109375, 0.631744384765625, 0.08465576171875, 1.48773193359375, 1.230224609375, -0.3096160888671875, -0.31064796447753906, 0.9078369140625, -0.0599822998046875, 0.9295425415039062, 0.33000946044921875, 0.29982757568359375, -0.22702789306640625, 0.6562042236328125, 1.494049072265625, 0.15601348876953125, 0.2322845458984375, 0.7629756927490234, -0.0363616943359375, 1.084320068359375, 1.5815849304199219, 0.46527671813964844, -0.6881256103515625, 0.9246883392333984, 1.2373676300048828, 0.8539295196533203, 0.5819664001464844, -0.08307647705078125, 0.049785614013671875, 0.04683685302734375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000118.npy"}
{"epoch": 0.17838246409674982, "step": 119, "batch_size": 64, "mean": 0.35482484102249146, "std": 0.6968916654586792, "min": -1.1127738952636719, "p10": -0.5376379013061523, "median": 0.2938423156738281, "p90": 1.2644760131835937, "max": 1.996795654296875, "pos_frac": 0.671875, "sample": [0.5803298950195312, 0.7473831176757812, -0.8582839965820312, 1.0480499267578125, 0.21993255615234375, -0.1899261474609375, 0.7587089538574219, 0.120452880859375, -0.8614921569824219, 0.4117622375488281, 0.0390472412109375, 0.2986907958984375, 0.824554443359375, 0.3932647705078125, 1.0052986145019531, -0.221588134765625, 0.27993011474609375, -0.6611213684082031, -0.020883560180664062, -0.0937652587890625, -0.37911224365234375, 1.2712326049804688, 0.73779296875, 0.5156803131103516, 1.1736602783203125, -0.6023101806640625, -0.07275772094726562, 0.8871231079101562, 0.8062744140625, 1.48687744140625, 0.46453094482421875, -1.1127738952636719, 0.5858402252197266, 0.5578269958496094, 0.1436309814453125, 1.80999755859375, 0.225128173828125, 0.6637420654296875, 0.21204757690429688, 1.2487106323242188, 1.327850341796875, 0.39907073974609375, -0.29388427734375, 0.10718536376953125, -0.21564483642578125, -0.5773334503173828, 1.6063690185546875, 0.5715789794921875, 0.8407669067382812, 0.5364303588867188, 1.996795654296875, -0.5875701904296875, -0.44501495361328125, -0.07774162292480469, 0.28899383544921875, -0.2566261291503906, 0.5807723999023438, 1.941253662109375, 1.1701278686523438, -0.4317779541015625, 0.20310020446777344, 0.07263946533203125, -0.11851882934570312, -0.3735198974609375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000119.npy"}
{"epoch": 0.17989417989417988, "step": 120, "batch_size": 64, "mean": 0.44925418496131897, "std": 0.7151040434837341, "min": -0.74871826171875, "p10": -0.3302530288696289, "median": 0.43616199493408203, "p90": 1.3656826019287125, "max": 2.754364013671875, "pos_frac": 0.703125, "sample": [-0.74871826171875, 0.5007572174072266, -0.2670402526855469, 0.8396072387695312, 0.70477294921875, 0.08173370361328125, 0.6037216186523438, 0.8532829284667969, 0.96905517578125, -0.26659393310546875, 0.78155517578125, -0.12630081176757812, -0.27669715881347656, 0.2671356201171875, 0.23842620849609375, 2.5233154296875, 0.47415924072265625, -0.01848602294921875, -0.05886077880859375, 0.978424072265625, 1.7276992797851562, 0.6178436279296875, 0.19686126708984375, 0.44443321228027344, -0.1815032958984375, 0.1689929962158203, 0.521759033203125, 0.9279403686523438, 0.31121826171875, 0.5931549072265625, 1.6339645385742188, -0.3476409912109375, 0.6211166381835938, 0.23816680908203125, 0.4278907775878906, 0.6808128356933594, 0.7180023193359375, -0.2454853057861328, 1.5316505432128906, 0.6653594970703125, -0.29895591735839844, 0.14903640747070312, 0.0686187744140625, 2.754364013671875, -0.14253616333007812, 0.9454421997070312, -0.6023635864257812, 0.42559814453125, 1.6356353759765625, 0.834136962890625, 0.49515533447265625, -0.5942115783691406, 0.7594528198242188, 0.5608673095703125, 0.9604682922363281, -0.447265625, 0.0309906005859375, 1.9762725830078125, -0.09249114990234375, -0.144317626953125, -0.7074737548828125, -0.34366607666015625, 0.8650493621826172, 0.3589744567871094], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000120.npy"}
{"epoch": 0.18140589569160998, "step": 121, "batch_size": 64, "mean": 0.3151443600654602, "std": 0.6403861045837402, "min": -1.00408935546875, "p10": -0.4500293731689453, "median": 0.3307361602783203, "p90": 1.113055419921875, "max": 1.945281982421875, "pos_frac": 0.65625, "sample": [1.945281982421875, 0.5321044921875, -0.22750091552734375, -0.25960350036621094, 1.1629905700683594, 1.147552490234375, 1.1710700988769531, -0.1790943145751953, 0.49083709716796875, -1.00408935546875, 0.9120330810546875, 0.6673431396484375, 1.798919677734375, -0.14065170288085938, 0.26153564453125, -0.22598648071289062, 0.9133243560791016, -0.8652114868164062, 0.46678924560546875, -0.7215347290039062, -0.2434844970703125, 0.20770645141601562, 1.0045700073242188, 0.302642822265625, 0.6996307373046875, -0.065399169921875, 0.34452056884765625, -0.5235805511474609, 0.4742279052734375, 0.6086845397949219, 0.14429664611816406, 1.07275390625, 1.1139068603515625, 0.0692291259765625, -0.2677154541015625, -0.5664443969726562, 0.3222198486328125, 1.1110687255859375, 0.89453125, 0.43737030029296875, -0.14911651611328125, 0.3456096649169922, 0.7382354736328125, 0.8182373046875, -0.1451568603515625, 0.672454833984375, -0.7686882019042969, -0.44940185546875, 1.8351593017578125, 0.6178035736083984, -0.38532447814941406, -0.2714195251464844, 0.4658241271972656, 0.5499515533447266, 0.10111618041992188, 0.477294921875, 0.7250556945800781, 0.046421051025390625, -0.06932258605957031, -0.4502983093261719, -0.15361785888671875, 0.3392524719238281, 0.26263427734375, 0.029689788818359375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000121.npy"}
{"epoch": 0.18291761148904007, "step": 122, "batch_size": 64, "mean": 0.5331156849861145, "std": 0.7177004218101501, "min": -1.7834625244140625, "p10": -0.28407821655273435, "median": 0.5331621170043945, "p90": 1.5667049407958986, "max": 2.2747039794921875, "pos_frac": 0.796875, "sample": [2.2747039794921875, -0.474517822265625, 0.371795654296875, 1.0323104858398438, -1.7834625244140625, -0.23915863037109375, 0.860382080078125, 0.8685684204101562, -0.14566421508789062, 0.1661224365234375, 0.7862682342529297, -0.3033294677734375, 0.05529022216796875, 0.1546478271484375, -0.3937549591064453, 0.44573974609375, 0.26397705078125, 1.654052734375, -0.01355743408203125, 0.7084465026855469, 0.3595104217529297, 2.1322154998779297, 0.5484123229980469, 0.9856758117675781, 1.8239479064941406, 0.2361297607421875, 0.5179119110107422, 0.12316131591796875, 0.2111968994140625, 0.7541351318359375, 0.50701904296875, 1.8303909301757812, -0.015699386596679688, 0.6939239501953125, -1.1363754272460938, 0.4727668762207031, 0.4120922088623047, 0.390350341796875, 1.1904315948486328, 0.787353515625, -0.04570198059082031, 0.601776123046875, -0.4501762390136719, 1.6564483642578125, 0.6340560913085938, 1.2296810150146484, 0.5666179656982422, 0.6467819213867188, 0.8356513977050781, -0.4453277587890625, 0.22924423217773438, 1.1346378326416016, 0.17227554321289062, 0.2393646240234375, -0.07141876220703125, 0.7104969024658203, 0.5585060119628906, 0.7417449951171875, 0.7900161743164062, 0.9096107482910156, 1.5006446838378906, 1.1193485260009766, 0.146697998046875, 1.5950164794921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000122.npy"}
{"epoch": 0.18442932728647016, "step": 123, "batch_size": 64, "mean": 0.3271239697933197, "std": 0.8048520088195801, "min": -2.262786865234375, "p10": -0.6248899459838866, "median": 0.3688373565673828, "p90": 1.3263519287109378, "max": 2.529449462890625, "pos_frac": 0.71875, "sample": [1.2805404663085938, 0.41436767578125, 0.096160888671875, -1.048126220703125, -0.6619396209716797, 0.280975341796875, -0.5384407043457031, -0.3703765869140625, 1.2364273071289062, 0.7923927307128906, 0.1800994873046875, 1.592529296875, -0.22203826904296875, -1.0876312255859375, 1.5770645141601562, 1.0262985229492188, 0.41600799560546875, 0.77325439453125, -0.9485206604003906, 0.4691314697265625, 1.2167587280273438, 1.3546142578125, 0.7055091857910156, 0.44800758361816406, 1.3459854125976562, 0.07483673095703125, 0.5857200622558594, -0.17711639404296875, 0.470367431640625, 0.3800239562988281, 0.14212417602539062, 1.6092987060546875, -1.3022441864013672, 0.5159416198730469, -0.26755523681640625, -0.6817474365234375, 1.6428070068359375, 0.4567241668701172, -0.09568405151367188, 0.5795993804931641, -0.47771453857421875, 0.656005859375, 0.6764926910400391, 0.6017303466796875, 0.08795166015625, 0.29151153564453125, -0.08257675170898438, 0.11498451232910156, -0.17447662353515625, 0.7728443145751953, 1.2265472412109375, 0.059665679931640625, -0.5119552612304688, 0.9852066040039062, -2.262786865234375, 1.026214599609375, 0.04052162170410156, 0.45697784423828125, -0.31351470947265625, 0.3576507568359375, 0.0655975341796875, 0.2942237854003906, 2.529449462890625, 0.25323486328125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000123.npy"}
{"epoch": 0.18594104308390022, "step": 124, "batch_size": 64, "mean": 0.5234500169754028, "std": 0.6274797916412354, "min": -0.6174354553222656, "p10": -0.26434116363525384, "median": 0.49846649169921875, "p90": 1.3046123504638671, "max": 2.1504440307617188, "pos_frac": 0.796875, "sample": [-0.2809104919433594, 1.087860107421875, 1.3261528015136719, -0.5991401672363281, 0.5670070648193359, 0.5965194702148438, 0.777374267578125, 0.4799613952636719, 0.3368988037109375, 1.218170166015625, 0.8156280517578125, 1.0891227722167969, 0.16788101196289062, 1.9773712158203125, 0.3032817840576172, 0.10591506958007812, 0.6736068725585938, 0.9901008605957031, -0.20124435424804688, -0.5926532745361328, 1.85491943359375, 0.5543441772460938, 1.3080787658691406, 0.11933135986328125, -0.6174354553222656, 0.8128242492675781, -0.011209487915039062, 0.0807647705078125, 2.0020370483398438, -0.15103530883789062, 0.7859954833984375, 0.152557373046875, 0.8347930908203125, 1.1729583740234375, 0.18795394897460938, 0.2799224853515625, 1.2965240478515625, -0.3380584716796875, 2.1504440307617188, 0.3533172607421875, 0.5641937255859375, 1.315521240234375, -0.3151702880859375, 0.8493804931640625, 0.7293720245361328, 0.24414825439453125, 0.3955802917480469, -0.0738372802734375, -0.2256793975830078, 1.2629146575927734, 0.8366775512695312, 0.5293121337890625, 0.10873031616210938, -0.09050750732421875, 0.7026824951171875, 0.18438720703125, 0.4434394836425781, 0.11551666259765625, 0.5169715881347656, 0.5673141479492188, -0.3491020202636719, 0.36873626708984375, 0.37117767333984375, 0.7811126708984375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000124.npy"}
{"epoch": 0.1874527588813303, "step": 125, "batch_size": 64, "mean": 0.5122197270393372, "std": 0.7481626272201538, "min": -0.9948348999023438, "p10": -0.46997127532958977, "median": 0.5014820098876953, "p90": 1.499740219116211, "max": 2.53851318359375, "pos_frac": 0.75, "sample": [1.95068359375, 1.0927886962890625, 0.89544677734375, 0.0565032958984375, 0.5491790771484375, -0.6303577423095703, 2.53851318359375, 0.404571533203125, 1.04705810546875, 0.8283863067626953, 2.029449462890625, 0.5779647827148438, -0.28089141845703125, -0.08393096923828125, 0.034976959228515625, 1.0825233459472656, 1.631011962890625, 0.5126190185546875, 0.35106658935546875, 0.6565837860107422, 1.5131721496582031, 0.3147850036621094, 1.0615768432617188, 0.0428466796875, 0.4950675964355469, 0.8244609832763672, 1.3017501831054688, -0.41469764709472656, -0.25800323486328125, 0.7408294677734375, 1.1031646728515625, -0.1319427490234375, 0.08930206298828125, 2.13739013671875, 1.0540237426757812, -0.24115753173828125, 0.6303615570068359, 0.4701805114746094, 0.7192611694335938, 1.4683990478515625, 1.084808349609375, -0.5877761840820312, 0.5078964233398438, 0.5984764099121094, 0.7379112243652344, -0.9948348999023438, 0.8292922973632812, 1.2721366882324219, 0.21291160583496094, -0.021497726440429688, -0.7292938232421875, 0.45151519775390625, 0.6962051391601562, 0.4711112976074219, -0.49365997314453125, 0.3822174072265625, 0.14311599731445312, -0.13294219970703125, 1.5190544128417969, -0.22963905334472656, 0.2368450164794922, -0.61920166015625, -0.7972793579101562, 0.07977294921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000125.npy"}
{"epoch": 0.1889644746787604, "step": 126, "batch_size": 64, "mean": 0.5251647233963013, "std": 0.8771575689315796, "min": -1.207662582397461, "p10": -0.3174577713012695, "median": 0.3556709289550781, "p90": 1.4394554138183595, "max": 4.4692230224609375, "pos_frac": 0.765625, "sample": [0.2826805114746094, 0.15697288513183594, 1.1725959777832031, 0.3759880065917969, -0.3375415802001953, 1.6392326354980469, 1.3720245361328125, 0.10746002197265625, 0.5110092163085938, -0.30299949645996094, 0.7678794860839844, 1.8190765380859375, -0.362823486328125, 0.4603424072265625, 2.1011886596679688, 0.14405059814453125, 0.3677253723144531, -0.09986495971679688, 0.044300079345703125, 0.07018280029296875, 0.557830810546875, 1.351043701171875, -0.04058074951171875, 0.4350118637084961, 0.04140472412109375, -0.21511077880859375, 0.150054931640625, -0.45748138427734375, 1.3011703491210938, -0.28267669677734375, 0.09168243408203125, 1.2214202880859375, -0.7181167602539062, 0.8955955505371094, 0.37465858459472656, -0.3236541748046875, 0.09320259094238281, 4.4692230224609375, 0.7018795013427734, -0.9993133544921875, 1.383544921875, 1.3709793090820312, 0.2776451110839844, 0.3763313293457031, -0.3001747131347656, 1.4634170532226562, 0.0899810791015625, 1.794403076171875, 0.98675537109375, 1.0813751220703125, 1.2811965942382812, 0.9101486206054688, 0.02283477783203125, 0.3436164855957031, 0.255767822265625, -0.127197265625, 0.596282958984375, 1.1574211120605469, 0.6222305297851562, 0.2906951904296875, 0.026214599609375, 2.1312408447265625, -1.207662582397461, -0.153228759765625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000126.npy"}
{"epoch": 0.19047619047619047, "step": 127, "batch_size": 64, "mean": 0.4483017027378082, "std": 0.6767863631248474, "min": -1.00103759765625, "p10": -0.4542064666748047, "median": 0.4710044860839844, "p90": 1.4526735305786134, "max": 1.8970184326171875, "pos_frac": 0.75, "sample": [-0.34267425537109375, 0.966644287109375, -0.16506195068359375, 0.6405143737792969, 0.6182174682617188, 0.938140869140625, 0.25154876708984375, 1.4727916717529297, -0.2361602783203125, 0.5889739990234375, 0.6787605285644531, -0.6368789672851562, 0.6825828552246094, 1.844278335571289, 1.477142333984375, 0.6868095397949219, 0.45673370361328125, 0.5281715393066406, -0.716766357421875, 0.9319992065429688, 0.1725616455078125, -1.00103759765625, 0.518768310546875, 1.7420883178710938, 0.17694091796875, -0.372039794921875, 0.9953460693359375, 0.21437835693359375, -0.7971649169921875, 0.535675048828125, 0.2961387634277344, 0.38520050048828125, 0.06461143493652344, 0.8784637451171875, 0.19161415100097656, 0.9919395446777344, 0.4908733367919922, 1.405731201171875, 1.0881500244140625, 1.4793472290039062, 0.482879638671875, 0.9696807861328125, -0.5004692077636719, -0.5304241180419922, 0.8323211669921875, 0.2476043701171875, 0.2336578369140625, 0.07295989990234375, 1.8970184326171875, 0.5121002197265625, 0.8069992065429688, 1.64654541015625, 0.3347511291503906, -0.4340362548828125, -0.023746490478515625, -0.46225738525390625, 1.14593505859375, -0.25860595703125, -0.0477294921875, 1.1333503723144531, 0.28627967834472656, 0.199432373046875, 0.45912933349609375, -0.4354209899902344], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000127.npy"}
{"epoch": 0.19198790627362056, "step": 128, "batch_size": 64, "mean": 0.45753246545791626, "std": 0.8523670434951782, "min": -1.7295112609863281, "p10": -0.4664943695068359, "median": 0.48433685302734375, "p90": 1.438298797607422, "max": 2.92919921875, "pos_frac": 0.703125, "sample": [2.925567626953125, 0.4943695068359375, 0.0059680938720703125, 1.4940452575683594, 0.5134391784667969, 0.6422500610351562, 0.77655029296875, 0.289764404296875, 0.5633926391601562, 1.448944091796875, 0.59796142578125, -0.934661865234375, -0.00048828125, -0.18317604064941406, 0.5682392120361328, -0.10765838623046875, -0.14689064025878906, -0.36124420166015625, -0.5810546875, 0.8652496337890625, 1.4126129150390625, -0.4511909484863281, -0.81787109375, 2.92919921875, 1.6771392822265625, 0.8651123046875, -0.26958465576171875, 1.4134597778320312, -0.12810707092285156, 0.6446151733398438, 0.2520751953125, 0.35882568359375, 0.231201171875, 1.0947265625, 0.5265426635742188, 0.7020416259765625, 0.05187225341796875, 1.5493793487548828, -0.5358600616455078, -0.473052978515625, -0.23642730712890625, 0.5676155090332031, 0.5428428649902344, 0.21575927734375, 0.18982505798339844, -0.7444305419921875, 0.6781806945800781, 0.8131942749023438, 0.6295166015625, -0.33658790588378906, 0.9365386962890625, -0.13991546630859375, 2.89532470703125, 0.2215576171875, 0.9086456298828125, 0.37647056579589844, 0.20703125, 0.9816474914550781, 1.3080596923828125, -0.07336044311523438, -1.7295112609863281, 0.04021453857421875, 0.6518783569335938, 0.47430419921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000128.npy"}
{"epoch": 0.19349962207105065, "step": 129, "batch_size": 64, "mean": 0.3193177878856659, "std": 0.700340986251831, "min": -1.95538330078125, "p10": -0.354718017578125, "median": 0.2317028045654297, "p90": 1.1919378280639654, "max": 1.9443359375, "pos_frac": 0.6875, "sample": [-0.3351287841796875, -0.2884998321533203, 0.6061973571777344, -0.7185287475585938, 0.08388900756835938, 1.623931884765625, -0.704010009765625, -0.2690753936767578, -0.3631134033203125, 0.6419830322265625, 1.0418548583984375, 0.5329608917236328, 0.825714111328125, -0.099151611328125, -1.0904464721679688, 0.0589447021484375, 0.9298324584960938, 0.09135055541992188, 1.02996826171875, 0.3637046813964844, 0.9716949462890625, 0.16943359375, 0.150543212890625, -0.27234649658203125, -0.2621269226074219, 1.9443359375, 0.9531230926513672, 0.2342681884765625, 0.5272178649902344, 0.8475341796875, 0.5539741516113281, 0.7891464233398438, -0.01442718505859375, 1.247842788696289, 0.027256011962890625, 0.3563385009765625, -0.0973358154296875, -0.2498931884765625, 0.09893798828125, 1.4180774688720703, 0.755615234375, -0.286651611328125, 0.162933349609375, 0.2145519256591797, 0.22913742065429688, 0.5196304321289062, -0.055572509765625, 1.6632461547851562, 0.44126129150390625, 1.3084564208984375, 0.5017890930175781, 0.41039276123046875, 0.9988555908203125, -0.21062660217285156, 0.18512916564941406, 0.08138084411621094, 1.4290809631347656, -1.0020599365234375, -1.95538330078125, -0.402801513671875, 0.3717365264892578, 1.061492919921875, -0.1361236572265625, 0.7948951721191406], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000129.npy"}
{"epoch": 0.19501133786848074, "step": 130, "batch_size": 64, "mean": 0.39817655086517334, "std": 0.6557602882385254, "min": -0.8083343505859375, "p10": -0.4824932098388672, "median": 0.2675132751464844, "p90": 1.189196014404297, "max": 2.084369659423828, "pos_frac": 0.703125, "sample": [0.8102455139160156, 0.014766693115234375, 1.4570999145507812, 0.274200439453125, 1.6099166870117188, 0.581146240234375, 0.8913116455078125, 1.0512466430664062, 0.49064064025878906, -0.4875526428222656, -0.4706878662109375, 0.9583282470703125, 0.1454925537109375, -0.04099845886230469, 0.06938934326171875, 0.8954544067382812, 0.6365623474121094, -0.29744720458984375, 0.22528076171875, 0.7826385498046875, 0.9672012329101562, 0.877655029296875, 1.1129608154296875, 0.6740341186523438, -0.16155242919921875, -0.2939796447753906, 0.26082611083984375, 0.225006103515625, -0.6027297973632812, 0.1790313720703125, 0.7636566162109375, 0.7343826293945312, 0.227386474609375, -0.15627288818359375, 0.1137847900390625, 2.084369659423828, 1.2046661376953125, 0.10415267944335938, 1.3708648681640625, 0.18752670288085938, -0.047115325927734375, 1.0091590881347656, 0.201019287109375, -0.27291107177734375, 0.6983680725097656, 0.5422534942626953, -0.040142059326171875, -0.7520065307617188, 1.862060546875, -0.01598358154296875, -0.5311336517333984, 0.3537712097167969, -0.8083343505859375, 0.2547798156738281, -0.515289306640625, 0.7846717834472656, 1.1530990600585938, -0.013729095458984375, -0.7192001342773438, -0.08880615234375, 1.7638092041015625, 0.3084087371826172, 0.5084095001220703, 0.37813568115234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000130.npy"}
{"epoch": 0.1965230536659108, "step": 131, "batch_size": 64, "mean": 0.4692104160785675, "std": 0.7207480072975159, "min": -1.06878662109375, "p10": -0.2857522964477539, "median": 0.42041015625, "p90": 1.4763845443725587, "max": 2.207916259765625, "pos_frac": 0.671875, "sample": [-0.13837051391601562, 0.5512599945068359, -0.15576171875, 0.18100738525390625, -0.19477081298828125, 1.7004776000976562, 0.4162464141845703, 1.0063495635986328, -0.26801109313964844, 1.2316436767578125, 0.8181228637695312, 0.1998157501220703, -0.3070106506347656, -0.6916656494140625, -0.04718780517578125, 0.5516834259033203, -0.6751861572265625, 0.96661376953125, 1.4457015991210938, 0.27056884765625, -0.2916145324707031, -0.2033843994140625, 0.7714691162109375, 0.66729736328125, -0.4859619140625, 0.7507171630859375, 0.8052215576171875, 0.01490020751953125, 0.48137664794921875, -0.1500091552734375, 0.21584320068359375, 0.5643157958984375, 1.180511474609375, 0.9741287231445312, 0.3187713623046875, 0.5538482666015625, 0.2791175842285156, 2.207916259765625, 0.2680320739746094, 1.9013900756835938, 0.10895538330078125, 1.8031234741210938, -0.27207374572753906, 1.3314132690429688, 0.3662986755371094, -0.19029998779296875, 0.4245738983154297, 1.3383865356445312, -0.25922393798828125, 1.051849365234375, -0.2579193115234375, -0.2999305725097656, 0.6119213104248047, -0.15607070922851562, -1.06878662109375, 0.5270004272460938, -0.16070938110351562, 0.5655593872070312, 1.9087677001953125, -0.08195877075195312, 0.8409576416015625, 1.4895343780517578, 1.8389339447021484, 0.8837509155273438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000131.npy"}
{"epoch": 0.1980347694633409, "step": 132, "batch_size": 64, "mean": 0.3912178874015808, "std": 0.828413188457489, "min": -2.5086669921875, "p10": -0.4922760009765625, "median": 0.4046974182128906, "p90": 1.3146533966064455, "max": 2.934825897216797, "pos_frac": 0.734375, "sample": [0.8747940063476562, -0.014432907104492188, 0.7385787963867188, 1.6275444030761719, 0.11438179016113281, -0.4961395263671875, 1.0364761352539062, -0.14586448669433594, -0.40673828125, 1.0986480712890625, 1.3395271301269531, 1.2485237121582031, 0.7301826477050781, 0.4086456298828125, -1.1052322387695312, -0.4832611083984375, 0.3713111877441406, 0.5973968505859375, 0.2474365234375, -0.021392822265625, 0.09889984130859375, -2.5086669921875, 0.6825103759765625, 0.183319091796875, -0.010372161865234375, -1.1533203125, 0.284881591796875, 1.2566146850585938, 1.0181865692138672, 0.14006423950195312, 0.41121673583984375, 0.2888469696044922, -1.2009296417236328, 0.7469940185546875, -0.13862228393554688, 0.6612548828125, 1.4683990478515625, 0.9732856750488281, 0.5897464752197266, 0.40074920654296875, -0.6201438903808594, 2.934825897216797, 0.2770233154296875, 0.866912841796875, 1.0671710968017578, 0.46857452392578125, 0.27301788330078125, 0.3099365234375, 0.5500450134277344, 0.5676364898681641, 1.377828598022461, -0.37255859375, 1.9532623291015625, 1.0305404663085938, 1.3575420379638672, 0.5928192138671875, 0.41351318359375, -0.37370872497558594, 0.3575592041015625, 0.9538497924804688, -0.25006103515625, 0.0425872802734375, 0.37844085693359375, -1.072113037109375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000132.npy"}
{"epoch": 0.19954648526077098, "step": 133, "batch_size": 64, "mean": 0.36285945773124695, "std": 0.7754817605018616, "min": -1.7417755126953125, "p10": -0.4691909790039062, "median": 0.30767250061035156, "p90": 1.2664764404296875, "max": 1.9250335693359375, "pos_frac": 0.640625, "sample": [0.7665023803710938, 0.052032470703125, -0.0522613525390625, 1.0808944702148438, 0.2532787322998047, 0.7293167114257812, 0.2934150695800781, 1.2675933837890625, -0.21526527404785156, -0.6440200805664062, -0.11630630493164062, 1.7522430419921875, 1.0098018646240234, 0.09373855590820312, -0.4396629333496094, 0.153564453125, 0.9925765991210938, -1.64630126953125, 0.8417854309082031, -0.369903564453125, -0.3669281005859375, -0.3924102783203125, -0.2976646423339844, 1.1506576538085938, 0.9852886199951172, -0.37490081787109375, 0.8706512451171875, -0.1624755859375, 0.161346435546875, -0.22448158264160156, 0.6650619506835938, 0.321929931640625, -1.7417755126953125, 0.7574558258056641, -0.24958038330078125, 0.7349319458007812, 0.20811843872070312, 0.7301864624023438, 0.9391403198242188, 0.9465484619140625, 0.789337158203125, 0.8826828002929688, 1.2638702392578125, -0.7576789855957031, -0.057781219482421875, 1.788238525390625, 0.5374755859375, 1.142669677734375, 0.493682861328125, 1.8972549438476562, 0.34947967529296875, 0.13142013549804688, -0.07545661926269531, 1.51458740234375, 0.406646728515625, -0.20004653930664062, 1.1952285766601562, 1.312570571899414, 0.18378448486328125, -0.33971595764160156, 1.9250335693359375, -0.5503997802734375, -0.5921554565429688, -0.4818458557128906], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000133.npy"}
{"epoch": 0.20105820105820105, "step": 134, "batch_size": 64, "mean": 0.5747714042663574, "std": 0.7321699857711792, "min": -1.194478988647461, "p10": -0.24492912292480468, "median": 0.5294723510742188, "p90": 1.4064495086669924, "max": 3.2221221923828125, "pos_frac": 0.765625, "sample": [0.1651611328125, 0.5187530517578125, 0.4520378112792969, 0.6715927124023438, 0.19736862182617188, 0.3176078796386719, -0.000194549560546875, 1.7420120239257812, 0.9368667602539062, -0.1240692138671875, 0.46612548828125, 0.540191650390625, 1.0967330932617188, 0.8952369689941406, 0.17868614196777344, 0.1669464111328125, 0.7993011474609375, 1.0630264282226562, -0.24121475219726562, 0.6632614135742188, 0.9734573364257812, 0.5001602172851562, 0.9676780700683594, 0.42591094970703125, 0.4778289794921875, 1.5450172424316406, 1.2468948364257812, -0.034374237060546875, -0.1680126190185547, 0.8452072143554688, -0.6980857849121094, 0.3752613067626953, 1.1281585693359375, 0.5592117309570312, -0.06532669067382812, -1.194478988647461, 1.422882080078125, 0.7350120544433594, 1.31365966796875, 0.6224117279052734, 1.944488525390625, 0.9099845886230469, -0.5913009643554688, -0.6262359619140625, 1.3681068420410156, -0.3723907470703125, 1.0485382080078125, -0.24652099609375, 0.718902587890625, 0.4086437225341797, -0.4101982116699219, 1.6513671875, 0.14043426513671875, 0.9837493896484375, 3.2221221923828125, 0.4418525695800781, -0.17658233642578125, 0.623870849609375, 0.2988128662109375, 0.7033119201660156, 1.025146484375, 2.0603866577148438, 0.259429931640625, -0.08445358276367188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000134.npy"}
{"epoch": 0.20256991685563114, "step": 135, "batch_size": 64, "mean": 0.31128865480422974, "std": 0.6821062564849854, "min": -1.1162109375, "p10": -0.46430225372314454, "median": 0.2983837127685547, "p90": 1.2891128540039063, "max": 2.077911376953125, "pos_frac": 0.640625, "sample": [0.6461029052734375, -0.4700889587402344, 1.2843170166015625, 0.10531234741210938, 0.32377052307128906, -0.39777374267578125, -0.23174285888671875, -0.914947509765625, 0.2729969024658203, 2.077911376953125, 0.4987640380859375, -0.003692626953125, -0.06105804443359375, 0.2424182891845703, -0.17171478271484375, -0.5414161682128906, 1.1902313232421875, 0.9618759155273438, -0.05051612854003906, 0.4129810333251953, 0.20449066162109375, -0.3079833984375, -0.45079994201660156, -0.331298828125, 0.6430587768554688, 1.6297569274902344, 0.60968017578125, -0.5531387329101562, 0.0908966064453125, 0.3432502746582031, 0.1653900146484375, 0.5895843505859375, 1.3689422607421875, 0.12214851379394531, 1.291168212890625, 0.5267715454101562, 0.3836212158203125, 0.5180892944335938, 1.0622787475585938, -1.1162109375, -0.031169891357421875, 1.6808929443359375, 0.07239723205566406, 0.2342529296875, -0.2017822265625, 0.49072265625, -0.907684326171875, -1.0361347198486328, 0.47363853454589844, 0.591278076171875, 0.8438186645507812, -0.38177490234375, 0.5838470458984375, 0.7138328552246094, 1.4153060913085938, 0.6650962829589844, -0.10745811462402344, 0.5997352600097656, -0.04681587219238281, 0.3774070739746094, -0.331634521484375, 0.5538673400878906, 1.7139625549316406, -0.00655364990234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000135.npy"}
{"epoch": 0.20408163265306123, "step": 136, "batch_size": 64, "mean": 0.3623313307762146, "std": 0.9706945419311523, "min": -1.3369483947753906, "p10": -0.7859771728515624, "median": 0.3713846206665039, "p90": 1.6090522766113284, "max": 3.838470458984375, "pos_frac": 0.640625, "sample": [2.88372802734375, 1.342763900756836, -1.1882648468017578, -0.7167854309082031, 0.0456390380859375, 0.5523357391357422, 0.45836639404296875, 0.1337432861328125, -0.14910125732421875, 0.5160369873046875, 0.47571563720703125, -0.4579925537109375, -0.027751922607421875, 0.39399147033691406, -0.35283660888671875, 0.5179061889648438, 1.6728057861328125, 0.58209228515625, 0.45438385009765625, -0.4005126953125, 0.811126708984375, 0.12739944458007812, 0.9331436157226562, 1.6327438354492188, -0.8127059936523438, 0.8270339965820312, 1.55377197265625, 0.9595737457275391, 0.726226806640625, 2.17681884765625, 0.30096435546875, 0.12836456298828125, 0.2042236328125, 1.0932846069335938, -0.26849365234375, -1.221435546875, 0.34877777099609375, 0.5251808166503906, 0.1452484130859375, 0.2602386474609375, -1.3369483947753906, 0.8420257568359375, 0.67633056640625, 0.5748062133789062, -0.3901176452636719, 1.426034927368164, -0.14252471923828125, 0.4315338134765625, -1.000030517578125, 3.838470458984375, 0.4379730224609375, -1.0120773315429688, -0.7236099243164062, -0.089569091796875, -0.3958473205566406, -0.51092529296875, 0.8215255737304688, -0.3203887939453125, 1.8454341888427734, 1.9905681610107422, 0.701995849609375, -0.09356689453125, -0.3703880310058594, -1.199249267578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000136.npy"}
{"epoch": 0.20559334845049132, "step": 137, "batch_size": 64, "mean": 0.45677661895751953, "std": 0.7848842144012451, "min": -1.240325927734375, "p10": -0.4342552185058593, "median": 0.3259124755859375, "p90": 1.5483394622802735, "max": 2.2136306762695312, "pos_frac": 0.71875, "sample": [-0.25002288818359375, -1.240325927734375, 0.4061622619628906, 1.4178390502929688, 0.8711700439453125, 1.8739700317382812, 1.3084373474121094, 1.73333740234375, -0.3281135559082031, 0.9792938232421875, 0.9204463958740234, 1.2119522094726562, 1.857696533203125, 1.6838531494140625, 1.0042190551757812, 1.553131103515625, 0.318511962890625, 1.1376800537109375, -0.1981353759765625, 0.08154487609863281, -0.3803234100341797, 0.5040740966796875, 0.58648681640625, -0.8245162963867188, 0.17792510986328125, 0.061279296875, 1.5371589660644531, 2.1872482299804688, -0.4573688507080078, -0.2477264404296875, 0.04979705810546875, -0.0553131103515625, 0.19557571411132812, 0.03240966796875, 2.2136306762695312, 0.12265968322753906, -0.042682647705078125, -0.8499069213867188, 0.3903007507324219, -0.35341644287109375, -0.7988853454589844, 0.039669036865234375, 0.33331298828125, 0.525787353515625, 1.0312004089355469, -0.07979202270507812, 0.0638885498046875, 1.1420307159423828, -0.0364837646484375, 1.2569427490234375, 0.2296924591064453, 0.16263198852539062, 0.28040122985839844, -0.4728355407714844, 0.8393039703369141, 0.8772106170654297, 0.5374565124511719, -0.09561920166015625, -0.9129486083984375, 0.6209030151367188, 0.10826873779296875, 0.7651023864746094, 0.3834228515625, 1.24310302734375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000137.npy"}
{"epoch": 0.20710506424792138, "step": 138, "batch_size": 64, "mean": 0.3412603735923767, "std": 0.7413797378540039, "min": -1.7987060546875, "p10": -0.5090545654296875, "median": 0.41749000549316406, "p90": 1.323129272460938, "max": 2.260986328125, "pos_frac": 0.71875, "sample": [0.07218551635742188, 1.3793563842773438, 0.9226322174072266, 1.4412727355957031, 0.4724388122558594, 0.2692394256591797, 0.9287033081054688, 1.1019668579101562, 0.1543426513671875, 0.019407272338867188, 0.17786026000976562, 0.0831298828125, 1.50048828125, 0.724578857421875, 2.260986328125, -0.744415283203125, -0.3588218688964844, 0.6349067687988281, 0.1278076171875, 0.4458961486816406, -0.3642425537109375, 0.9386825561523438, 0.395477294921875, 0.8340492248535156, 0.4334259033203125, 0.55145263671875, 0.48557281494140625, 1.0446014404296875, 0.4643707275390625, -0.025852203369140625, 0.8305540084838867, -0.1304492950439453, 1.4655036926269531, -0.9501495361328125, -0.23860549926757812, 0.7476692199707031, -0.5091552734375, -1.7987060546875, 0.1411457061767578, 1.1842460632324219, 0.01457977294921875, 1.0435867309570312, 0.5510215759277344, 0.433868408203125, 0.17763137817382812, -1.5663070678710938, 1.5056095123291016, 0.5161457061767578, 0.47968292236328125, 0.61883544921875, 0.434844970703125, 0.15262603759765625, 1.5264205932617188, -0.508819580078125, -0.9313812255859375, -0.12525177001953125, -0.43882179260253906, 0.4015541076660156, -0.07453727722167969, -0.158966064453125, 1.1919326782226562, -0.03809356689453125, 0.04303741455078125, -0.5220870971679688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000138.npy"}
{"epoch": 0.20861678004535147, "step": 139, "batch_size": 64, "mean": 0.515658974647522, "std": 0.7811711430549622, "min": -1.1227188110351562, "p10": -0.37766113281249997, "median": 0.4308757781982422, "p90": 1.6012012481689455, "max": 2.313140869140625, "pos_frac": 0.703125, "sample": [0.6045646667480469, 1.4681568145751953, -0.25399017333984375, -0.7837600708007812, -0.3941154479980469, 1.6116828918457031, -0.234130859375, 2.313140869140625, 0.3473243713378906, 0.1258087158203125, 0.28876495361328125, 1.8709430694580078, 1.283639907836914, -0.3365898132324219, 1.8820953369140625, 1.5767440795898438, 1.6858291625976562, 0.12372398376464844, 0.0670013427734375, 1.0503482818603516, -0.19085693359375, -0.493927001953125, 0.0233612060546875, 0.5608482360839844, 0.6843643188476562, 0.6926307678222656, 0.4076690673828125, 0.0014801025390625, 0.9455795288085938, 1.53399658203125, 0.4985198974609375, 0.863311767578125, 0.46443939208984375, 0.79803466796875, -0.20597076416015625, 0.9622802734375, -0.029804229736328125, 0.5014724731445312, 1.37188720703125, 0.09762191772460938, 1.044168472290039, 0.33905792236328125, 1.425485610961914, 0.08907890319824219, -0.1485004425048828, -0.3392677307128906, -0.1864013671875, 0.316558837890625, 1.6975250244140625, -0.06505203247070312, 2.160980224609375, -0.1744537353515625, 0.45302581787109375, -0.43255615234375, -1.1227188110351562, -0.2623939514160156, -0.3941307067871094, 0.490203857421875, -0.7129039764404297, 1.0064239501953125, 1.253631591796875, 1.310699462890625, 0.4087257385253906, 1.0608673095703125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000139.npy"}
{"epoch": 0.21012849584278157, "step": 140, "batch_size": 64, "mean": 0.649819552898407, "std": 0.9740663766860962, "min": -2.33270263671875, "p10": -0.6393243789672851, "median": 0.7047653198242188, "p90": 1.723345184326172, "max": 2.369537353515625, "pos_frac": 0.78125, "sample": [2.278564453125, 0.2337646484375, 0.11500930786132812, -0.370941162109375, 0.6800460815429688, 0.9120635986328125, -0.6473007202148438, -1.892333984375, 1.7395248413085938, 1.1377944946289062, -2.33270263671875, 0.892242431640625, 0.7160873413085938, 1.63482666015625, 0.8600921630859375, 0.6626358032226562, -0.6431941986083984, 0.9160919189453125, -0.427490234375, 1.3172607421875, 0.6934432983398438, 1.262664794921875, 0.9513969421386719, 1.2294387817382812, 0.08933258056640625, 0.59979248046875, 0.5127620697021484, 2.233675003051758, 0.43376922607421875, -0.7763214111328125, 0.007171630859375, -0.41243743896484375, 1.1929645538330078, 0.36815643310546875, -0.3583850860595703, 0.5732803344726562, 2.1464786529541016, 1.4454498291015625, 1.6855926513671875, 1.5218963623046875, 1.3941192626953125, 1.661041259765625, 1.6659317016601562, 1.2394962310791016, 0.4447479248046875, -0.6834049224853516, 1.3656539916992188, 0.9605178833007812, 1.7510452270507812, -0.14008331298828125, -1.6039772033691406, 0.7353515625, 0.5347137451171875, 0.6154518127441406, 2.369537353515625, 0.4229240417480469, 1.921051025390625, 0.6438827514648438, 0.9629058837890625, 0.9043731689453125, 0.6563072204589844, 1.4678764343261719, -0.2528800964355469, -0.6302947998046875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000140.npy"}
{"epoch": 0.21164021164021163, "step": 141, "batch_size": 64, "mean": 0.4977456331253052, "std": 0.9037750363349915, "min": -1.44720458984375, "p10": -0.38619995117187494, "median": 0.37320995330810547, "p90": 1.5501590728759769, "max": 3.292938232421875, "pos_frac": 0.609375, "sample": [-1.44720458984375, -0.01604461669921875, 1.3955841064453125, -0.2714385986328125, 0.24451446533203125, -0.10599517822265625, 0.8705940246582031, 1.1970443725585938, 0.02691650390625, -0.7257575988769531, 1.13189697265625, 0.5382957458496094, 1.334280014038086, -0.2313709259033203, 1.3306159973144531, 0.4829864501953125, 2.1978912353515625, -0.14078330993652344, -0.30874061584472656, -0.71356201171875, -0.116302490234375, 0.3453197479248047, -0.3990631103515625, 1.0844841003417969, 0.8127365112304688, 0.7638702392578125, 0.3017730712890625, 0.6668853759765625, 0.036468505859375, 0.167755126953125, -0.31375885009765625, -1.0695953369140625, 0.5022735595703125, 0.40110015869140625, 0.5504951477050781, -0.3474864959716797, 0.9111747741699219, 0.0341949462890625, 2.2804222106933594, 1.5736846923828125, 1.8883590698242188, 1.105245590209961, 1.1512870788574219, 1.10430908203125, 0.8335800170898438, -0.2061767578125, -0.1750011444091797, 1.4648895263671875, 2.0201950073242188, 2.017181396484375, -0.5805206298828125, -0.09690284729003906, -0.038845062255859375, 1.34246826171875, 1.0547027587890625, 3.292938232421875, -0.18490219116210938, 1.4952659606933594, -0.6761245727539062, 1.0018234252929688, -0.3561859130859375, -0.07721900939941406, -0.19439697265625, -0.30640411376953125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000141.npy"}
{"epoch": 0.21315192743764172, "step": 142, "batch_size": 64, "mean": 0.45612233877182007, "std": 0.8043399453163147, "min": -1.2833843231201172, "p10": -0.4273483276367187, "median": 0.43277931213378906, "p90": 1.2314300537109375, "max": 2.8287200927734375, "pos_frac": 0.6875, "sample": [0.098602294921875, -0.581146240234375, 0.5301914215087891, 1.230255126953125, 1.0698394775390625, 0.9711761474609375, -0.029815673828125, 0.7240333557128906, 0.68389892578125, -0.27147674560546875, 1.208892822265625, -0.019474029541015625, 0.6827869415283203, -1.1238288879394531, -0.14951324462890625, 0.23760986328125, 1.1525382995605469, -1.2833843231201172, 0.6368541717529297, 0.21515846252441406, 0.6822357177734375, 2.8287200927734375, 0.6359786987304688, 0.9472923278808594, 1.00927734375, 0.3073272705078125, -0.2464141845703125, 0.4329376220703125, 2.1131973266601562, -0.29445648193359375, 0.05770111083984375, -0.3990325927734375, 0.7475738525390625, 2.7917633056640625, 0.5005569458007812, -0.50128173828125, 0.6538200378417969, -0.4714012145996094, -0.16941070556640625, 0.5048255920410156, 2.1374893188476562, 1.0281524658203125, 0.4326210021972656, 0.38869476318359375, -0.061557769775390625, 0.5167579650878906, -0.349212646484375, -0.009307861328125, 0.16666412353515625, 0.4112701416015625, -0.439483642578125, -0.34919166564941406, 0.6325836181640625, 0.820465087890625, 0.023281097412109375, 0.783599853515625, 1.3253021240234375, 1.23193359375, 1.948760986328125, -0.30446815490722656, 0.0976104736328125, -0.5035381317138672, 1.0486679077148438, 0.1003265380859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000142.npy"}
{"epoch": 0.2146636432350718, "step": 143, "batch_size": 64, "mean": 0.38283705711364746, "std": 0.8058531880378723, "min": -1.9451751708984375, "p10": -0.5661521911621094, "median": 0.31029224395751953, "p90": 1.4646419525146483, "max": 2.3728103637695312, "pos_frac": 0.71875, "sample": [0.30914306640625, 0.80499267578125, -0.1861724853515625, 0.5218162536621094, 0.6744155883789062, -0.5854949951171875, 1.0065650939941406, -0.08817100524902344, 1.4632987976074219, -1.1184463500976562, 1.3787612915039062, -0.0343017578125, 1.5411529541015625, -0.1385498046875, 0.111846923828125, 0.8809051513671875, -1.2200698852539062, 0.748046875, 1.5210113525390625, 0.2886772155761719, 1.56298828125, 0.4185295104980469, 2.136688232421875, 1.3444862365722656, 0.6158390045166016, 0.4590930938720703, 0.7998504638671875, -0.87738037109375, 0.04803466796875, 0.13566207885742188, -0.24905776977539062, 0.31144142150878906, -0.0985107421875, 1.3928871154785156, 1.5116424560546875, -0.8850364685058594, -0.09995269775390625, 0.042194366455078125, 0.11283111572265625, 0.45809173583984375, 0.8026237487792969, -1.9451751708984375, 1.4652175903320312, -0.074859619140625, -0.07090950012207031, 0.0101470947265625, 0.1930694580078125, 0.8778533935546875, 0.06647109985351562, 0.5323638916015625, -0.5210189819335938, 0.39333343505859375, 0.45505523681640625, 2.3728103637695312, -1.1350326538085938, 0.9824314117431641, 0.5254287719726562, 0.08254241943359375, 1.3155364990234375, 0.6312808990478516, 0.17691612243652344, 0.10086631774902344, 0.2735862731933594, -0.028715133666992188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000143.npy"}
{"epoch": 0.2161753590325019, "step": 144, "batch_size": 64, "mean": 0.5119173526763916, "std": 0.8969858288764954, "min": -1.3927536010742188, "p10": -0.33386573791503904, "median": 0.3468046188354492, "p90": 1.584603500366211, "max": 3.10577392578125, "pos_frac": 0.65625, "sample": [1.5679035186767578, 2.1399078369140625, -0.0616607666015625, -0.1385021209716797, -0.7522239685058594, 0.30426597595214844, 0.029859542846679688, 0.5942230224609375, 0.2836761474609375, 0.15056610107421875, -0.05764007568359375, 0.4067535400390625, 0.7164955139160156, 1.21044921875, 1.0385971069335938, -0.06017303466796875, 0.9715042114257812, 0.4184608459472656, -0.5190200805664062, 1.1036911010742188, -0.212921142578125, 1.4508514404296875, -0.22844696044921875, -0.2150726318359375, 0.05005836486816406, -0.18726348876953125, 1.2710113525390625, 1.586090087890625, -0.1651592254638672, -0.22636795043945312, 0.38934326171875, -0.13297271728515625, -0.35118865966796875, 0.2071990966796875, 0.4347496032714844, 0.04080963134765625, 0.07206344604492188, 1.5811347961425781, 2.269866943359375, 1.5579605102539062, 1.037506103515625, 1.05364990234375, -0.12088775634765625, 2.2949676513671875, -0.2934455871582031, 0.6531524658203125, 0.4934577941894531, -0.47624778747558594, 1.3810615539550781, -0.231597900390625, -0.022317886352539062, -0.5107269287109375, 0.22433090209960938, 1.5604476928710938, -1.3098602294921875, 1.6244220733642578, 3.10577392578125, 2.504364013671875, 0.059665679931640625, -1.3927536010742188, 0.530853271484375, 0.5940284729003906, 0.5509452819824219, 0.9130439758300781], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000144.npy"}
{"epoch": 0.21768707482993196, "step": 145, "batch_size": 64, "mean": 0.4308076798915863, "std": 0.8949626088142395, "min": -1.52142333984375, "p10": -0.5115636825561524, "median": 0.30729103088378906, "p90": 1.6987888336181642, "max": 2.548877716064453, "pos_frac": 0.65625, "sample": [0.29561614990234375, 0.5513019561767578, -0.4800395965576172, -0.206085205078125, 0.3762664794921875, -1.52142333984375, 0.6112213134765625, 0.08998870849609375, 2.548877716064453, 0.7081451416015625, 0.3256950378417969, 1.7203941345214844, -0.30187034606933594, 0.9874649047851562, 0.026996612548828125, 0.1009674072265625, -0.1107330322265625, 1.9311294555664062, -0.3554840087890625, 0.30748748779296875, 0.3102455139160156, 0.12246322631835938, -0.6962852478027344, 0.270355224609375, 2.1151695251464844, -0.38443756103515625, -0.1583709716796875, 1.14959716796875, 1.64837646484375, 0.8931350708007812, 2.3163299560546875, 1.1431503295898438, 1.1220817565917969, 1.940185546875, -0.195404052734375, 1.3194084167480469, 0.6773872375488281, 1.2201347351074219, 0.6935577392578125, 0.7800025939941406, -0.8600006103515625, 0.5465278625488281, 1.2408447265625, 0.9009151458740234, -0.16426467895507812, -1.4752349853515625, 0.3070945739746094, 0.18774986267089844, -0.3680267333984375, -0.42186737060546875, 0.29274749755859375, 0.7223949432373047, 1.0290355682373047, -0.9198875427246094, 0.857452392578125, 2.49346923828125, 1.2352752685546875, -0.5250740051269531, -0.3796234130859375, -0.25998687744140625, 0.085845947265625, -0.5512008666992188, -0.1577301025390625, -0.13776397705078125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000145.npy"}
{"epoch": 0.21919879062736206, "step": 146, "batch_size": 64, "mean": 0.5313305854797363, "std": 0.8889718651771545, "min": -1.4467697143554688, "p10": -0.44040489196777344, "median": 0.5299530029296875, "p90": 1.4309928894042971, "max": 3.9316864013671875, "pos_frac": 0.734375, "sample": [0.8513870239257812, 1.1962451934814453, -0.44710540771484375, 0.3951263427734375, 0.8511123657226562, -0.746551513671875, -0.1682586669921875, 0.21046066284179688, 0.5104637145996094, 0.8455581665039062, -1.3911209106445312, -1.4467697143554688, 1.6926116943359375, 1.0120811462402344, -0.290924072265625, -0.30718994140625, 0.7665252685546875, -0.289642333984375, 1.2418022155761719, 0.7460174560546875, 0.26837158203125, 1.3596267700195312, 1.68426513671875, 0.3010902404785156, -1.176666259765625, -0.4247703552246094, 0.7222938537597656, 1.5451736450195312, -0.5731658935546875, 0.8064460754394531, 0.42291831970214844, -0.2506294250488281, 2.8305511474609375, 0.1865234375, -0.1392230987548828, 0.457489013671875, 0.56298828125, 0.358001708984375, 0.346649169921875, -0.2516613006591797, 1.2999763488769531, 1.1066322326660156, 0.5494422912597656, 1.019317626953125, 1.4898834228515625, 1.0691070556640625, 0.9388713836669922, 0.7851848602294922, 1.2341384887695312, 0.8832359313964844, 0.6617279052734375, 1.461578369140625, 0.37081336975097656, 0.42180633544921875, -0.7641239166259766, -0.1873779296875, 3.9316864013671875, 0.43255615234375, 0.5735359191894531, 0.2117481231689453, 0.7776031494140625, 0.4225883483886719, -0.2547416687011719, 1.3018646240234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000146.npy"}
{"epoch": 0.22071050642479215, "step": 147, "batch_size": 64, "mean": 0.37672001123428345, "std": 0.763209342956543, "min": -1.8341064453125, "p10": -0.6547309875488281, "median": 0.38596248626708984, "p90": 1.3543861389160166, "max": 2.088775634765625, "pos_frac": 0.71875, "sample": [0.6206130981445312, 0.07123947143554688, 0.3287467956542969, 0.00937652587890625, 0.5330352783203125, 0.548797607421875, 0.15132904052734375, 1.462860107421875, -0.040924072265625, -0.11767959594726562, 0.797698974609375, 0.6173286437988281, 2.088775634765625, 0.936370849609375, 0.0869293212890625, 0.9365577697753906, 1.8049545288085938, -1.8341064453125, 0.9109249114990234, -0.07341194152832031, 0.3354606628417969, -0.30821990966796875, 0.4637012481689453, 1.0624542236328125, -0.1744842529296875, 1.7611732482910156, -0.5695114135742188, 1.1012802124023438, 0.6102447509765625, 0.17138671875, 0.3818798065185547, 0.04288482666015625, 0.390045166015625, -0.7012786865234375, -0.9581756591796875, 0.6878890991210938, 0.5954303741455078, 1.9905891418457031, 1.8873519897460938, -0.8378429412841797, -0.691253662109375, -0.09436798095703125, 0.6285781860351562, 0.7148780822753906, 1.03546142578125, -0.2650337219238281, 1.4690704345703125, -0.09358978271484375, 0.8839302062988281, 0.14153289794921875, 0.8833389282226562, 0.3346900939941406, 0.5655517578125, -0.060394287109375, -0.8688812255859375, 0.7856674194335938, 0.7414684295654297, 0.33583831787109375, 0.4716644287109375, -0.380859375, -1.1966514587402344, 0.14435577392578125, 0.7233066558837891, 0.13010406494140625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000147.npy"}
{"epoch": 0.2222222222222222, "step": 148, "batch_size": 64, "mean": 0.5912688970565796, "std": 0.9423065781593323, "min": -1.2310962677001953, "p10": -0.4357915878295897, "median": 0.3722391128540039, "p90": 2.063240814208984, "max": 3.1253662109375, "pos_frac": 0.796875, "sample": [2.0665435791015625, -1.1591205596923828, 0.3070106506347656, -0.30703163146972656, 1.4004440307617188, 3.1253662109375, 0.5782432556152344, -0.7363948822021484, 0.538238525390625, 1.7309341430664062, -0.20477294921875, 1.3917217254638672, 0.551971435546875, 0.5700416564941406, 1.7812156677246094, 0.0955047607421875, -0.13170623779296875, 2.405731201171875, 0.49883270263671875, 0.0450439453125, 0.02044677734375, -0.5236053466796875, 2.0555343627929688, 0.21587753295898438, 2.3893775939941406, 1.089935302734375, 0.7950267791748047, 0.05582427978515625, 2.5681610107421875, 0.8538589477539062, 0.1551380157470703, 0.046630859375, 1.4787006378173828, -0.8686447143554688, 0.9197311401367188, 1.3753204345703125, 0.31443214416503906, 0.6935997009277344, -1.1130599975585938, 0.10134506225585938, 0.14029693603515625, 0.024127960205078125, 2.10009765625, 2.4892730712890625, -1.2310962677001953, 0.7671985626220703, 1.0316333770751953, 0.9399299621582031, 0.501495361328125, 0.43004608154296875, 0.4564189910888672, 1.0701828002929688, 0.27817344665527344, 0.21213150024414062, 0.09679794311523438, 1.2857208251953125, 0.0948944091796875, 0.2859230041503906, 0.23046302795410156, -0.49097442626953125, -0.06075096130371094, 0.21630859375, -0.1889495849609375, -0.00958251953125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000148.npy"}
{"epoch": 0.2237339380196523, "step": 149, "batch_size": 64, "mean": 0.5744273662567139, "std": 0.8544904589653015, "min": -1.392608642578125, "p10": -0.4242671966552734, "median": 0.40200233459472656, "p90": 1.6363677978515627, "max": 2.4782562255859375, "pos_frac": 0.765625, "sample": [0.2375640869140625, 1.095306396484375, 1.5934295654296875, -0.0547027587890625, 1.293304443359375, 0.13497161865234375, 0.16415786743164062, 1.0629043579101562, -0.13059234619140625, 1.4267749786376953, -0.29146575927734375, 0.4351692199707031, 0.24875640869140625, 0.36883544921875, 0.495849609375, 0.6887664794921875, 0.3204231262207031, 0.70330810546875, 0.1788921356201172, -0.44647216796875, 0.24829673767089844, 1.7970199584960938, 1.3316993713378906, -1.0525932312011719, 2.1591339111328125, 0.7948703765869141, 0.009462356567382812, -0.5255317687988281, 0.33651161193847656, -1.392608642578125, -0.3724555969238281, 1.5715408325195312, -0.2968864440917969, 2.033975601196289, 0.1491069793701172, 1.080169677734375, 1.4069061279296875, -0.6032180786132812, 2.4782562255859375, 0.0515899658203125, -0.9001617431640625, 0.709136962890625, -0.016492843627929688, 2.026153564453125, 2.4638748168945312, 1.3471488952636719, 0.04257965087890625, 0.7124385833740234, 0.35459136962890625, -0.34429168701171875, 0.15383148193359375, 0.6944580078125, -0.16424560546875, -0.580963134765625, 0.5202922821044922, 1.07049560546875, 1.4495849609375, 1.6547698974609375, 0.94488525390625, 0.18109703063964844, 1.0779495239257812, 1.0749835968017578, 0.013214111328125, 1.5475921630859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000149.npy"}
{"epoch": 0.2252456538170824, "step": 150, "batch_size": 64, "mean": 0.5896967649459839, "std": 0.7882404923439026, "min": -1.380706787109375, "p10": -0.515000534057617, "median": 0.5192337036132812, "p90": 1.6246810913085938, "max": 2.5831680297851562, "pos_frac": 0.765625, "sample": [0.7637233734130859, -0.06080818176269531, 1.2386474609375, 0.5520858764648438, -0.5957260131835938, 0.040637969970703125, 0.4563560485839844, 1.6681289672851562, 1.1911163330078125, 1.9186763763427734, 0.3981475830078125, 0.4380340576171875, 1.6030426025390625, 1.7697219848632812, -0.07354927062988281, 0.509124755859375, 0.42249298095703125, 1.6351165771484375, 0.6817626953125, 1.5421218872070312, 1.0553512573242188, 0.17122650146484375, -0.57403564453125, -1.380706787109375, 0.8386383056640625, 1.0541534423828125, 0.4003486633300781, 0.6939735412597656, -0.3281707763671875, 1.0491886138916016, 0.9108428955078125, 0.4416351318359375, 1.343780517578125, -0.062366485595703125, 2.5831680297851562, -0.9525299072265625, 0.5293426513671875, 0.6933555603027344, 0.3804473876953125, 1.6268463134765625, 0.825836181640625, 1.61962890625, 0.281585693359375, -0.01920318603515625, 1.348968505859375, 0.4778900146484375, 0.8596858978271484, 1.2326087951660156, -0.5701255798339844, 1.6195526123046875, -0.65789794921875, 0.447052001953125, 0.36766815185546875, 0.15626907348632812, 0.32920265197753906, 0.6821136474609375, -0.1732025146484375, -0.9693145751953125, 1.7380447387695312, -0.38637542724609375, -0.28617095947265625, 1.0106353759765625, 0.8574752807617188, 0.3753204345703125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000150.npy"}
{"epoch": 0.22675736961451248, "step": 151, "batch_size": 64, "mean": 0.6668342351913452, "std": 0.7952594757080078, "min": -1.1727371215820312, "p10": -0.3136581420898437, "median": 0.7597446441650391, "p90": 1.6102935791015627, "max": 2.718109130859375, "pos_frac": 0.796875, "sample": [0.931976318359375, 0.12418937683105469, 0.8680496215820312, -0.28917694091796875, -0.07148170471191406, 0.04780387878417969, 2.718109130859375, 0.22629928588867188, 1.492034912109375, -0.17363739013671875, 0.6730880737304688, 0.6717720031738281, 1.10308837890625, 0.7973098754882812, 1.0910377502441406, 0.9733829498291016, 0.9230003356933594, -0.21706581115722656, 0.7221794128417969, 0.24164962768554688, -1.1727371215820312, -0.5572624206542969, 1.0789260864257812, -1.1444282531738281, 2.6436309814453125, -0.09037017822265625, 0.7174739837646484, 0.8444633483886719, -0.32415008544921875, -0.911590576171875, 0.4748058319091797, -0.5302658081054688, 1.881744384765625, 1.3841400146484375, 0.82952880859375, 0.03093719482421875, 0.41271209716796875, 1.5866241455078125, 0.08843231201171875, 1.6220703125, 0.3374061584472656, 0.4866943359375, 1.14849853515625, -0.0066394805908203125, 1.6671485900878906, 0.9730987548828125, 1.2860450744628906, 1.4694366455078125, 0.9126815795898438, 0.5491523742675781, 0.6523666381835938, 1.1700286865234375, 1.0243453979492188, 1.7123947143554688, -0.6245250701904297, 1.1863555908203125, 0.9967727661132812, 0.7201347351074219, 0.19115638732910156, 0.99310302734375, 1.6204376220703125, 1.2543830871582031, 0.16189193725585938, 1.0767288208007812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000151.npy"}
{"epoch": 0.22826908541194255, "step": 152, "batch_size": 64, "mean": 0.6589239835739136, "std": 0.8608132600784302, "min": -1.2972640991210938, "p10": -0.42378425598144526, "median": 0.5939340591430664, "p90": 1.84399585723877, "max": 2.7645416259765625, "pos_frac": 0.78125, "sample": [-0.8434982299804688, 1.3830490112304688, 1.1738739013671875, 0.0708160400390625, -0.3885231018066406, -1.2972640991210938, 2.1158065795898438, 1.27264404296875, 0.500335693359375, -0.2985687255859375, 0.6035022735595703, -0.43651580810546875, 1.0056037902832031, 0.36049652099609375, 0.895294189453125, 1.3449325561523438, 0.30760765075683594, 0.4429779052734375, 0.5311298370361328, -0.3940773010253906, -0.5948715209960938, 1.7488441467285156, 0.71270751953125, 0.844696044921875, 0.13103103637695312, 0.04170417785644531, 1.2599639892578125, 0.7077865600585938, 0.5129985809326172, 0.49810028076171875, -0.025768280029296875, 1.884775161743164, 0.42864990234375, 0.5370864868164062, 0.8159103393554688, 0.922607421875, 1.0307750701904297, 0.05254364013671875, 1.2142181396484375, 1.185577392578125, -0.6328353881835938, 1.615325927734375, 1.088470458984375, 0.03908538818359375, 2.3310775756835938, 1.9524879455566406, 1.0964889526367188, -0.4858894348144531, 1.7081832885742188, 0.83526611328125, -1.0316085815429688, 1.273651123046875, 0.13738441467285156, 2.0888519287109375, 1.041067123413086, 0.5843658447265625, 2.2114715576171875, 1.1081695556640625, -0.125701904296875, 2.7645416259765625, 0.2273406982421875, -0.2898883819580078, -0.11846160888671875, 0.4933319091796875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000152.npy"}
{"epoch": 0.22978080120937264, "step": 153, "batch_size": 64, "mean": 0.5102236270904541, "std": 0.8042303323745728, "min": -1.5160369873046875, "p10": -0.4069797515869141, "median": 0.4812431335449219, "p90": 1.4374225616455079, "max": 2.4954605102539062, "pos_frac": 0.765625, "sample": [0.6933078765869141, 0.48828887939453125, 0.888275146484375, 2.02734375, 0.4653949737548828, 0.9679450988769531, 1.3107147216796875, -0.4059410095214844, 0.2923774719238281, 0.30926513671875, 0.3466167449951172, 1.3518943786621094, -0.2647247314453125, 1.331451416015625, 0.6518707275390625, -0.04575347900390625, 0.9962921142578125, 0.1241912841796875, -1.1854400634765625, 1.7208328247070312, 2.4954605102539062, 0.5230941772460938, -0.6108779907226562, 0.8180313110351562, 1.4042015075683594, -0.34606361389160156, 1.45166015625, 0.3155632019042969, 0.13790130615234375, 1.2957305908203125, -0.5194854736328125, 0.49016761779785156, -0.8069190979003906, 0.6043052673339844, -0.24900054931640625, 0.10534095764160156, 1.2322540283203125, 1.7831268310546875, 1.30877685546875, 0.689239501953125, 0.624786376953125, -0.4074249267578125, 0.3956298828125, 1.9902915954589844, 0.9583930969238281, 0.0184783935546875, 0.8084487915039062, 1.3202896118164062, 0.04477691650390625, -0.056819915771484375, -0.189697265625, 0.4741973876953125, 0.15152740478515625, 0.15708160400390625, 1.5769691467285156, -1.2345123291015625, 0.4638519287109375, 1.2064266204833984, -1.5160369873046875, 0.4326171875, 0.7213478088378906, 0.28687286376953125, -0.3801536560058594, 0.6202621459960938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000153.npy"}
{"epoch": 0.23129251700680273, "step": 154, "batch_size": 64, "mean": 0.4456136226654053, "std": 0.9554128050804138, "min": -2.2928314208984375, "p10": -0.5691253662109375, "median": 0.4829673767089844, "p90": 1.701276016235353, "max": 3.0977554321289062, "pos_frac": 0.640625, "sample": [-0.5372848510742188, 0.6534957885742188, -1.3641815185546875, -1.7813282012939453, 0.9748764038085938, 0.9829845428466797, -0.5020904541015625, 0.4102973937988281, 0.3428192138671875, -0.7107467651367188, 0.9870452880859375, -0.05435752868652344, 1.0369377136230469, -0.1621856689453125, -0.3638496398925781, -0.8508377075195312, -0.3206443786621094, 0.9242267608642578, 0.08514976501464844, 1.1370697021484375, 2.091156005859375, 1.9596385955810547, 1.3511371612548828, 0.6164894104003906, -0.7485885620117188, 0.8824100494384766, 3.0977554321289062, 0.8622207641601562, 2.267181396484375, -2.2928314208984375, 0.484710693359375, 0.8345699310302734, 1.9138031005859375, 0.23319244384765625, 0.8822746276855469, -0.2514610290527344, -0.1367015838623047, 0.6057701110839844, 0.8284797668457031, 0.26646995544433594, -0.32154083251953125, -0.3996124267578125, 0.8451805114746094, -0.13607406616210938, 0.48122406005859375, 1.8513355255126953, 1.0728683471679688, 1.250335693359375, 0.234954833984375, -0.13537979125976562, -0.3166961669921875, -0.03818511962890625, 0.8003501892089844, -0.5827713012695312, 2.123046875, 0.9336624145507812, 1.1328239440917969, -0.08674812316894531, 1.1858291625976562, -0.01287841796875, 0.9873733520507812, 0.07925796508789062, 0.2691307067871094, 0.6667118072509766], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000154.npy"}
{"epoch": 0.2328042328042328, "step": 155, "batch_size": 64, "mean": 0.40763652324676514, "std": 0.9428458213806152, "min": -1.6631317138671875, "p10": -0.8737113952636718, "median": 0.4149646759033203, "p90": 1.3273155212402346, "max": 2.4136886596679688, "pos_frac": 0.671875, "sample": [0.195037841796875, 0.116943359375, 2.1792449951171875, 0.8578567504882812, 0.04489898681640625, -0.051853179931640625, 0.30731964111328125, 1.133626937866211, 0.17957496643066406, 1.9731063842773438, 0.7288589477539062, 1.232177734375, -0.8947296142578125, -1.4513473510742188, 0.018436431884765625, -0.53741455078125, -0.1971282958984375, 1.00164794921875, 0.702392578125, 1.1434135437011719, 1.2343215942382812, -0.04215812683105469, 1.9164276123046875, -0.8246688842773438, 1.059661865234375, 2.0721092224121094, 1.0041275024414062, 0.9636688232421875, -0.13864898681640625, 0.2950286865234375, -1.6631317138671875, 0.4329109191894531, 2.1429004669189453, 1.2334022521972656, 0.6085739135742188, 1.202789306640625, -0.08113479614257812, -1.1913909912109375, -0.5322723388671875, 0.296417236328125, -1.2657318115234375, -0.21450424194335938, 1.3465805053710938, 0.3970184326171875, 1.0433807373046875, 0.6688575744628906, 0.34571075439453125, 1.2823638916015625, -1.3282546997070312, 0.9376945495605469, 0.9353485107421875, 1.075775146484375, -0.5557098388671875, -0.1678009033203125, 0.6595067977905273, -0.6539154052734375, 0.14073562622070312, -0.7234630584716797, -1.1590576171875, -0.07646560668945312, 0.6072463989257812, 2.4136886596679688, 1.1958122253417969, 0.5129241943359375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000155.npy"}
{"epoch": 0.23431594860166288, "step": 156, "batch_size": 64, "mean": 0.6174949407577515, "std": 0.9200046062469482, "min": -0.8643074035644531, "p10": -0.3674886703491211, "median": 0.40352630615234375, "p90": 1.8835178375244153, "max": 3.241668701171875, "pos_frac": 0.765625, "sample": [0.921661376953125, -0.13248443603515625, -0.0268402099609375, -0.8643074035644531, 1.1041412353515625, 2.12335205078125, 2.9383811950683594, -0.441375732421875, 0.2302398681640625, 1.400533676147461, -0.034332275390625, 1.1678047180175781, 0.4116020202636719, 0.117950439453125, 2.741455078125, 1.2428855895996094, 1.597412109375, 0.7822837829589844, -0.6367359161376953, 0.8873023986816406, 0.1731853485107422, 0.47203826904296875, 0.4022064208984375, 0.5061073303222656, -0.08367156982421875, 0.07498931884765625, -0.37128639221191406, 0.43643951416015625, 0.37714385986328125, 1.53607177734375, 0.9044647216796875, 0.320953369140625, -0.6936492919921875, 1.1322021484375, 0.0074462890625, 2.8831863403320312, -0.3586273193359375, 1.378021240234375, -0.329559326171875, 2.0050621032714844, 1.021484375, 3.241668701171875, 0.40484619140625, 0.6365966796875, 0.2575359344482422, -0.6843528747558594, -0.17816543579101562, 0.26760101318359375, 0.9963531494140625, 0.73681640625, 0.2665233612060547, -0.84478759765625, 0.9175167083740234, 0.0899505615234375, 0.7486343383789062, 0.23021697998046875, 0.26676177978515625, 0.16533660888671875, 1.59991455078125, -0.09279632568359375, 0.3986377716064453, 2.2751617431640625, 0.4575920104980469, 0.036975860595703125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000156.npy"}
{"epoch": 0.23582766439909297, "step": 157, "batch_size": 64, "mean": 0.5440424084663391, "std": 0.8481102585792542, "min": -2.6854019165039062, "p10": -0.3801759719848632, "median": 0.5393486022949219, "p90": 1.5216964721679689, "max": 2.76324462890625, "pos_frac": 0.796875, "sample": [0.5831375122070312, 1.595876693725586, 0.73785400390625, 1.138031005859375, 1.7728195190429688, -0.55291748046875, 1.0605926513671875, -0.1855010986328125, 1.5247268676757812, 0.4588165283203125, 0.2365875244140625, 0.6734771728515625, 0.942626953125, 0.03310966491699219, -2.6854019165039062, 0.03765869140625, 0.76751708984375, 0.8267440795898438, 0.51971435546875, 1.5146255493164062, 1.0508499145507812, 1.3968276977539062, 2.6134796142578125, 0.5264892578125, 1.5003833770751953, 0.25337982177734375, 0.33148193359375, 0.3619956970214844, 0.36884307861328125, -0.22820472717285156, 0.9443111419677734, -0.43001556396484375, 0.964019775390625, 1.0480270385742188, 0.13672637939453125, 0.5522079467773438, 0.2593231201171875, 1.3894004821777344, 1.743438720703125, 0.19332122802734375, 0.10365676879882812, -0.26151275634765625, -0.026035308837890625, 1.5988197326660156, 2.76324462890625, 0.6861705780029297, -0.5802764892578125, 0.617919921875, 0.130584716796875, -0.2638835906982422, 0.4051055908203125, -0.6497917175292969, 0.2526588439941406, 0.4556465148925781, 0.8613014221191406, 0.8961029052734375, 0.7530784606933594, 0.4007301330566406, -1.2177963256835938, 1.1365184783935547, 0.7173614501953125, -0.72943115234375, -0.0709075927734375, 0.863067626953125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000157.npy"}
{"epoch": 0.23733938019652306, "step": 158, "batch_size": 64, "mean": 0.5799820423126221, "std": 0.8256279230117798, "min": -1.3802490234375, "p10": -0.46793785095214835, "median": 0.5887298583984375, "p90": 1.4253629684448244, "max": 3.002960205078125, "pos_frac": 0.75, "sample": [0.7724075317382812, 0.48647308349609375, 0.6250839233398438, 0.7648677825927734, -1.0187530517578125, 1.1867790222167969, 1.258941650390625, 0.30609130859375, -0.7110443115234375, -0.17600250244140625, 1.474517822265625, 1.1276473999023438, 0.4069366455078125, 0.6637344360351562, -1.0045032501220703, 1.3875904083251953, 0.749786376953125, 0.5532894134521484, 0.6063690185546875, 1.0837440490722656, 0.44321441650390625, 0.94622802734375, 1.252187728881836, 1.0075454711914062, 1.3693561553955078, -0.40146636962890625, 0.1465301513671875, 2.2471389770507812, 0.4752655029296875, 0.6735076904296875, 0.0761566162109375, 0.5710906982421875, 0.9878883361816406, 1.236328125, 1.2652397155761719, 0.9862632751464844, 1.2323150634765625, -0.113433837890625, 0.7687492370605469, -1.0701141357421875, 2.103851318359375, 0.45891571044921875, 1.3420639038085938, 1.672943115234375, 1.4415512084960938, 0.6536674499511719, 0.5603485107421875, 0.48656463623046875, 3.002960205078125, -0.11226081848144531, 0.32796478271484375, -0.21501922607421875, -0.2870903015136719, 0.250335693359375, 0.33356666564941406, -0.20647239685058594, -0.4964256286621094, 2.02532958984375, 0.7855072021484375, 0.4571552276611328, -0.5193519592285156, -0.12164115905761719, -0.08931159973144531, -1.3802490234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000158.npy"}
{"epoch": 0.23885109599395313, "step": 159, "batch_size": 64, "mean": 0.43719035387039185, "std": 0.9253829121589661, "min": -2.40264892578125, "p10": -0.49153060913085933, "median": 0.43190860748291016, "p90": 1.4555671691894532, "max": 2.8055877685546875, "pos_frac": 0.703125, "sample": [0.29781341552734375, 1.1259098052978516, -0.71099853515625, 0.74615478515625, 1.3474922180175781, -0.0149688720703125, -2.40264892578125, 1.7289142608642578, -0.362518310546875, 2.8055877685546875, 1.3253173828125, 0.42336273193359375, 1.444936752319336, -0.1785736083984375, 0.07260894775390625, 0.286773681640625, 1.0816879272460938, 2.079833984375, 0.55145263671875, -0.17249107360839844, -0.49935150146484375, -1.2997817993164062, 0.5123653411865234, -0.4732818603515625, 0.4599151611328125, 0.44045448303222656, 0.2287578582763672, -0.22138214111328125, 1.1944732666015625, 1.911041259765625, -0.5299873352050781, 0.78729248046875, 0.5444011688232422, -0.22545623779296875, 0.1089630126953125, 0.4098472595214844, 1.3766288757324219, 0.9037704467773438, -0.10211181640625, 0.1564788818359375, 1.0539112091064453, -0.8777618408203125, 1.5746688842773438, -0.42778778076171875, 0.6140365600585938, 1.2393836975097656, 0.10353851318359375, 0.4515953063964844, -0.26031494140625, 0.1219940185546875, -0.24882125854492188, 1.460123062133789, 0.81439208984375, 0.073974609375, -0.04842185974121094, -2.3495025634765625, 1.0171775817871094, 0.932220458984375, 0.32703399658203125, 0.21002769470214844, 0.946502685546875, 1.1969451904296875, 1.7337646484375, 1.1628189086914062], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000159.npy"}
{"epoch": 0.24036281179138322, "step": 160, "batch_size": 64, "mean": 0.48271453380584717, "std": 0.9080784320831299, "min": -1.3426513671875, "p10": -0.5638574600219726, "median": 0.3257293701171875, "p90": 1.7201446533203126, "max": 3.1576385498046875, "pos_frac": 0.671875, "sample": [0.10753822326660156, -0.08849716186523438, -0.04915618896484375, 1.6631546020507812, 0.6316051483154297, 0.7899856567382812, 0.1100006103515625, -0.06021308898925781, 1.0949783325195312, 1.4510269165039062, 3.1576385498046875, 0.00817108154296875, 1.8644180297851562, 0.6678619384765625, 0.7176704406738281, 2.2752761840820312, -0.22913742065429688, -0.40070343017578125, 0.8191871643066406, 1.2523651123046875, 0.5946731567382812, -0.7245445251464844, -0.6622314453125, -0.8640594482421875, 1.0734214782714844, 0.8021507263183594, 0.6133270263671875, 0.051006317138671875, 0.9831619262695312, -0.23490524291992188, 0.23524093627929688, 0.09025764465332031, 2.736419677734375, 1.9926261901855469, 0.49770355224609375, -0.3959312438964844, 1.7266769409179688, 0.2985992431640625, 0.07253265380859375, 1.7049026489257812, -0.01120758056640625, 1.1600074768066406, -0.16073226928710938, -0.15230560302734375, -0.09566497802734375, 0.117523193359375, -1.2498245239257812, -0.6073284149169922, 0.6094799041748047, 0.94219970703125, -0.46242523193359375, 0.1029052734375, -0.2352142333984375, 0.896026611328125, 1.0754470825195312, -0.21197509765625, 0.3528594970703125, 1.7942771911621094, -0.7060546875, 0.391876220703125, 0.6139335632324219, 1.4973983764648438, -1.3426513671875, 0.20098114013671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000160.npy"}
{"epoch": 0.2418745275888133, "step": 161, "batch_size": 64, "mean": 0.5282736420631409, "std": 0.9782010316848755, "min": -1.3512420654296875, "p10": -0.4953762054443359, "median": 0.34529590606689453, "p90": 1.9304811477661135, "max": 3.2187347412109375, "pos_frac": 0.703125, "sample": [-0.5150108337402344, -1.2312889099121094, 0.7120590209960938, 0.5375213623046875, 0.998931884765625, -0.735626220703125, 1.28216552734375, 2.5666122436523438, -0.1648998260498047, -0.10943889617919922, -0.36780548095703125, 0.989898681640625, 0.44023895263671875, 1.3170928955078125, 1.6889228820800781, 0.2503528594970703, 1.0036277770996094, -1.3512420654296875, 0.8845138549804688, 0.04965972900390625, 0.0100860595703125, -0.33425140380859375, 0.4655303955078125, -0.41473388671875, 0.44549560546875, -0.5264396667480469, 0.2017364501953125, 1.3337039947509766, 0.8124160766601562, -0.44956207275390625, 1.9553966522216797, 0.9473800659179688, 1.177377700805664, 3.2187347412109375, 2.2553558349609375, -0.13765716552734375, -0.41419029235839844, 2.4914398193359375, 0.823883056640625, 0.15253067016601562, 0.08640480041503906, 2.4975738525390625, 0.10092544555664062, 0.23191070556640625, 0.24680328369140625, -0.44736480712890625, 1.027008056640625, 0.6155681610107422, -0.8957672119140625, -0.14831161499023438, 0.001003265380859375, 0.18053436279296875, 1.677591323852539, 0.14416122436523438, -0.8335494995117188, -0.30859375, 0.5317535400390625, 0.7613525390625, 1.872344970703125, 0.6980819702148438, 2.5488967895507812, -0.099853515625, 0.07497215270996094, 0.9855499267578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000161.npy"}
{"epoch": 0.24338624338624337, "step": 162, "batch_size": 64, "mean": 0.7493584156036377, "std": 0.9465083479881287, "min": -2.56494140625, "p10": -0.49989547729492173, "median": 0.7800407409667969, "p90": 1.783990478515625, "max": 3.07830810546875, "pos_frac": 0.8125, "sample": [2.553253173828125, 1.7828369140625, 0.6043548583984375, 1.1903190612792969, -0.16214752197265625, 1.1596031188964844, 0.7231369018554688, 0.24019622802734375, 1.6771697998046875, 1.3153209686279297, 0.6660995483398438, 0.776092529296875, 1.3424072265625, 1.3744354248046875, 1.202606201171875, 0.8023967742919922, 0.489288330078125, 2.665130615234375, -0.602569580078125, 2.188312530517578, 1.3754463195800781, 0.6972084045410156, 0.5031166076660156, 0.3702545166015625, 0.35227203369140625, -0.6823883056640625, 1.970367431640625, 1.1792068481445312, -0.644256591796875, 1.78448486328125, 0.46402740478515625, 1.2144927978515625, -0.7490997314453125, 1.331644058227539, 3.07830810546875, 0.778167724609375, 1.5640144348144531, 0.7819137573242188, -0.17461204528808594, 0.10680961608886719, -0.552764892578125, -1.1522254943847656, 0.8522109985351562, 1.6551551818847656, 1.1199970245361328, 1.0073013305664062, -0.37653350830078125, 0.6198806762695312, -2.56494140625, 0.05170440673828125, 0.1919384002685547, -0.1148681640625, 1.2813796997070312, 0.8018665313720703, 2.0016098022460938, 1.0663909912109375, -0.0680999755859375, 0.08383941650390625, 0.40728759765625, 0.3446998596191406, 0.5648536682128906, 1.2269268035888672, 1.1155624389648438, 1.1061439514160156], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000162.npy"}
{"epoch": 0.24489795918367346, "step": 163, "batch_size": 64, "mean": 0.7414567470550537, "std": 0.986305832862854, "min": -1.6008453369140625, "p10": -0.4926620483398436, "median": 0.669163703918457, "p90": 2.228405380249024, "max": 2.8092041015625, "pos_frac": 0.765625, "sample": [-0.15121841430664062, 1.9422760009765625, -1.6008453369140625, 2.044342041015625, 0.8662490844726562, 0.21260452270507812, -0.828155517578125, 0.8930511474609375, 2.351409912109375, -0.23148345947265625, 0.5604286193847656, 1.1469039916992188, 2.3492050170898438, -0.29666900634765625, 0.07042694091796875, 1.1709022521972656, 0.3547248840332031, 0.491485595703125, 2.8092041015625, 0.8869552612304688, 0.7455062866210938, 0.5551376342773438, 1.265848159790039, -0.5472869873046875, -0.6630382537841797, 1.8283233642578125, 0.16242599487304688, 2.0488433837890625, 0.6084041595458984, 0.889404296875, 1.1831378936767578, 1.1117286682128906, 0.8092880249023438, 0.31806373596191406, 1.5433197021484375, 1.2330474853515625, -0.5590553283691406, 2.2884445190429688, -0.365203857421875, 0.38137054443359375, 0.6711578369140625, 2.6568260192871094, 0.9183425903320312, 2.0883140563964844, -1.128387451171875, -0.08719444274902344, -0.35401153564453125, -0.11508560180664062, 0.34200286865234375, 0.36492156982421875, -0.9390487670898438, 1.2376155853271484, 0.6207313537597656, 0.6166591644287109, 0.5279617309570312, -0.31558990478515625, 2.6270828247070312, 2.422454833984375, 1.4144744873046875, 1.238037109375, 0.7390003204345703, 0.9720115661621094, 0.6671695709228516, 0.3882789611816406], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000163.npy"}
{"epoch": 0.24640967498110355, "step": 164, "batch_size": 64, "mean": 0.6510298252105713, "std": 0.9414679408073425, "min": -2.06109619140625, "p10": -0.4161964416503906, "median": 0.5308904647827148, "p90": 1.9079568862915044, "max": 2.605703353881836, "pos_frac": 0.734375, "sample": [-0.7330551147460938, 0.6539783477783203, 0.44232177734375, 0.38521575927734375, 0.7483444213867188, 0.41742706298828125, 2.4211578369140625, 1.2079925537109375, 0.1232452392578125, 0.4290199279785156, 2.442108154296875, -0.3493919372558594, 1.1255207061767578, -0.386444091796875, 1.405303955078125, 0.5241851806640625, 1.4993667602539062, 0.5503253936767578, 2.074737548828125, -0.2761573791503906, 0.5076904296875, -0.5225143432617188, 0.29778289794921875, 0.38330078125, -0.1468639373779297, 0.6111640930175781, 1.5001506805419922, 0.8788566589355469, -0.10248565673828125, 0.03450775146484375, 0.943695068359375, 1.959676742553711, -0.42894744873046875, 1.1689567565917969, 2.262451171875, -0.0498199462890625, 1.2510833740234375, 2.534637451171875, 2.605703353881836, -1.2511215209960938, 0.3749675750732422, 1.4700393676757812, -0.1642913818359375, 1.1861839294433594, 0.7271385192871094, 0.23208236694335938, 1.3254470825195312, 0.1369342803955078, 1.23370361328125, -0.5483894348144531, 0.46893310546875, 0.7918052673339844, 1.6589202880859375, -0.5971031188964844, 0.2829608917236328, -2.06109619140625, -0.11772918701171875, 1.2480583190917969, 1.7872772216796875, -0.16650772094726562, 1.57196044921875, -0.21999359130859375, 0.5375957489013672, 1.3639068603515625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000164.npy"}
{"epoch": 0.24792139077853365, "step": 165, "batch_size": 64, "mean": 0.6598936319351196, "std": 0.8865832090377808, "min": -1.861297607421875, "p10": -0.2955974578857421, "median": 0.5924396514892578, "p90": 1.6456985473632817, "max": 3.3445663452148438, "pos_frac": 0.78125, "sample": [1.5173873901367188, 0.5272274017333984, 1.1195201873779297, 0.3433113098144531, 0.39576148986816406, 0.151641845703125, 0.2507667541503906, -0.4319000244140625, 0.5962257385253906, 0.7550754547119141, 0.20172500610351562, 1.1883811950683594, 1.332000732421875, 0.4715404510498047, 1.013397216796875, 0.6480712890625, 0.5849685668945312, 0.6849327087402344, -0.21287155151367188, 0.588653564453125, -1.861297607421875, -0.0092620849609375, 3.3445663452148438, -0.114410400390625, 0.75836181640625, 1.8506717681884766, 0.3631134033203125, 0.6001224517822266, 2.133249282836914, 1.4280281066894531, 0.31472015380859375, -0.2265491485595703, 2.913482666015625, 1.36627197265625, 0.5082550048828125, -0.5994148254394531, -0.09618377685546875, 1.5105476379394531, 0.9545021057128906, 0.23004150390625, -0.32518959045410156, 0.7210235595703125, 0.7486114501953125, -0.3340167999267578, 1.6922531127929688, -0.01465606689453125, 0.3239936828613281, 0.16231536865234375, 0.8420295715332031, -0.1365509033203125, 1.7188568115234375, 2.1328964233398438, -0.6919631958007812, 1.3747825622558594, 0.7624435424804688, -1.2591896057128906, 0.08414459228515625, 0.016780853271484375, 0.5437030792236328, 1.1125335693359375, 1.3463668823242188, 1.3430633544921875, 1.5370712280273438, 1.437255859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000165.npy"}
{"epoch": 0.2494331065759637, "step": 166, "batch_size": 64, "mean": 0.4935823976993561, "std": 0.9335113763809204, "min": -1.5663986206054688, "p10": -0.6863182067871093, "median": 0.3924102783203125, "p90": 1.784011459350586, "max": 2.53485107421875, "pos_frac": 0.65625, "sample": [1.517547607421875, -0.06308746337890625, -0.8777542114257812, 1.2351837158203125, 1.2135009765625, -0.3055610656738281, 0.18896484375, 1.7371559143066406, 0.401519775390625, 2.53485107421875, 0.306488037109375, -0.18448257446289062, 0.8958015441894531, -0.162353515625, 1.3172607421875, -0.8020057678222656, 0.3419189453125, 2.0813159942626953, 1.2797622680664062, -1.0211181640625, -0.10341453552246094, 1.4808082580566406, -0.8051185607910156, 0.38330078125, 1.4047470092773438, 0.7564220428466797, -0.3386688232421875, 0.095916748046875, 0.8796005249023438, -0.4102630615234375, 0.5072784423828125, -1.1534996032714844, 0.4267559051513672, 1.9660797119140625, 0.36081695556640625, -0.6021614074707031, 0.8020172119140625, 0.49530029296875, 0.6028900146484375, 0.5830345153808594, 1.8040924072265625, 2.5318603515625, 1.0507125854492188, -1.5663986206054688, 0.947052001953125, 0.41732025146484375, -0.2252655029296875, -0.231964111328125, 0.37103271484375, 1.6866493225097656, 0.188873291015625, -0.7223854064941406, 0.2480010986328125, 0.5665130615234375, -0.29703712463378906, -0.5986175537109375, 1.5215606689453125, -0.0310211181640625, 2.2484359741210938, -0.31607818603515625, 1.0136566162109375, 1.8488006591796875, -0.09310150146484375, 0.2598304748535156], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000166.npy"}
{"epoch": 0.2509448223733938, "step": 167, "batch_size": 64, "mean": 0.7090966701507568, "std": 0.8900340795516968, "min": -1.6723251342773438, "p10": -0.17302284240722657, "median": 0.6490688323974609, "p90": 1.8382156372070315, "max": 3.0803070068359375, "pos_frac": 0.765625, "sample": [-0.079315185546875, 0.227569580078125, 0.48764991760253906, 1.3393974304199219, -1.6723251342773438, 1.782684326171875, 0.858184814453125, -0.0183868408203125, 0.8211441040039062, 0.3034477233886719, 1.1331558227539062, 1.4383697509765625, 0.2836780548095703, 0.3676719665527344, 0.004016876220703125, 0.35186767578125, 1.5406951904296875, -0.1748046875, 1.4225921630859375, -0.6987228393554688, -0.8796463012695312, 0.6145591735839844, 0.6393489837646484, 0.8071517944335938, 0.6676406860351562, 0.9997100830078125, 0.7460861206054688, -0.08252143859863281, 0.5549545288085938, 1.066497802734375, 0.2844696044921875, 2.1260414123535156, -0.16886520385742188, 0.9529991149902344, 0.8131484985351562, 1.8620147705078125, 1.6541061401367188, 2.5207672119140625, 2.225067138671875, 1.0936203002929688, 0.46479034423828125, 0.28973388671875, -0.6502685546875, 0.6576938629150391, 2.522432327270508, 0.4130401611328125, -0.620880126953125, 1.5340728759765625, -0.00749969482421875, 1.064910888671875, 1.2935028076171875, -0.12494277954101562, -0.1795024871826172, 2.185749053955078, 0.6404438018798828, 1.5467681884765625, 0.044719696044921875, 3.0803070068359375, 0.698944091796875, 0.8941116333007812, 1.5738372802734375, 0.015119552612304688, -0.10089874267578125, -0.06972122192382812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000167.npy"}
{"epoch": 0.25245653817082386, "step": 168, "batch_size": 64, "mean": 0.37709951400756836, "std": 0.7683062553405762, "min": -1.3486328125, "p10": -0.5979793548583985, "median": 0.36055755615234375, "p90": 1.4627464294433596, "max": 1.89971923828125, "pos_frac": 0.65625, "sample": [1.1493911743164062, 0.0604095458984375, 1.0050163269042969, -1.0172615051269531, 0.48052406311035156, 0.7596511840820312, 1.89971923828125, -0.5900459289550781, -0.13550567626953125, -1.3486328125, 0.46181488037109375, 0.08452987670898438, 0.4530792236328125, 0.9493312835693359, -0.114532470703125, 0.7945518493652344, 1.0126380920410156, -0.07924652099609375, 0.0157012939453125, -0.0044403076171875, 0.6835784912109375, -0.08732032775878906, -0.13715362548828125, 0.6412925720214844, 0.03500175476074219, -0.036197662353515625, 1.5488338470458984, 0.019321441650390625, -0.60137939453125, 0.1678924560546875, 1.10162353515625, -0.1101226806640625, -1.035858154296875, 0.74371337890625, 0.8511314392089844, 0.5504150390625, 1.74432373046875, 1.1686248779296875, 0.2123260498046875, 0.7820892333984375, -0.3460826873779297, 1.4911575317382812, -0.9222335815429688, 0.7920951843261719, 1.50030517578125, -0.2868804931640625, 1.5298004150390625, 0.0966339111328125, 0.6782455444335938, -0.08544921875, 1.16455078125, -0.1658477783203125, 1.2108879089355469, 0.17468643188476562, -0.8976669311523438, 1.6962623596191406, 0.6076812744140625, -1.2267684936523438, -0.119293212890625, -0.338653564453125, 0.8312873840332031, 1.0063323974609375, 1.396453857421875, 0.268035888671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000168.npy"}
{"epoch": 0.25396825396825395, "step": 169, "batch_size": 64, "mean": 0.7125036716461182, "std": 1.0383238792419434, "min": -1.00421142578125, "p10": -0.48464050292968747, "median": 0.6065177917480469, "p90": 1.9271907806396489, "max": 4.6280975341796875, "pos_frac": 0.796875, "sample": [0.8779678344726562, 1.0992927551269531, 0.5858154296875, -0.7344245910644531, 1.357229232788086, 0.3812408447265625, -0.9660606384277344, 0.4913501739501953, 1.0336380004882812, 0.9383392333984375, 0.05358123779296875, 0.5076370239257812, 1.7232818603515625, 2.8257598876953125, -0.9542694091796875, -0.2520599365234375, 0.7973823547363281, -0.451507568359375, 1.0251388549804688, -0.4418601989746094, 1.25604248046875, 1.7778167724609375, 1.7926750183105469, 0.466064453125, 0.039337158203125, 3.0092239379882812, 1.024505615234375, 0.0948638916015625, 0.24960708618164062, 1.1889095306396484, 0.5809555053710938, 0.6272201538085938, 2.0204238891601562, -0.49884033203125, 1.3954658508300781, 0.6357765197753906, 1.0232772827148438, 0.37261962890625, 0.7368488311767578, 0.4307861328125, 0.07801055908203125, 1.0743331909179688, 0.0521087646484375, -0.699005126953125, 0.875579833984375, 0.6938018798828125, 1.0726699829101562, -0.378631591796875, 1.9895172119140625, 0.4902801513671875, -0.3292236328125, 0.9024429321289062, 0.3526458740234375, -1.00421142578125, 0.3529815673828125, 4.6280975341796875, 1.101654052734375, 1.9848403930664062, -0.7928047180175781, 3.1408843994140625, 0.46898651123046875, 1.488912582397461, -0.227142333984375, 0.16245269775390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000169.npy"}
{"epoch": 0.25547996976568405, "step": 170, "batch_size": 64, "mean": 0.5480765104293823, "std": 0.8882081508636475, "min": -1.5771675109863281, "p10": -0.5646236419677734, "median": 0.6299343109130859, "p90": 1.8471946716308594, "max": 2.2750167846679688, "pos_frac": 0.765625, "sample": [0.73187255859375, 0.7411994934082031, 0.16686058044433594, 1.0469284057617188, -0.022871017456054688, 2.2750167846679688, 0.7482738494873047, 0.9562397003173828, -0.391082763671875, 1.9925346374511719, -0.6533279418945312, 0.1652545928955078, 1.4502029418945312, 1.8692474365234375, 1.5193862915039062, 0.09704208374023438, 0.7767772674560547, 0.16197967529296875, 0.7889785766601562, -0.08721923828125, -0.4690742492675781, 1.8336563110351562, -0.04102325439453125, 0.5738945007324219, -0.45513153076171875, 0.790771484375, -1.2595596313476562, 0.23342132568359375, -1.5771675109863281, 0.5627288818359375, 1.2515716552734375, 0.38683319091796875, 0.9852218627929688, 0.29937171936035156, -1.0187492370605469, 0.1276092529296875, -1.563507080078125, 1.852996826171875, 1.9577102661132812, 0.4938812255859375, 1.0813446044921875, -0.39107322692871094, 0.2420673370361328, 1.8837432861328125, 0.20525360107421875, 2.2456512451171875, 0.8199462890625, 1.1405868530273438, 1.1268959045410156, 0.6452713012695312, -0.5990142822265625, -0.4843788146972656, 0.6849594116210938, 1.198974609375, 0.123809814453125, 0.21441078186035156, 0.7059555053710938, 0.1423492431640625, 1.003448486328125, 0.6145973205566406, 1.2645034790039062, 1.16119384765625, 1.54901123046875, -0.801361083984375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000170.npy"}
{"epoch": 0.25699168556311414, "step": 171, "batch_size": 64, "mean": 0.7254012823104858, "std": 0.9969090819358826, "min": -2.5400390625, "p10": -0.25383529663085935, "median": 0.6146736145019531, "p90": 2.146918869018555, "max": 2.958282470703125, "pos_frac": 0.765625, "sample": [1.1219406127929688, 0.8124885559082031, 1.0117340087890625, 1.0524368286132812, 0.6214218139648438, 2.4109878540039062, 0.35086822509765625, 1.7852020263671875, -0.10160636901855469, 0.4360847473144531, 0.07493400573730469, 0.59613037109375, 2.034320831298828, 0.6079254150390625, 2.2406044006347656, 0.19697189331054688, 1.0815887451171875, 0.3283100128173828, 0.9764480590820312, -0.04108428955078125, -0.057399749755859375, 0.030809402465820312, 1.0189533233642578, 0.788421630859375, 1.3387184143066406, -0.12017822265625, -0.3287353515625, 0.4672088623046875, 2.1951751708984375, -1.0802268981933594, 0.01203155517578125, 0.478485107421875, 1.6737174987792969, 0.20766448974609375, 0.4787940979003906, 1.5560646057128906, 0.47023773193359375, -0.651519775390625, 1.7300910949707031, 2.958282470703125, -2.5400390625, 0.3369407653808594, 0.8946990966796875, 0.8885421752929688, 1.3727035522460938, 2.2030181884765625, 1.6368408203125, 0.27031707763671875, -0.12091064453125, 1.8448257446289062, 0.7377548217773438, 1.8229827880859375, 1.1794071197509766, 1.2848052978515625, -0.038578033447265625, -0.0133514404296875, 2.4077911376953125, -0.2571563720703125, -0.24608612060546875, -1.32159423828125, 2.6029510498046875, 0.8091506958007812, 0.3919830322265625, -0.48561859130859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000171.npy"}
{"epoch": 0.2585034013605442, "step": 172, "batch_size": 64, "mean": 0.6935364007949829, "std": 0.9573734402656555, "min": -1.6438941955566406, "p10": -0.7465829849243164, "median": 0.7800140380859375, "p90": 1.8635766983032231, "max": 3.4271697998046875, "pos_frac": 0.78125, "sample": [-0.8524017333984375, -0.2066802978515625, 0.25730133056640625, 1.26904296875, -0.14269256591796875, 0.5366153717041016, -0.16424560546875, 1.0838279724121094, -0.01055908203125, 1.9873504638671875, 1.986907958984375, 1.7042083740234375, 0.7487945556640625, -0.27574920654296875, 0.600189208984375, 2.919361114501953, 1.0181961059570312, -0.8757553100585938, -0.792938232421875, 0.5367813110351562, 2.038848876953125, 0.8457183837890625, -0.7603473663330078, -1.2534713745117188, 1.2831039428710938, 1.0110588073730469, 0.8032989501953125, 1.26153564453125, 0.8685073852539062, 0.9287643432617188, 1.0420455932617188, 0.9438400268554688, 0.405548095703125, 3.4271697998046875, -0.3945121765136719, 0.0038623809814453125, 1.35235595703125, 1.0270843505859375, 2.1147117614746094, 0.9713821411132812, 0.29843711853027344, 0.7567291259765625, 0.721527099609375, 0.6577587127685547, 0.9719123840332031, -0.8602485656738281, 0.8390045166015625, 1.0033721923828125, 1.3218231201171875, 0.8606815338134766, -0.7144660949707031, 1.727386474609375, 1.903921127319336, -1.6438941955566406, 0.5981578826904297, 1.483795166015625, 0.00035858154296875, 0.24462127685546875, 0.27963829040527344, 1.641439437866211, 0.100341796875, 0.5002689361572266, 1.769439697265625, 0.6762619018554688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000172.npy"}
{"epoch": 0.2600151171579743, "step": 173, "batch_size": 64, "mean": 0.7386313080787659, "std": 1.1737096309661865, "min": -1.3042144775390625, "p10": -0.4921483993530273, "median": 0.4627361297607422, "p90": 2.142719268798828, "max": 4.491363525390625, "pos_frac": 0.75, "sample": [-0.7738304138183594, 0.8059844970703125, 0.00075531005859375, 1.72149658203125, 0.307586669921875, 1.3000946044921875, 0.6923160552978516, -0.10526275634765625, -0.09308242797851562, 0.41858673095703125, 0.2678070068359375, 0.7416572570800781, 1.9001388549804688, -1.269296646118164, -0.46312904357910156, 0.990936279296875, 2.3772430419921875, -0.21357345581054688, 1.1844520568847656, 0.25991058349609375, 1.924468994140625, 2.1181259155273438, -0.05013275146484375, 4.10577392578125, -1.036905288696289, -0.250701904296875, 0.6852188110351562, 0.4568634033203125, 0.6462249755859375, -0.20609283447265625, -0.5045852661132812, 2.2000732421875, 0.3540973663330078, 2.15325927734375, 0.83380126953125, 0.43018341064453125, 0.9369010925292969, -0.5180931091308594, 1.0884246826171875, 4.01690673828125, 0.33736419677734375, 0.13076019287109375, 0.5341796875, 1.5944900512695312, 0.5768566131591797, -1.3042144775390625, 0.4622230529785156, 0.46324920654296875, 2.82574462890625, 1.6083946228027344, 1.5628890991210938, -0.2523937225341797, 0.594207763671875, 1.0334396362304688, 1.940093994140625, 0.22684478759765625, -0.9022903442382812, -0.1424713134765625, 0.3513317108154297, 4.491363525390625, 0.3043670654296875, 0.6623878479003906, 0.32012939453125, 0.418853759765625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000173.npy"}
{"epoch": 0.2615268329554044, "step": 174, "batch_size": 64, "mean": 0.7647665143013, "std": 1.1052614450454712, "min": -1.7018871307373047, "p10": -0.47950477600097646, "median": 0.6878318786621094, "p90": 2.0809249877929688, "max": 3.9981613159179688, "pos_frac": 0.71875, "sample": [0.6148757934570312, -0.139923095703125, 1.3211097717285156, 0.7246208190917969, 0.7896060943603516, -1.255767822265625, 0.5375137329101562, 1.9500885009765625, 1.5940170288085938, 1.0825309753417969, 1.31121826171875, 1.6632423400878906, -0.1947174072265625, -0.00583648681640625, 1.88934326171875, -0.40117454528808594, 0.4964942932128906, 1.4170913696289062, 0.707183837890625, -1.7018871307373047, 1.6050186157226562, 1.0455551147460938, -0.8406410217285156, 0.9000816345214844, 2.2628326416015625, -0.5130748748779297, 0.097991943359375, 0.5216064453125, 2.1306304931640625, 0.5093994140625, 0.6684799194335938, 3.9981613159179688, 2.476003646850586, -0.8711643218994141, 1.2149391174316406, 1.8890380859375, 0.3722991943359375, 2.932464599609375, -0.23596572875976562, 1.767059326171875, 0.1498870849609375, 2.0314178466796875, 2.015186309814453, -0.3590812683105469, 0.02916717529296875, 1.4022750854492188, 1.28936767578125, -0.17426681518554688, 2.2479019165039062, -0.014123916625976562, -1.3574600219726562, 1.9788436889648438, 0.08698272705078125, 0.13075828552246094, 2.102142333984375, -0.85479736328125, 0.38722991943359375, -0.15209197998046875, 1.2305717468261719, -0.22422027587890625, 1.3832626342773438, 0.46656036376953125, 1.0853805541992188, -0.26618194580078125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000174.npy"}
{"epoch": 0.26303854875283444, "step": 175, "batch_size": 64, "mean": 0.776630163192749, "std": 1.3012498617172241, "min": -3.3328857421875, "p10": -0.5534610748291015, "median": 0.8303966522216797, "p90": 2.2615188598632816, "max": 3.2037925720214844, "pos_frac": 0.75, "sample": [-0.6042289733886719, 0.7630825042724609, -0.4755401611328125, 0.8977108001708984, 0.13309478759765625, 1.5556659698486328, 0.11412239074707031, 3.110595703125, 1.7032012939453125, -0.4450206756591797, 0.23270416259765625, 1.436492919921875, 0.44046974182128906, 0.44196319580078125, -0.33481597900390625, 2.148458480834961, 1.531005859375, 0.9902191162109375, 1.678323745727539, 0.3192100524902344, 0.9225387573242188, 0.9977951049804688, 0.03223419189453125, 2.5032806396484375, 1.6790008544921875, 1.4427490234375, -0.040721893310546875, 0.2527008056640625, 1.936859130859375, -3.3328857421875, 2.1064834594726562, 1.1982955932617188, 0.5010910034179688, 2.209320068359375, -0.9592132568359375, 0.022500991821289062, 0.394287109375, 2.7661781311035156, -0.4840202331542969, -0.3587646484375, 0.993377685546875, -0.583221435546875, -0.02764129638671875, 0.9818458557128906, -2.3549957275390625, -1.855438232421875, 1.0877227783203125, -0.3910255432128906, 2.0908737182617188, 2.0670337677001953, 1.1300411224365234, 1.7817649841308594, 0.028533935546875, -1.2888412475585938, 0.6291961669921875, 1.9749984741210938, -0.2599830627441406, 2.0810585021972656, 2.2838897705078125, 2.6613006591796875, 0.31436920166015625, 2.997344970703125, 0.7319107055664062, 3.2037925720214844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000175.npy"}
{"epoch": 0.26455026455026454, "step": 176, "batch_size": 64, "mean": 0.6184365153312683, "std": 1.289999008178711, "min": -2.93310546875, "p10": -0.6372177124023437, "median": 0.5022487640380859, "p90": 2.1889270782470707, "max": 4.94097900390625, "pos_frac": 0.6875, "sample": [1.3120269775390625, -1.8084640502929688, 1.836883544921875, 1.28240966796875, 0.0189971923828125, 3.624542236328125, 0.0139007568359375, 0.5819778442382812, 0.2733192443847656, -0.5657501220703125, -0.6678466796875, -0.7271881103515625, -0.8898200988769531, 0.7753944396972656, -0.38173675537109375, 1.9927921295166016, -0.19645309448242188, 1.1213798522949219, -1.4910125732421875, 0.11975479125976562, 0.4225196838378906, 0.06304740905761719, 0.6777591705322266, 2.7175636291503906, 0.6206207275390625, 0.619293212890625, -0.1797637939453125, 0.6965484619140625, -0.48403167724609375, 1.0690994262695312, 2.6767578125, -0.12203407287597656, -0.9600677490234375, 1.4662017822265625, -0.05963325500488281, 1.6345100402832031, 0.11113739013671875, 0.6814422607421875, 0.0928497314453125, -0.44261932373046875, -0.010589599609375, 0.415863037109375, 1.2528915405273438, 2.0903587341308594, 0.8394317626953125, 2.231170654296875, -0.47646331787109375, -0.4239349365234375, 1.8358993530273438, 2.3793258666992188, 0.010009765625, 0.8552017211914062, -0.2841053009033203, 2.7781448364257812, 0.8386878967285156, 0.9472122192382812, 4.94097900390625, 1.9229965209960938, 0.023773193359375, 1.334808349609375, 1.5750770568847656, -2.93310546875, 0.42151641845703125, -0.5115203857421875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000176.npy"}
{"epoch": 0.2660619803476946, "step": 177, "batch_size": 64, "mean": 1.0227817296981812, "std": 1.2106256484985352, "min": -2.4633560180664062, "p10": -0.13809909820556632, "median": 1.0163602828979492, "p90": 2.4999216079711917, "max": 4.3414764404296875, "pos_frac": 0.875, "sample": [0.9533920288085938, 0.8066444396972656, 1.688924789428711, 4.3414764404296875, 0.89459228515625, 0.4840068817138672, 0.25446319580078125, 2.593536376953125, 1.72998046875, 1.6529159545898438, 3.3061599731445312, 1.05584716796875, 2.168069839477539, 0.09794235229492188, 0.28763580322265625, -0.4069404602050781, 1.377655029296875, 0.34856414794921875, -0.8471298217773438, 1.6955490112304688, 1.4452056884765625, 1.54400634765625, 0.9794216156005859, 3.4880123138427734, 1.2879791259765625, 0.7667694091796875, 1.265390396118164, 1.7715301513671875, 0.16649818420410156, 0.11267471313476562, 0.3487548828125, 0.5051498413085938, 2.2689666748046875, 2.9730224609375, 0.02503204345703125, 1.9911479949951172, 1.1232948303222656, 0.5162506103515625, 1.225372314453125, 0.21012115478515625, -0.5412445068359375, 0.8540897369384766, 1.6096992492675781, -2.4633560180664062, 2.0078811645507812, -0.9788055419921875, 2.4285888671875, 1.213897705078125, 1.4272308349609375, 0.07851219177246094, 0.807159423828125, 0.30319976806640625, -2.3246307373046875, 2.921201705932617, 1.0532989501953125, 1.7683029174804688, 1.2176132202148438, 1.8731536865234375, 0.146697998046875, -0.04645347595214844, 0.4552154541015625, 2.5304927825927734, -0.17737579345703125, 0.7957763671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000177.npy"}
{"epoch": 0.2675736961451247, "step": 178, "batch_size": 64, "mean": 0.592870831489563, "std": 1.0015522241592407, "min": -1.386688232421875, "p10": -0.5451969146728516, "median": 0.48508548736572266, "p90": 1.790237617492676, "max": 3.3669509887695312, "pos_frac": 0.703125, "sample": [-0.3243122100830078, 0.611175537109375, 1.5351448059082031, -0.1075286865234375, 1.68048095703125, 0.3389854431152344, -0.20604705810546875, 1.762716293334961, 0.32302093505859375, 0.3185920715332031, -0.9678268432617188, 1.4483489990234375, 0.5915985107421875, -0.5529537200927734, 1.5321731567382812, 1.1208000183105469, 1.5964889526367188, 1.0771942138671875, -0.16658782958984375, 0.2474956512451172, 1.8637275695800781, 0.22194671630859375, -0.6287994384765625, -0.137481689453125, 1.3550148010253906, 0.8726959228515625, 0.8488693237304688, -0.154144287109375, 0.6822357177734375, 0.1939849853515625, 0.3736724853515625, 0.0280303955078125, -0.2402191162109375, 1.8488731384277344, -0.9673309326171875, 1.1167716979980469, -0.503204345703125, 0.8713760375976562, 3.3080902099609375, -0.13716888427734375, -1.386688232421875, 1.3309249877929688, 3.3669509887695312, 0.7483634948730469, -1.3289031982421875, 1.1044845581054688, 0.08707427978515625, 0.0565185546875, 1.8918304443359375, 1.1312541961669922, 0.74237060546875, 2.7416534423828125, 0.45214271545410156, 1.6967544555664062, 0.3609962463378906, -0.33791351318359375, 1.802032470703125, 0.5604248046875, -0.45417022705078125, -0.6947784423828125, 0.5180282592773438, 0.32944488525390625, 1.0761337280273438, -0.5270977020263672], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000178.npy"}
{"epoch": 0.2690854119425548, "step": 179, "batch_size": 64, "mean": 0.6777083873748779, "std": 1.1805986166000366, "min": -1.5077972412109375, "p10": -0.6399681091308593, "median": 0.6136093139648438, "p90": 2.164153289794922, "max": 3.9707794189453125, "pos_frac": 0.6875, "sample": [0.8726119995117188, -0.19464111328125, 0.5867691040039062, 1.630645751953125, 0.5773506164550781, 0.7610645294189453, 0.5289516448974609, 0.9661712646484375, 3.8187255859375, 1.7419242858886719, 1.4732170104980469, 3.0858325958251953, 0.9923000335693359, -0.7163772583007812, 1.0190677642822266, -0.574005126953125, -1.2445755004882812, 0.08209991455078125, -0.024829864501953125, 1.5037689208984375, 0.0055522918701171875, 1.5882453918457031, -1.5077972412109375, 0.4135589599609375, 2.3519210815429688, 0.18892669677734375, -0.6251640319824219, 1.17608642578125, -0.013837814331054688, -0.4078044891357422, 1.2192344665527344, 2.1258697509765625, 0.7354774475097656, -0.299896240234375, -0.21468353271484375, -0.6134834289550781, -0.3836517333984375, 2.9764556884765625, 0.2766265869140625, -0.2663002014160156, 0.32010650634765625, 1.5025711059570312, 0.8467597961425781, 1.0525245666503906, 1.3141860961914062, 1.1886444091796875, 2.170806884765625, 0.6404495239257812, 2.1486282348632812, 3.9707794189453125, 1.4217720031738281, 0.3800468444824219, 2.621337890625, 0.1586761474609375, -0.2559032440185547, 0.734466552734375, 0.17380523681640625, -0.8217926025390625, 0.7552528381347656, -0.9515228271484375, -1.4372482299804688, -0.3602714538574219, 0.83416748046875, -0.6463127136230469], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000179.npy"}
{"epoch": 0.2705971277399849, "step": 180, "batch_size": 64, "mean": 0.6426514387130737, "std": 1.0895947217941284, "min": -1.884918212890625, "p10": -0.6971748352050781, "median": 0.6550521850585938, "p90": 2.1868223190307625, "max": 3.2340850830078125, "pos_frac": 0.671875, "sample": [1.231231689453125, 0.6699028015136719, 1.551177978515625, -0.01493072509765625, 0.74957275390625, 1.4802360534667969, -0.5848846435546875, 0.6320610046386719, 0.7179088592529297, 0.14377212524414062, 1.0953006744384766, 1.5528488159179688, -1.4535064697265625, -0.31858062744140625, 1.106536865234375, 0.31036376953125, -1.884918212890625, 1.5601806640625, -0.053741455078125, -0.8736839294433594, 2.6569137573242188, 0.4937171936035156, 1.6135616302490234, 0.25818634033203125, 0.7991046905517578, 1.7628555297851562, -0.6552085876464844, 0.8055267333984375, 0.2798023223876953, 1.1414833068847656, 1.2160606384277344, 2.2694091796875, -1.3013153076171875, -0.1715850830078125, 1.502349853515625, 1.30682373046875, -0.029157638549804688, 2.4834823608398438, 0.42391395568847656, -0.18162155151367188, 0.28285789489746094, 0.9852867126464844, 1.994119644165039, -0.29804420471191406, -0.09228134155273438, 0.5194091796875, 2.3286209106445312, 0.05963134765625, 3.2340850830078125, -0.254119873046875, 1.0376815795898438, -0.7151603698730469, 1.5020523071289062, -0.7570266723632812, -0.362274169921875, 1.0839958190917969, 2.5359153747558594, -0.5037841796875, 1.2154884338378906, -0.9503707885742188, 1.3907318115234375, 0.6402015686035156, 2.5355911254882812, -0.5740642547607422], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000180.npy"}
{"epoch": 0.272108843537415, "step": 181, "batch_size": 64, "mean": 0.9661591053009033, "std": 1.0475860834121704, "min": -0.927215576171875, "p10": -0.22743320465087888, "median": 0.831385612487793, "p90": 2.325623321533203, "max": 3.4687652587890625, "pos_frac": 0.796875, "sample": [-0.45984649658203125, 1.1244468688964844, -0.5890350341796875, -0.24155807495117188, 0.5637779235839844, 0.5052509307861328, 0.06018829345703125, 0.15618896484375, 1.1037101745605469, 0.421630859375, 1.5523757934570312, -0.054470062255859375, 1.3829154968261719, 1.5136566162109375, 0.8860092163085938, 2.6090164184570312, 0.8990097045898438, 2.3219223022460938, 1.0361480712890625, -0.91790771484375, 1.0020751953125, 3.1019630432128906, 1.6260032653808594, 1.6365737915039062, 2.207794189453125, 2.8563079833984375, 0.8097305297851562, 3.2056121826171875, -0.1551513671875, -0.5547943115234375, 3.1855010986328125, 1.4718780517578125, 2.32720947265625, -0.7468700408935547, 0.8006439208984375, -0.0247039794921875, -0.060520172119140625, 2.31005859375, 0.46836090087890625, 0.31108856201171875, 1.5871696472167969, 0.5187606811523438, 1.4645195007324219, 2.0957412719726562, 1.6517601013183594, 0.3960552215576172, 0.6438751220703125, 0.8530406951904297, 0.804962158203125, -0.1944751739501953, 0.27696990966796875, 2.02569580078125, 3.4687652587890625, 0.5383224487304688, 0.5928802490234375, 1.1200103759765625, 1.132537841796875, 0.6585235595703125, 1.3216571807861328, -0.049457550048828125, 0.7478408813476562, 0.07254219055175781, 1.3815116882324219, -0.927215576171875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000181.npy"}
{"epoch": 0.273620559334845, "step": 182, "batch_size": 64, "mean": 0.7490732669830322, "std": 1.298134684562683, "min": -3.6471023559570312, "p10": -0.7380325317382811, "median": 0.6835594177246094, "p90": 2.413564491271973, "max": 3.3641510009765625, "pos_frac": 0.734375, "sample": [3.3641510009765625, -0.46677398681640625, -0.3992195129394531, 0.28763580322265625, 1.6492156982421875, 2.423187255859375, 0.5960330963134766, -0.786285400390625, -1.1923675537109375, 2.2872161865234375, 1.9655475616455078, 0.39414215087890625, 2.30828857421875, -1.111419677734375, 1.1296157836914062, 0.93829345703125, 0.747467041015625, 0.47180938720703125, 2.8028564453125, 0.170379638671875, 1.0392913818359375, 0.1355762481689453, 0.6693115234375, -0.1283111572265625, 0.1450347900390625, -0.6254425048828125, 2.391111373901367, 1.315216064453125, 2.49835205078125, 2.512481689453125, 2.66302490234375, 1.0677986145019531, 1.326263427734375, 0.6978073120117188, 1.4649505615234375, -1.05694580078125, -0.19857025146484375, 0.17253875732421875, 2.039886474609375, 0.061614990234375, 1.049560546875, -0.9567852020263672, 2.7668075561523438, -0.024383544921875, 1.8619842529296875, 2.3467254638671875, 0.17254257202148438, 0.42858123779296875, 1.2693405151367188, 0.6120376586914062, 0.9148101806640625, 2.0085601806640625, -0.045379638671875, 2.1466617584228516, 1.6403579711914062, 0.24854278564453125, -0.5609970092773438, 1.1071968078613281, 1.1473045349121094, 0.3484077453613281, -0.27120208740234375, -2.2239913940429688, -0.16965293884277344, -3.6471023559570312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000182.npy"}
{"epoch": 0.2751322751322751, "step": 183, "batch_size": 64, "mean": 0.41555219888687134, "std": 1.0706548690795898, "min": -2.4967880249023438, "p10": -1.0054672241210938, "median": 0.34752941131591797, "p90": 1.7723567962646485, "max": 2.7613677978515625, "pos_frac": 0.609375, "sample": [0.10678482055664062, 1.6291351318359375, -0.148651123046875, -0.12865066528320312, 0.6061859130859375, -1.2177886962890625, 1.1026458740234375, 1.8403244018554688, -0.6574821472167969, -0.838897705078125, -2.4967880249023438, 0.6263427734375, 0.47403907775878906, -0.2793426513671875, 0.18471336364746094, 1.953268051147461, -0.47445106506347656, -0.518035888671875, 2.1753158569335938, 0.7161998748779297, 1.7682952880859375, -1.66717529296875, 1.7580795288085938, -0.12715530395507812, 0.13946914672851562, -0.1304779052734375, 0.7382354736328125, 0.9031829833984375, -0.867156982421875, -1.01947021484375, 2.7613677978515625, 1.8018798828125, -0.080047607421875, 0.9327545166015625, -1.1968460083007812, 1.0759811401367188, 1.1227073669433594, 1.521270751953125, -1.1724853515625, 1.7740974426269531, -1.0441093444824219, 0.19845199584960938, 1.04644775390625, 0.3517475128173828, -0.02980804443359375, 0.8703575134277344, 0.6585922241210938, 0.3433113098144531, -0.22247314453125, -0.10600090026855469, 0.2879486083984375, 1.388580322265625, 0.19525146484375, 0.7300796508789062, 2.6002197265625, -0.14908409118652344, 1.234130859375, -0.9727935791015625, -0.3197212219238281, -0.4691429138183594, 1.3540191650390625, 1.4654693603515625, 1.2006378173828125, 1.2918548583984375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000183.npy"}
{"epoch": 0.2766439909297052, "step": 184, "batch_size": 64, "mean": 0.6851844191551208, "std": 0.9078963398933411, "min": -0.6450424194335938, "p10": -0.31958751678466796, "median": 0.5161552429199219, "p90": 2.0474807739257814, "max": 3.127552032470703, "pos_frac": 0.734375, "sample": [0.2724609375, 2.330596923828125, 1.3001728057861328, 0.8488349914550781, 1.2623977661132812, 0.0150146484375, -0.6450424194335938, 0.1863250732421875, -0.12794113159179688, 0.8331146240234375, -0.01529693603515625, 0.8738174438476562, -0.3248100280761719, 0.6665248870849609, 1.4803009033203125, -0.5330238342285156, 1.8996963500976562, 2.3044586181640625, 0.08869171142578125, 2.4449615478515625, 0.027862548828125, 2.704519271850586, 0.5974502563476562, 2.36602783203125, -0.067718505859375, 1.0833892822265625, -0.08419036865234375, 2.0898284912109375, 3.127552032470703, 0.7851066589355469, 1.2464065551757812, 0.10269927978515625, -0.1505126953125, 0.5316810607910156, -0.022624969482421875, 0.3143310546875, 1.8023757934570312, 1.3997325897216797, -0.15877914428710938, 1.5866317749023438, 0.0059032440185546875, -0.033203125, 1.94866943359375, 0.05776214599609375, 1.5122833251953125, 1.1353588104248047, -0.2829132080078125, 0.3678131103515625, 0.11328887939453125, 1.4194183349609375, -0.5982704162597656, 0.1608428955078125, 0.19994735717773438, 0.5006294250488281, 0.5931243896484375, 0.833282470703125, -0.47969627380371094, 1.140380859375, -0.3473491668701172, 0.5736160278320312, 0.2970466613769531, -0.3375835418701172, -0.3074016571044922, 0.9358291625976562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000184.npy"}
{"epoch": 0.2781557067271353, "step": 185, "batch_size": 64, "mean": 0.7103538513183594, "std": 1.3366320133209229, "min": -2.0654067993164062, "p10": -0.8371513366699217, "median": 0.6469135284423828, "p90": 2.49031982421875, "max": 3.7982635498046875, "pos_frac": 0.71875, "sample": [0.6535682678222656, 3.7982635498046875, -0.6755905151367188, 1.0036449432373047, 2.4781646728515625, -2.0091781616210938, 1.0637378692626953, 0.16324615478515625, 0.122039794921875, -0.705810546875, 2.4955291748046875, 3.0887832641601562, 0.9317779541015625, -0.3187408447265625, 3.714874267578125, 1.0652103424072266, 2.4735679626464844, 0.34676361083984375, 1.0183219909667969, 1.1611385345458984, -0.6543350219726562, 1.7569122314453125, 0.7422828674316406, 1.3056869506835938, 0.8658714294433594, 2.0887718200683594, 1.0243186950683594, 0.9550018310546875, 1.4750518798828125, 0.485748291015625, -0.5502510070800781, 0.10774993896484375, 0.9542198181152344, 2.5933380126953125, -1.2620773315429688, 2.0619659423828125, -0.0795745849609375, 1.488250732421875, 0.5238151550292969, 0.15634536743164062, -2.0654067993164062, 0.6402587890625, -0.6224308013916016, 1.3094940185546875, 1.0409736633300781, -2.0640716552734375, 1.494710922241211, 0.4835853576660156, 2.36834716796875, -0.10540008544921875, 0.2191314697265625, -1.6854324340820312, 2.849853515625, -1.151947021484375, 0.1876678466796875, 0.3992881774902344, -0.8934402465820312, -0.3538036346435547, -0.04739570617675781, -0.19722747802734375, 0.15334129333496094, 0.62042236328125, 2.728179931640625, 2.245542526245117], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000185.npy"}
{"epoch": 0.2796674225245654, "step": 186, "batch_size": 64, "mean": 0.6798614859580994, "std": 1.0568002462387085, "min": -2.1789703369140625, "p10": -0.38770370483398436, "median": 0.5683879852294922, "p90": 2.058578681945801, "max": 3.6079559326171875, "pos_frac": 0.6875, "sample": [2.0954208374023438, 3.2010345458984375, -1.03363037109375, 0.9369640350341797, 0.508636474609375, 0.380035400390625, 1.7549285888671875, -0.6272048950195312, -0.5847015380859375, 1.30657958984375, 0.00847625732421875, -0.246795654296875, 0.5568389892578125, 2.0865840911865234, 1.95367431640625, 1.93841552734375, 0.70477294921875, -0.010875701904296875, -0.02552032470703125, -0.3736419677734375, 0.43631553649902344, -0.019750595092773438, 1.419297218322754, 2.3488006591796875, -2.1789703369140625, -0.027313232421875, -0.25299835205078125, -0.6506843566894531, -0.0645904541015625, 0.5799369812011719, 0.754180908203125, 1.8215045928955078, -0.2262115478515625, 0.020477294921875, -0.14823341369628906, -0.3363361358642578, 1.029296875, 1.9932327270507812, 1.5415992736816406, 0.6188163757324219, 0.21762466430664062, -0.39373016357421875, -0.1269989013671875, 0.3813629150390625, -0.02880859375, 0.0749053955078125, 1.2600078582763672, 1.4361114501953125, 0.1191253662109375, 0.6149978637695312, 2.1764678955078125, 1.0363216400146484, 1.9411773681640625, 2.616363525390625, 0.1519775390625, 0.7957305908203125, 3.6079559326171875, 1.1578083038330078, 0.6665267944335938, 1.3161773681640625, 1.17706298828125, 0.1426239013671875, -0.72149658203125, 0.7034778594970703], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000186.npy"}
{"epoch": 0.2811791383219955, "step": 187, "batch_size": 64, "mean": 0.8149705529212952, "std": 1.5193383693695068, "min": -4.548431396484375, "p10": -0.3702386856079101, "median": 0.9097537994384766, "p90": 2.2854255676269535, "max": 4.9098663330078125, "pos_frac": 0.796875, "sample": [1.2659988403320312, 0.1717529296875, 3.629425048828125, 2.1365432739257812, 4.9098663330078125, 1.3743515014648438, 2.03662109375, 0.13860321044921875, 1.1349334716796875, 0.9417572021484375, -0.278656005859375, 1.829742431640625, 0.4785575866699219, 2.9438629150390625, 1.1710052490234375, 0.9778461456298828, -0.06143379211425781, 2.1524505615234375, 1.2183837890625, 0.8777503967285156, -2.820098876953125, -2.3985462188720703, 2.3424148559570312, 0.20613670349121094, 1.0492134094238281, -0.18372344970703125, 0.19269180297851562, 0.363555908203125, 1.1959228515625, 1.588836669921875, 1.5065994262695312, 1.6127853393554688, -1.4679031372070312, 0.3696556091308594, 1.1529617309570312, 3.665821075439453, 0.4457359313964844, -0.112335205078125, -0.337646484375, 0.6934719085693359, -0.38420677185058594, -0.6991729736328125, 1.7689590454101562, -0.29070281982421875, 0.0975189208984375, 0.9784164428710938, 1.8133926391601562, 0.5251388549804688, 2.3860931396484375, 1.7784881591796875, 1.5590362548828125, 0.16713714599609375, 1.0030288696289062, 4.834625244140625, 0.3587799072265625, 0.1884765625, 0.084625244140625, 1.3342056274414062, 0.248046875, 0.5775833129882812, 0.42568206787109375, -1.1911354064941406, -4.548431396484375, 1.027618408203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000187.npy"}
{"epoch": 0.28269085411942557, "step": 188, "batch_size": 64, "mean": 0.7770620584487915, "std": 1.272729516029358, "min": -4.2092437744140625, "p10": -0.44824295043945295, "median": 0.7131814956665039, "p90": 2.291024208068848, "max": 3.798431396484375, "pos_frac": 0.828125, "sample": [0.6746673583984375, 0.32932090759277344, 2.027698516845703, -0.5547771453857422, 0.1307830810546875, 1.4815597534179688, 1.832427978515625, 0.785003662109375, 0.0491943359375, 0.2214202880859375, 0.13191986083984375, 2.22406005859375, 1.1928863525390625, -0.2094879150390625, 0.10717201232910156, 0.5737056732177734, 1.7340011596679688, 1.8213119506835938, 0.24250030517578125, -0.1262950897216797, 0.7516956329345703, -0.7191314697265625, 0.3602142333984375, 0.06285858154296875, 1.5064353942871094, 0.7779388427734375, -0.5279693603515625, 1.426483154296875, 0.050457000732421875, 2.866985321044922, 1.6362628936767578, 0.41756439208984375, 1.646881103515625, 2.5337142944335938, 1.5267410278320312, -0.23576736450195312, 2.4863510131835938, 1.6353607177734375, 0.06472015380859375, 0.18076324462890625, 3.0616722106933594, 0.4898681640625, 0.8902511596679688, 0.891448974609375, 0.5717620849609375, -0.967498779296875, 0.9793701171875, 1.6012191772460938, 0.9595375061035156, 0.41463661193847656, 3.798431396484375, -2.9259033203125, 0.2593345642089844, 0.5526161193847656, 1.3496856689453125, 1.7900123596191406, 0.2378368377685547, -0.6988754272460938, 2.4356307983398438, 2.319723129272461, 1.2256755828857422, 1.849365234375, -0.26221466064453125, -4.2092437744140625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000188.npy"}
{"epoch": 0.2842025699168556, "step": 189, "batch_size": 64, "mean": 0.8247175216674805, "std": 1.0902695655822754, "min": -1.7923583984375, "p10": -0.5342132568359375, "median": 0.7757673263549805, "p90": 2.327540397644044, "max": 3.0952682495117188, "pos_frac": 0.765625, "sample": [0.18561553955078125, 1.3426036834716797, 0.7750301361083984, 1.0741729736328125, 0.7765045166015625, 2.153594970703125, 1.5140151977539062, 1.1827163696289062, 0.36900901794433594, 1.3355560302734375, -1.7923583984375, 1.5260276794433594, -0.5598297119140625, 1.3126068115234375, 0.5906448364257812, -0.25270652770996094, 3.0952682495117188, 1.5569534301757812, -1.4084663391113281, 0.8079795837402344, 0.49414825439453125, 1.7264575958251953, -0.09174728393554688, 1.2625732421875, 0.15920257568359375, 0.7409515380859375, 1.0688323974609375, 0.8285064697265625, 0.9074802398681641, 0.0146942138671875, 2.0437698364257812, -0.498260498046875, 1.2182273864746094, 3.0930213928222656, 2.904735565185547, 1.3061676025390625, 0.687286376953125, -0.06015777587890625, 0.12570953369140625, 1.4033279418945312, 2.397798538208008, -0.046356201171875, 0.43274688720703125, 0.175689697265625, 0.8682937622070312, 1.5637092590332031, -0.9029502868652344, -0.6255302429199219, 0.2137451171875, 0.245635986328125, -0.280487060546875, 2.4706668853759766, -0.057708740234375, 2.0261917114257812, 2.163604736328125, -0.39163970947265625, -1.0975265502929688, -0.54962158203125, 0.7270088195800781, 2.642608642578125, 0.6031875610351562, 0.6364593505859375, 2.6439132690429688, 2.002614974975586], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000189.npy"}
{"epoch": 0.2857142857142857, "step": 190, "batch_size": 64, "mean": 0.821425199508667, "std": 1.1322664022445679, "min": -2.1625823974609375, "p10": -0.28799457550048824, "median": 0.7140998840332031, "p90": 2.432204818725586, "max": 3.0603561401367188, "pos_frac": 0.796875, "sample": [0.8040504455566406, 1.56536865234375, 2.9531631469726562, -2.1625823974609375, 0.9626007080078125, 1.5036239624023438, 0.2796783447265625, 0.2167205810546875, -0.8777999877929688, 2.7632598876953125, 0.37404632568359375, 1.0635986328125, 0.6064281463623047, -0.13941192626953125, 0.44196128845214844, 0.12697601318359375, 0.7099266052246094, 1.572540283203125, 1.000539779663086, 0.7370185852050781, 0.7846260070800781, 2.394855499267578, -0.028961181640625, -0.17979049682617188, -0.6028060913085938, 1.1618194580078125, -2.0773468017578125, 0.13454055786132812, 0.304718017578125, 0.5502567291259766, 2.3508453369140625, 0.20972824096679688, 0.8205108642578125, 0.8775634765625, 0.25995635986328125, 0.21644973754882812, 1.4137725830078125, -0.3110847473144531, -0.2341175079345703, 0.7182731628417969, 0.30643463134765625, 2.98687744140625, 2.2161293029785156, 1.6658935546875, 0.2800559997558594, -7.62939453125e-05, -0.9647579193115234, 1.9709320068359375, 0.5179901123046875, -0.6533184051513672, 0.7398681640625, 1.0030345916748047, 0.5206413269042969, 3.0603561401367188, 1.655120849609375, 2.448211669921875, 2.4794769287109375, 0.31630706787109375, 2.3083229064941406, 3.0590744018554688, -0.12932586669921875, 1.523193359375, 0.48193359375, 1.5133228302001953], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000190.npy"}
{"epoch": 0.2872260015117158, "step": 191, "batch_size": 64, "mean": 0.7929338216781616, "std": 1.1891398429870605, "min": -1.977294921875, "p10": -0.8296012878417967, "median": 0.8202714920043945, "p90": 2.4419923782348634, "max": 3.5983123779296875, "pos_frac": 0.765625, "sample": [1.6676559448242188, -0.58209228515625, -1.8759994506835938, 0.8237266540527344, 2.932260513305664, 1.2016372680664062, 1.453369140625, 1.6624603271484375, 0.7569217681884766, 0.9407501220703125, 1.4252166748046875, 2.8638839721679688, 1.3781051635742188, 0.8989906311035156, 1.1615371704101562, 0.7676353454589844, -1.0983963012695312, -1.1591644287109375, 3.34130859375, 0.48316383361816406, 1.4898223876953125, 3.09661865234375, 0.8168163299560547, 1.3362350463867188, -0.4375495910644531, 1.5088577270507812, -1.178802490234375, 2.469949722290039, -0.164886474609375, 0.3822174072265625, 1.4345588684082031, -0.9356765747070312, 0.10126495361328125, 0.571502685546875, 3.5983123779296875, -1.0742950439453125, 0.16796493530273438, -0.39910888671875, 2.376758575439453, 0.3543701171875, 1.4158821105957031, 0.919189453125, 0.44178009033203125, 0.814208984375, 1.0223579406738281, 0.5085773468017578, 1.8221054077148438, 1.4853858947753906, -0.06684494018554688, 0.32715606689453125, -0.19689178466796875, 0.8421688079833984, 1.1571044921875, 0.6643943786621094, -0.375, 1.126739501953125, 2.7290496826171875, -1.977294921875, 0.5404930114746094, -0.05934906005859375, 0.2900848388671875, 1.8607635498046875, 0.06136322021484375, 0.8364410400390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000191.npy"}
{"epoch": 0.2887377173091459, "step": 192, "batch_size": 64, "mean": 0.6475502848625183, "std": 1.0501163005828857, "min": -1.9852027893066406, "p10": -0.670711326599121, "median": 0.641230583190918, "p90": 1.9633405685424807, "max": 3.4850521087646484, "pos_frac": 0.78125, "sample": [-1.6682205200195312, -1.2858428955078125, 0.2933158874511719, 1.9213809967041016, 1.1030998229980469, -0.22565460205078125, -0.7648429870605469, 1.5081634521484375, 1.663970947265625, 0.6938629150390625, -0.9977073669433594, 0.3429107666015625, 0.08392333984375, 0.029010772705078125, 1.0539321899414062, 1.9813232421875, 1.7816810607910156, 0.903564453125, 0.3245410919189453, 1.0271377563476562, 1.251485824584961, 2.1784133911132812, 0.08801841735839844, 0.6032314300537109, 1.6331787109375, 0.18041038513183594, 0.0294036865234375, -0.06504440307617188, 0.9550018310546875, -0.20016098022460938, 1.5065536499023438, 0.4550628662109375, 0.9381542205810547, 0.7744102478027344, 0.1890087127685547, 0.2750530242919922, 1.1356201171875, -0.037799835205078125, 0.1547393798828125, 0.7512588500976562, 0.679229736328125, -0.2755126953125, -1.9852027893066406, 3.4850521087646484, 2.7354736328125, 0.7474994659423828, -0.6952972412109375, 1.0691299438476562, 0.11208343505859375, -0.8149948120117188, 1.47222900390625, 2.4089832305908203, 0.29163360595703125, 2.3473281860351562, 0.17077255249023438, 1.2061996459960938, 1.8588600158691406, -0.4813385009765625, 1.858367919921875, 1.9882125854492188, 0.7389812469482422, -0.6133441925048828, 0.17326736450195312, 0.400054931640625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000192.npy"}
{"epoch": 0.29024943310657597, "step": 193, "batch_size": 64, "mean": 0.8840053081512451, "std": 1.123247504234314, "min": -1.9583740234375, "p10": -0.35294723510742154, "median": 0.7561569213867188, "p90": 2.085428810119629, "max": 4.3653717041015625, "pos_frac": 0.859375, "sample": [0.604736328125, 0.7714385986328125, 0.9233551025390625, 0.845428466796875, 0.34842491149902344, 0.45458221435546875, 0.166656494140625, 1.539215087890625, 0.753936767578125, 0.06575393676757812, 1.8434600830078125, 0.021429061889648438, 1.9178962707519531, 0.76190185546875, 1.5446014404296875, -0.5448837280273438, 1.2426681518554688, -0.0046539306640625, 0.7217121124267578, 1.0931015014648438, -0.7019805908203125, 2.3031978607177734, 4.3653717041015625, 0.4366950988769531, 2.0373001098632812, 2.0414505004882812, 0.1962413787841797, 1.0521926879882812, 0.7583770751953125, 0.2726268768310547, 2.4641799926757812, 1.5540733337402344, 0.286590576171875, 0.9276199340820312, 0.5211563110351562, -0.49024200439453125, -0.7843475341796875, 0.5378646850585938, 0.8352203369140625, 1.8509140014648438, 1.4295501708984375, 0.3606834411621094, 1.1901969909667969, 4.33892822265625, -0.4949359893798828, -0.6974945068359375, 0.5920448303222656, 0.018795013427734375, 0.023645401000976562, 1.8900833129882812, 2.523773193359375, 0.022491455078125, 1.9986419677734375, 2.104276657104492, 3.1768531799316406, 1.9100112915039062, -0.0325927734375, -1.9583740234375, 0.66534423828125, 0.8147411346435547, 0.20815277099609375, 0.7689247131347656, 0.03083038330078125, 0.15650367736816406], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000193.npy"}
{"epoch": 0.29176114890400606, "step": 194, "batch_size": 64, "mean": 0.6577843427658081, "std": 1.237583041191101, "min": -2.050821304321289, "p10": -0.9687057495117187, "median": 0.7600011825561523, "p90": 2.084179115295411, "max": 4.335075378417969, "pos_frac": 0.71875, "sample": [0.5642471313476562, 0.8709716796875, 1.1554908752441406, -0.5367279052734375, 0.7220916748046875, 0.421661376953125, -0.7640953063964844, 0.6502246856689453, 0.48870086669921875, 1.0689239501953125, 1.355112075805664, 1.1619338989257812, 0.02501678466796875, 1.262603759765625, -1.1038970947265625, -1.8321990966796875, 2.5362777709960938, -0.41819000244140625, 0.8842906951904297, -1.2337799072265625, -0.97760009765625, 0.8475856781005859, 1.5499725341796875, 1.213470458984375, -0.5913105010986328, 0.8973159790039062, -0.13355064392089844, 0.982025146484375, 0.00438690185546875, 1.224365234375, -0.17026710510253906, 1.5778427124023438, -1.3388252258300781, -0.51708984375, -1.9202613830566406, -0.044708251953125, 1.9348125457763672, 1.87603759765625, 1.2137489318847656, 2.5603790283203125, -2.050821304321289, 0.9330291748046875, 0.6843357086181641, 0.5296897888183594, 2.6790695190429688, 2.3443603515625, 1.5505046844482422, -0.9479522705078125, 3.5076980590820312, 2.148193359375, 1.6282939910888672, 0.01900482177734375, 0.98175048828125, 1.6189384460449219, 0.5391769409179688, 4.335075378417969, 0.7687511444091797, -0.18363571166992188, -0.26615333557128906, 1.669342041015625, 0.7887134552001953, 0.751251220703125, 0.19281578063964844, 0.40978050231933594], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000194.npy"}
{"epoch": 0.29327286470143615, "step": 195, "batch_size": 64, "mean": 0.6532056331634521, "std": 1.1388905048370361, "min": -2.8780975341796875, "p10": -0.8566230773925779, "median": 0.6435317993164062, "p90": 2.0735788345336914, "max": 2.6155776977539062, "pos_frac": 0.71875, "sample": [1.1190338134765625, 0.7305393218994141, -1.3703155517578125, -0.9364089965820312, 2.252920150756836, 1.0492744445800781, 0.9600276947021484, 0.270294189453125, 1.7117767333984375, 0.6431121826171875, 2.0603408813476562, 1.8897647857666016, -1.7572479248046875, 0.1857452392578125, -0.9700927734375, -2.8780975341796875, -0.31339073181152344, 0.5093727111816406, 0.4040069580078125, 2.079252243041992, -0.6704559326171875, 1.9544830322265625, 1.3619728088378906, 1.71014404296875, 0.645233154296875, -0.2309246063232422, 0.9791984558105469, 2.1921119689941406, -0.129608154296875, 0.643951416015625, -0.46688079833984375, 0.36932373046875, 2.6155776977539062, 0.9172592163085938, 1.4812088012695312, -0.0930633544921875, -0.20783615112304688, 1.9877471923828125, 1.3516082763671875, 1.9621505737304688, -0.014028549194335938, -0.16933441162109375, 0.25176239013671875, 0.333648681640625, 0.12616539001464844, 0.40219688415527344, 0.208343505859375, 2.1190032958984375, -1.3267745971679688, 0.13851165771484375, -0.045013427734375, 0.8962554931640625, 2.3390274047851562, 1.703603744506836, 0.9696502685546875, 1.583944320678711, 2.405609130859375, 0.3363037109375, -1.1603927612304688, 2.0420303344726562, 0.8047599792480469, 0.1311492919921875, -0.079376220703125, 1.7950077056884766], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000195.npy"}
{"epoch": 0.2947845804988662, "step": 196, "batch_size": 64, "mean": 0.7313063144683838, "std": 1.2022453546524048, "min": -2.9636383056640625, "p10": -0.5437797546386719, "median": 0.7211494445800781, "p90": 2.1552688598632814, "max": 3.6820831298828125, "pos_frac": 0.71875, "sample": [-0.14794921875, -2.9636383056640625, -0.7490425109863281, 1.9472808837890625, 0.2767829895019531, -0.5567378997802734, 0.8061981201171875, 0.374420166015625, 0.8410415649414062, -0.7907943725585938, 1.50567626953125, 1.73175048828125, 1.30938720703125, 0.7423439025878906, 0.81536865234375, -0.0913543701171875, -1.7915077209472656, 1.8063735961914062, -1.6489944458007812, 3.06787109375, -0.4524726867675781, -0.07122039794921875, 1.7867374420166016, -0.1661224365234375, -0.1613941192626953, 1.36944580078125, -0.25579833984375, 0.5741195678710938, 2.151092529296875, 2.6329498291015625, 0.09145355224609375, 1.5646629333496094, 0.9244537353515625, 0.03968048095703125, 0.5062408447265625, -0.1049652099609375, 0.3244152069091797, 0.7263259887695312, 1.5732421875, 3.1302261352539062, 0.4960746765136719, 0.6998138427734375, 0.5737056732177734, 1.2312450408935547, 1.5495796203613281, -0.5650634765625, -0.5135440826416016, 1.069580078125, 1.0763931274414062, 2.1570587158203125, 0.715972900390625, 1.9955177307128906, -0.38124847412109375, 0.48370361328125, 1.0050411224365234, 3.6820831298828125, 2.7226295471191406, 2.5845680236816406, 0.05767822265625, 0.90472412109375, 0.7452011108398438, -0.2756767272949219, 0.45745849609375, 1.6635589599609375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000196.npy"}
{"epoch": 0.2962962962962963, "step": 197, "batch_size": 64, "mean": 0.836578905582428, "std": 1.5804094076156616, "min": -4.36083984375, "p10": -0.7455717086791992, "median": 0.6245536804199219, "p90": 2.9312469482421877, "max": 5.247413635253906, "pos_frac": 0.71875, "sample": [-0.43775367736816406, 3.2574691772460938, 0.8658847808837891, 0.167755126953125, -0.266357421875, 0.5944366455078125, -1.2499122619628906, -4.36083984375, 0.24964141845703125, 0.5043563842773438, 2.933208465576172, 1.7506427764892578, -0.7730712890625, -0.65277099609375, -0.07847404479980469, 3.246671676635742, -0.40187835693359375, 0.4777183532714844, 2.9422149658203125, 2.8228988647460938, -0.45688629150390625, 0.4917755126953125, 2.9266700744628906, 0.4836540222167969, 0.6591873168945312, 0.4227752685546875, 0.644287109375, -0.4806938171386719, 1.309539794921875, 2.0775604248046875, 1.8853034973144531, 3.237274169921875, -1.8240509033203125, 1.7839641571044922, -0.8017044067382812, -2.824512481689453, 1.0686225891113281, -0.9983062744140625, 0.5530853271484375, 1.6948928833007812, 0.12403488159179688, 2.031269073486328, 1.7742729187011719, -0.62115478515625, -0.17095184326171875, 2.357391357421875, 0.8158721923828125, 0.6048202514648438, 0.6963005065917969, 2.1179676055908203, -0.6814060211181641, 0.2118682861328125, 1.4891128540039062, 1.9645824432373047, 0.5400381088256836, -0.276580810546875, 5.247413635253906, 1.2930221557617188, 0.156585693359375, 1.1353492736816406, 3.5117645263671875, 2.282073974609375, 1.5393218994140625, 1.9538021087646484], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000197.npy"}
{"epoch": 0.29780801209372637, "step": 198, "batch_size": 64, "mean": 1.0111191272735596, "std": 1.259752631187439, "min": -1.81787109375, "p10": -0.5920225143432617, "median": 0.9652633666992188, "p90": 2.454821968078613, "max": 5.089302062988281, "pos_frac": 0.78125, "sample": [-0.869720458984375, 1.4585494995117188, 0.7996082305908203, 0.6966476440429688, 0.4262256622314453, 1.9209518432617188, -1.1843223571777344, 1.08837890625, 0.5246143341064453, 0.7251358032226562, 2.4768753051757812, 0.7582550048828125, -0.3045787811279297, -0.5464954376220703, 3.3640289306640625, 2.445354461669922, 1.82977294921875, -1.33551025390625, 1.597564697265625, -0.08660507202148438, 0.6481208801269531, 1.5554828643798828, 1.9968910217285156, -1.196298599243164, 2.5688934326171875, -0.8259735107421875, -0.018218994140625, -0.021589279174804688, 1.74676513671875, 0.45662498474121094, 1.1882057189941406, 1.0147705078125, 0.5434722900390625, -1.81787109375, 0.1723480224609375, -0.6115341186523438, -0.16952133178710938, 1.00213623046875, 0.29892730712890625, 0.6494979858398438, 1.1458587646484375, 0.9751968383789062, 2.7384376525878906, 0.9553298950195312, 1.4668941497802734, 1.7706680297851562, 0.3253459930419922, 2.0532093048095703, 3.9725875854492188, 2.4264144897460938, 1.6937026977539062, 0.7513198852539062, 0.47635650634765625, 2.1532726287841797, 1.4294662475585938, 1.9344329833984375, 2.07147216796875, 1.3543453216552734, -0.0902862548828125, 0.4490013122558594, 0.730560302734375, 2.4588794708251953, 5.089302062988281, 1.4139995574951172], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000198.npy"}
{"epoch": 0.29931972789115646, "step": 199, "batch_size": 64, "mean": 0.7136895656585693, "std": 1.1558401584625244, "min": -1.6448726654052734, "p10": -0.8559510231018066, "median": 0.6822271347045898, "p90": 2.0853666305541996, "max": 3.782642364501953, "pos_frac": 0.765625, "sample": [0.6039276123046875, 1.0244255065917969, -0.27861785888671875, 1.2277679443359375, 0.100830078125, -0.889556884765625, 3.1379852294921875, 1.5953483581542969, 0.5747756958007812, 1.48773193359375, 2.1056976318359375, -0.5778465270996094, 2.581157684326172, 1.7088050842285156, 2.3055992126464844, 1.45538330078125, 1.9857826232910156, 1.2622756958007812, 2.458770751953125, 2.0379276275634766, -0.01618194580078125, -1.60894775390625, 1.2823600769042969, 1.0862808227539062, 0.1971454620361328, 0.6852035522460938, 0.94195556640625, -0.440032958984375, 0.7596359252929688, 0.1002655029296875, 0.6638031005859375, 0.25739479064941406, 3.782642364501953, 0.7842254638671875, -0.8207426071166992, 0.18780517578125, 0.65765380859375, -0.7652740478515625, -0.8710403442382812, 0.9072341918945312, -1.6448726654052734, 0.87591552734375, 0.166290283203125, 1.5030593872070312, -1.5235061645507812, 1.5657196044921875, 1.1908283233642578, 1.3931198120117188, -0.029209136962890625, -0.245452880859375, 0.1412067413330078, -1.5641632080078125, 0.290618896484375, 2.2748870849609375, -1.0626373291015625, 0.338134765625, 1.6829643249511719, 0.6005630493164062, 0.8773193359375, 0.6792507171630859, 2.0044403076171875, 0.039127349853515625, 1.985076904296875, 0.45789337158203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000199.npy"}
{"epoch": 0.30083144368858655, "step": 200, "batch_size": 64, "mean": 0.9813716411590576, "std": 1.402414321899414, "min": -2.819732666015625, "p10": -0.5804630279541015, "median": 0.7611675262451172, "p90": 3.0113304138183596, "max": 4.33953857421875, "pos_frac": 0.75, "sample": [2.003602981567383, 2.417133331298828, 4.33953857421875, -0.30709075927734375, 2.217254638671875, 0.6211299896240234, 0.7407455444335938, -0.596160888671875, 1.3025588989257812, 0.363128662109375, -0.83465576171875, 0.8087158203125, -0.039394378662109375, 3.6011505126953125, 0.3576927185058594, -0.10453605651855469, 0.9574127197265625, -0.09744453430175781, 2.6619033813476562, -1.286041259765625, -0.799896240234375, 0.5416946411132812, 1.8764114379882812, 3.2946205139160156, 0.10360336303710938, 1.2985706329345703, 0.7240486145019531, 1.1422805786132812, 1.5266227722167969, 0.44687652587890625, 2.747772216796875, 1.1640243530273438, 1.02349853515625, 3.9221649169921875, 1.8555374145507812, -0.2868461608886719, -2.819732666015625, 1.9168663024902344, 3.1824493408203125, 1.8174266815185547, 1.988525390625, 0.8722457885742188, 4.0048675537109375, 0.6011199951171875, 0.6702804565429688, -1.5679779052734375, 0.14211273193359375, 0.76544189453125, 0.6468410491943359, 3.0460128784179688, -0.438568115234375, 0.9714202880859375, 1.5569190979003906, -0.07028388977050781, 0.7528610229492188, 0.9302825927734375, -0.28926658630371094, -0.6492729187011719, 0.16091156005859375, 0.7568931579589844, 0.42583274841308594, -0.5438346862792969, 2.9304046630859375, 1.3393783569335938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000200.npy"}
{"epoch": 0.30234315948601664, "step": 201, "batch_size": 64, "mean": 0.8532842397689819, "std": 1.1067678928375244, "min": -1.1401405334472656, "p10": -0.5156665802001953, "median": 0.775238037109375, "p90": 2.2434299468994143, "max": 3.492328643798828, "pos_frac": 0.8125, "sample": [0.957305908203125, 2.178192138671875, 0.36529541015625, 0.2437286376953125, 0.13842010498046875, 0.9747467041015625, 3.187053680419922, 0.18919754028320312, 0.7414398193359375, 0.207794189453125, 0.200286865234375, 1.5441741943359375, 0.6052494049072266, 2.2720718383789062, 0.23065948486328125, 1.0465545654296875, 2.3201904296875, 1.3262710571289062, -0.3987579345703125, 3.3610000610351562, -1.1401405334472656, 0.8839111328125, -0.01604461669921875, -0.5386543273925781, 0.48681640625, 3.0252761840820312, 0.6836090087890625, 0.7478561401367188, -0.877777099609375, 0.9551582336425781, 0.7620391845703125, 1.9530792236328125, 0.3131904602050781, -0.46202850341796875, 1.5991535186767578, 3.492328643798828, -0.8901824951171875, -0.8443527221679688, 0.108978271484375, 0.10967254638671875, 1.9364166259765625, 0.8695507049560547, -0.40252113342285156, 0.951385498046875, 0.06804847717285156, 1.8770675659179688, 0.7979202270507812, 2.0186767578125, 0.9505271911621094, 2.0860137939453125, -0.34503173828125, 2.163238525390625, 0.17097091674804688, 1.8879165649414062, 0.4857959747314453, 2.1299591064453125, 1.5660820007324219, -0.8152236938476562, 0.0859832763671875, 1.1739044189453125, 2.2713890075683594, 0.846954345703125, 0.7884368896484375, -0.99603271484375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000201.npy"}
{"epoch": 0.30385487528344673, "step": 202, "batch_size": 64, "mean": 0.9893627166748047, "std": 1.2383246421813965, "min": -1.1313018798828125, "p10": -0.529421043395996, "median": 0.9170818328857422, "p90": 2.679824829101563, "max": 4.482128143310547, "pos_frac": 0.796875, "sample": [-0.4084129333496094, 0.2430877685546875, 3.720287322998047, 2.806549072265625, 1.476755142211914, -0.40914154052734375, 1.4994049072265625, 0.9459152221679688, 0.5630149841308594, 1.42816162109375, 0.2926921844482422, 1.6801528930664062, 0.8136711120605469, 1.3805065155029297, -0.5411300659179688, 1.3887252807617188, 2.2841262817382812, -0.3558998107910156, 1.5146331787109375, -0.963287353515625, 0.7694778442382812, 1.694244384765625, 1.29998779296875, 0.7335205078125, -0.5020999908447266, 2.8184356689453125, 0.7459220886230469, 4.482128143310547, -0.0100860595703125, 2.736083984375, 0.038177490234375, 1.8480720520019531, 0.4238548278808594, 1.4707489013671875, 0.8591842651367188, -1.1313018798828125, 1.7954940795898438, 1.0428752899169922, 0.5885238647460938, -0.9650726318359375, 2.548553466796875, 1.2938995361328125, 0.6515426635742188, 0.7495574951171875, 0.45706939697265625, 1.6612167358398438, 1.0196685791015625, 0.9260978698730469, 1.3583526611328125, 0.52716064453125, 0.06638526916503906, -1.1126594543457031, -0.45977783203125, -0.9453487396240234, 1.5590362548828125, 0.6099853515625, 3.634857177734375, 0.19062042236328125, 0.9291801452636719, 1.349435806274414, -0.8481826782226562, 3.9911422729492188, 0.9080657958984375, 2.1553726196289062], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000202.npy"}
{"epoch": 0.30536659108087677, "step": 203, "batch_size": 64, "mean": 0.7342634797096252, "std": 1.3490254878997803, "min": -2.6456832885742188, "p10": -0.9268798828124999, "median": 0.5932464599609375, "p90": 2.2457042694091798, "max": 4.242515563964844, "pos_frac": 0.734375, "sample": [1.5720443725585938, 0.16032791137695312, -2.6456832885742188, 1.00775146484375, -0.9768218994140625, 0.02167510986328125, 0.32285308837890625, 3.43389892578125, -0.220855712890625, 0.36556243896484375, 1.2355499267578125, 0.5596427917480469, 0.49939537048339844, 2.7127532958984375, 0.23687171936035156, 3.5681610107421875, 1.6365509033203125, -0.6564865112304688, 1.7741050720214844, 0.504180908203125, -1.698822021484375, -0.3795318603515625, 2.0056686401367188, -0.4191627502441406, -0.6061973571777344, -0.6254119873046875, 0.4345512390136719, 3.508544921875, 0.44670867919921875, 2.1981277465820312, 2.625457763671875, 1.3549880981445312, 1.8439140319824219, 0.6349658966064453, -1.0654754638671875, 0.7322769165039062, 1.4685440063476562, 1.5949363708496094, 1.758148193359375, 0.3107032775878906, 2.1384658813476562, -1.465362548828125, 1.8823699951171875, 1.0388412475585938, 0.8687362670898438, -0.27185821533203125, 1.20574951171875, 1.2426300048828125, 0.42801666259765625, 4.242515563964844, 0.6268501281738281, -0.007419586181640625, 0.0024871826171875, 1.615509033203125, 1.121002197265625, -1.5181083679199219, -1.2526473999023438, 1.7019195556640625, 2.266094207763672, 0.7894840240478516, 0.2659797668457031, -0.8103485107421875, -0.4467811584472656, 0.09432601928710938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000203.npy"}
{"epoch": 0.30687830687830686, "step": 204, "batch_size": 64, "mean": 0.7767831087112427, "std": 1.4708671569824219, "min": -2.6697845458984375, "p10": -1.0695524215698242, "median": 0.6499958038330078, "p90": 2.5071975708007814, "max": 5.571022033691406, "pos_frac": 0.734375, "sample": [5.571022033691406, 0.27008056640625, -1.0831050872802734, 1.3600997924804688, 0.4178791046142578, 4.5352935791015625, 0.69287109375, 0.7898845672607422, -0.5997543334960938, -1.517059326171875, 1.52410888671875, -1.4943313598632812, -0.6028919219970703, 2.061595916748047, 0.20175933837890625, 2.5404052734375, 1.7376174926757812, -0.19292449951171875, -1.4627437591552734, -1.1096267700195312, 1.3694496154785156, -0.32883453369140625, 0.6284637451171875, 2.1092758178710938, 0.31754112243652344, 0.10321807861328125, 0.9734954833984375, 0.7854137420654297, 0.3173866271972656, 1.4766120910644531, 3.2258243560791016, -0.19478607177734375, 1.1162605285644531, 2.4911117553710938, 1.2288970947265625, -0.17119598388671875, 0.6763420104980469, 3.5076351165771484, -0.8399295806884766, 2.3966522216796875, 0.1722259521484375, 0.3163719177246094, 1.5785903930664062, 1.4888916015625, -1.4262466430664062, 1.5193519592285156, 0.1194305419921875, -1.0379295349121094, 0.699066162109375, 0.6715278625488281, 2.2774810791015625, 2.5140914916992188, 0.5913562774658203, 1.3186569213867188, 0.28113555908203125, 2.5990333557128906, 1.8318252563476562, 0.19194412231445312, -0.43798828125, 0.45980072021484375, -2.6697845458984375, -0.4915294647216797, 2.0020809173583984, 0.3157196044921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000204.npy"}
{"epoch": 0.30839002267573695, "step": 205, "batch_size": 64, "mean": 0.5765775442123413, "std": 1.4284342527389526, "min": -4.070526123046875, "p10": -0.8778926849365234, "median": 0.43215274810791016, "p90": 2.5969207763671878, "max": 4.271575927734375, "pos_frac": 0.640625, "sample": [4.271575927734375, 0.0370025634765625, -0.10699844360351562, 1.2368965148925781, 0.5116653442382812, 3.1744346618652344, 0.45421791076660156, 0.41008758544921875, -4.070526123046875, -0.1901702880859375, 0.374114990234375, 0.009321212768554688, 2.8108749389648438, 1.1882247924804688, 1.4204769134521484, -0.896270751953125, -0.7112579345703125, -0.1910400390625, -0.713287353515625, 0.27077484130859375, 0.3953113555908203, 1.1897125244140625, -0.019247055053710938, 1.8266830444335938, 1.5744857788085938, -0.0589141845703125, -0.9599990844726562, -0.8343048095703125, -0.8350105285644531, 1.9318008422851562, 0.6277503967285156, -1.1086158752441406, 1.515655517578125, 1.2901153564453125, -0.6620330810546875, 2.518585205078125, 1.2662677764892578, 0.344757080078125, -0.17075729370117188, 2.5431785583496094, 1.02850341796875, 2.6539688110351562, 1.6807823181152344, 1.030670166015625, 3.266143798828125, 0.1870269775390625, 0.5346145629882812, -0.13367462158203125, 2.7561264038085938, -1.6844253540039062, -0.3143157958984375, 1.080718994140625, 1.4611358642578125, 1.1144561767578125, -1.437713623046875, 1.4373397827148438, 1.0959739685058594, -0.8023605346679688, 2.619953155517578, -0.7546005249023438, 0.94781494140625, -2.1653919219970703, -0.4960517883300781, 0.12872886657714844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000205.npy"}
{"epoch": 0.30990173847316704, "step": 206, "batch_size": 64, "mean": 0.7163584232330322, "std": 1.3610858917236328, "min": -2.4708709716796875, "p10": -0.9132781982421874, "median": 0.7899646759033203, "p90": 2.6363357543945325, "max": 3.8477935791015625, "pos_frac": 0.65625, "sample": [-0.046733856201171875, -1.1777801513671875, -0.8134002685546875, -1.0243606567382812, -1.4188976287841797, -0.24907684326171875, 0.8700027465820312, 0.7671546936035156, 1.3792877197265625, 1.4486541748046875, 1.4733390808105469, 2.9038925170898438, 2.3518142700195312, -0.4716987609863281, 0.8891220092773438, 3.61822509765625, -0.7652091979980469, -0.23842620849609375, -0.014739990234375, 0.5839920043945312, 1.8127250671386719, 0.5406417846679688, 0.6435317993164062, 0.745208740234375, -0.838531494140625, 1.8465194702148438, 1.4638557434082031, -2.1351985931396484, 0.812774658203125, 1.4651870727539062, 1.94805908203125, 1.8663959503173828, 0.2266998291015625, 1.4937820434570312, 1.25946044921875, 0.1798095703125, -2.4708709716796875, -0.7301101684570312, -0.4393596649169922, 2.8315277099609375, 2.3582305908203125, 0.6830177307128906, 3.8477935791015625, -0.7492446899414062, -0.030467987060546875, -0.13684654235839844, 3.0505523681640625, 0.874969482421875, 1.087127685546875, 1.983673095703125, 2.755523681640625, 1.717926025390625, 0.8519287109375, -0.11311912536621094, 1.335296630859375, 1.1661911010742188, -0.2460918426513672, 0.3329906463623047, 0.8720321655273438, 2.9845733642578125, -1.6505260467529297, -0.9453125, 0.2817230224609375, 0.9477310180664062], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000206.npy"}
{"epoch": 0.31141345427059713, "step": 207, "batch_size": 64, "mean": 0.7667145729064941, "std": 1.3212836980819702, "min": -1.6452560424804688, "p10": -0.7621931076049804, "median": 0.6451625823974609, "p90": 2.128364562988282, "max": 6.6520538330078125, "pos_frac": 0.734375, "sample": [6.6520538330078125, -0.2319183349609375, -0.8027000427246094, 0.09580230712890625, 0.17798233032226562, 0.6650791168212891, 0.065399169921875, 0.12598037719726562, -0.145599365234375, 1.1257781982421875, 1.25531005859375, 0.6330490112304688, -0.6676769256591797, 1.4057769775390625, -0.45343589782714844, 0.17279052734375, 0.9129791259765625, 1.84136962890625, 0.6448173522949219, 1.5812492370605469, 1.8143348693847656, 1.4642486572265625, 0.7582283020019531, 0.465545654296875, -0.13784027099609375, -1.3648681640625, -0.9817047119140625, 1.1270523071289062, -0.12220001220703125, 0.4671173095703125, 1.7485847473144531, 0.22724151611328125, 2.49212646484375, 1.1121139526367188, 2.2120208740234375, -0.43457794189453125, -0.9150314331054688, 1.4744491577148438, 3.46929931640625, 0.6601676940917969, 1.1129913330078125, 2.665618896484375, 0.22911834716796875, 0.49210357666015625, 0.518341064453125, 0.71453857421875, 0.8335418701171875, -0.37918853759765625, 1.7452278137207031, 1.7404022216796875, -1.6452560424804688, 1.93316650390625, -1.380859375, 0.5875816345214844, 1.46795654296875, 3.095123291015625, 1.21917724609375, 0.5487594604492188, -0.063385009765625, 0.9630813598632812, -0.9665260314941406, -0.44549560546875, 0.6455078125, 2.8478126525878906], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000207.npy"}
{"epoch": 0.3129251700680272, "step": 208, "batch_size": 64, "mean": 0.6710573434829712, "std": 1.3329163789749146, "min": -1.8237495422363281, "p10": -1.0052360534667968, "median": 0.5619840621948242, "p90": 2.422793960571289, "max": 4.319427490234375, "pos_frac": 0.671875, "sample": [-0.22971153259277344, 0.3317146301269531, 2.466266632080078, -1.415506362915039, 3.7042922973632812, 3.055206298828125, 0.5507869720458984, -0.3283233642578125, 1.176177978515625, -1.3030509948730469, -0.02105712890625, -1.0218505859375, 1.7005157470703125, 1.534820556640625, 2.186817169189453, 0.6956081390380859, 1.2903060913085938, 1.916900634765625, -0.09975242614746094, -0.64984130859375, 1.4860916137695312, -0.9664688110351562, 0.5456333160400391, 0.9441299438476562, -0.03594779968261719, 2.4167137145996094, 1.8762130737304688, -0.8534164428710938, 1.2026824951171875, 2.132946014404297, 0.09549140930175781, 0.1610870361328125, 0.17024993896484375, 0.8662872314453125, -1.4732437133789062, 0.9658050537109375, 2.6293563842773438, -0.054656982421875, 0.0514678955078125, 0.5962963104248047, 2.4253997802734375, 0.57318115234375, 0.02202606201171875, -0.543914794921875, 1.8786849975585938, 1.0358505249023438, 1.217193603515625, -0.0003070831298828125, 1.3652534484863281, 0.5312824249267578, -1.7449111938476562, 1.7642288208007812, -0.940032958984375, 1.679656982421875, -1.8237495422363281, 4.319427490234375, 0.275970458984375, -0.9125099182128906, -0.4119224548339844, 0.9898109436035156, -1.1095199584960938, 1.401397705078125, 0.0213623046875, 2.6367721557617188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000208.npy"}
{"epoch": 0.3144368858654573, "step": 209, "batch_size": 64, "mean": 0.9283081293106079, "std": 1.4410808086395264, "min": -2.5231094360351562, "p10": -0.7936794281005859, "median": 0.7639780044555664, "p90": 2.982798576354982, "max": 4.181575775146484, "pos_frac": 0.734375, "sample": [2.0696754455566406, 2.6258277893066406, 0.3507499694824219, -0.09644699096679688, -1.0523834228515625, 2.1845703125, 1.6846847534179688, 1.4289703369140625, 0.4160308837890625, 3.7451324462890625, -0.8027420043945312, 0.6515350341796875, 0.58062744140625, 0.9863128662109375, 1.0747833251953125, 0.4189300537109375, -0.7306747436523438, 0.22448348999023438, -0.7725334167480469, 3.1755218505859375, 0.09362030029296875, -1.1005935668945312, -0.07378768920898438, 1.2673416137695312, 0.7394943237304688, 0.8544769287109375, 3.5814056396484375, 1.085174560546875, 1.9953842163085938, -0.0811614990234375, 0.47353553771972656, 3.8759765625, 0.7711391448974609, 2.3707656860351562, 0.433502197265625, 0.0011749267578125, 1.7551727294921875, -0.26769256591796875, -2.5231094360351562, 1.64337158203125, 0.40419769287109375, 0.7129077911376953, -0.29776763916015625, -0.3073883056640625, 2.549560546875, 1.3097476959228516, 4.025581359863281, 2.352170944213867, 0.938262939453125, 1.3494720458984375, 0.7568168640136719, -0.9884147644042969, -0.1063690185546875, -0.301666259765625, -1.83453369140625, 0.8347225189208984, 4.181575775146484, -1.4320068359375, 2.535736083984375, 1.0165958404541016, 1.6455650329589844, 3.1357860565185547, 0.531280517578125, 1.3416404724121094], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000209.npy"}
{"epoch": 0.31594860166288735, "step": 210, "batch_size": 64, "mean": 1.014129400253296, "std": 1.433078646659851, "min": -1.6248626708984375, "p10": -0.8169235229492187, "median": 0.8181819915771484, "p90": 2.8467964172363285, "max": 4.531181335449219, "pos_frac": 0.71875, "sample": [2.7929153442382812, 0.11714935302734375, -1.2156982421875, 2.296295166015625, 0.1326007843017578, 1.96185302734375, 3.7059974670410156, 1.432037353515625, 4.531181335449219, 3.122589111328125, 2.3349037170410156, 2.8698883056640625, 2.7333831787109375, -0.2841033935546875, 3.6771240234375, 0.2622871398925781, -1.0553779602050781, -0.850372314453125, 2.001434326171875, -0.9643402099609375, 2.7430572509765625, 0.21875, 0.37717437744140625, -0.355865478515625, 2.3942108154296875, -0.5379867553710938, -0.3997669219970703, 0.14808082580566406, 3.2696456909179688, -0.10736083984375, 2.3564987182617188, 0.3285636901855469, 1.6355323791503906, -0.01995849609375, 1.5229377746582031, -0.2358856201171875, 0.22079849243164062, 0.13116836547851562, 0.7900733947753906, 2.8751583099365234, -0.9874553680419922, 2.6436767578125, 1.0927581787109375, 1.0387763977050781, 1.6000900268554688, 0.124481201171875, 1.9130058288574219, -0.21475791931152344, 2.095947265625, 0.35748291015625, 0.07571792602539062, 0.7082500457763672, 2.402637481689453, -1.1911945343017578, 0.9491348266601562, 2.4478378295898438, 1.6023483276367188, 0.8462905883789062, 1.3857307434082031, -1.6248626708984375, -0.09515762329101562, -0.7388763427734375, -0.126220703125, 1.6420726776123047], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000210.npy"}
{"epoch": 0.31746031746031744, "step": 211, "batch_size": 64, "mean": 0.8127955198287964, "std": 1.089146614074707, "min": -1.3472938537597656, "p10": -0.5350456237792967, "median": 0.6727104187011719, "p90": 2.292338943481446, "max": 3.686767578125, "pos_frac": 0.828125, "sample": [0.21689605712890625, -1.3472938537597656, 1.6068496704101562, 0.9303874969482422, -0.37335968017578125, 0.42162513732910156, 0.0301055908203125, 1.133880615234375, -0.6748046875, 0.4385089874267578, 2.770355224609375, 0.02898406982421875, -0.03327178955078125, 1.9119033813476562, 0.3254737854003906, 1.3659076690673828, 0.6385536193847656, 0.2735557556152344, 1.2888641357421875, 0.9162712097167969, 0.6631298065185547, -0.604339599609375, 0.7054901123046875, -0.277679443359375, 0.6699142456054688, -1.0960750579833984, 0.3433380126953125, 2.1010665893554688, 3.3327903747558594, 0.83544921875, 1.3530769348144531, 1.9450225830078125, 0.41670799255371094, 0.03842926025390625, 1.0036849975585938, 0.8800163269042969, 1.8248291015625, 0.5211334228515625, 0.04598236083984375, 1.3194427490234375, 0.8062477111816406, 3.686767578125, 2.459869384765625, 0.2384490966796875, 0.05080413818359375, 0.3775520324707031, 1.8549690246582031, 0.675506591796875, 0.5112762451171875, 2.3254547119140625, -0.894287109375, 0.7388496398925781, 2.9381256103515625, 2.215068817138672, 2.7550125122070312, 1.5186614990234375, 0.34632110595703125, 1.9180488586425781, -0.3685016632080078, 0.7415618896484375, -0.6511001586914062, -1.2611007690429688, 0.3827552795410156, 0.7617988586425781], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000211.npy"}
{"epoch": 0.31897203325774753, "step": 212, "batch_size": 64, "mean": 0.8448594212532043, "std": 1.2931257486343384, "min": -1.783660888671875, "p10": -0.9682533264160156, "median": 0.8463630676269531, "p90": 2.3220891952514653, "max": 5.643951416015625, "pos_frac": 0.71875, "sample": [2.5325546264648438, 1.3524589538574219, 1.6283607482910156, 1.946645736694336, 0.0006504058837890625, 0.9101486206054688, 2.214710235595703, 1.1500473022460938, -0.9100570678710938, 0.8870849609375, -0.3470420837402344, -1.783660888671875, 3.009082794189453, 5.643951416015625, 0.9635238647460938, 2.25885009765625, 1.9573173522949219, 0.8010787963867188, -0.993194580078125, 1.0839691162109375, 0.25733184814453125, -0.11379432678222656, 0.46149253845214844, 1.114450454711914, -0.9943161010742188, 0.206146240234375, 1.90264892578125, -1.230123519897461, 2.4614696502685547, 2.410614013671875, 0.8056411743164062, 1.8936996459960938, 1.4278335571289062, 1.26611328125, 2.1933021545410156, 0.12007904052734375, -0.09300613403320312, -0.00598907470703125, 0.47543907165527344, 0.5870265960693359, -0.24462127685546875, 0.080474853515625, 1.6581001281738281, -0.31815338134765625, 1.2153205871582031, 0.6738319396972656, 1.4428749084472656, 3.4365615844726562, -1.1548805236816406, -1.0435333251953125, -0.09666061401367188, 1.1453475952148438, 1.28204345703125, 0.07061767578125, -0.168701171875, 2.349191665649414, 1.469573974609375, 1.518890380859375, 1.723236083984375, 0.7930145263671875, 0.5829010009765625, -0.0924530029296875, -1.0467758178710938, -0.6577377319335938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000212.npy"}
{"epoch": 0.3204837490551776, "step": 213, "batch_size": 64, "mean": 0.9262403249740601, "std": 1.4119759798049927, "min": -2.6521644592285156, "p10": -0.9043975830078123, "median": 0.9942264556884766, "p90": 2.7819053649902346, "max": 4.240966796875, "pos_frac": 0.78125, "sample": [1.571706771850586, -0.6818771362304688, 0.4491119384765625, 1.905426025390625, 1.0483932495117188, 0.6810703277587891, -1.140228271484375, -0.40570640563964844, -1.711517333984375, 1.8915481567382812, 2.459228515625, 0.5127277374267578, 0.580474853515625, -0.9997634887695312, -2.6521644592285156, -1.0830230712890625, 1.266519546508789, 3.295074462890625, -0.5918769836425781, 1.4805526733398438, 2.330495834350586, -1.4976673126220703, 3.5156021118164062, 2.6393203735351562, 2.7145233154296875, 1.0748786926269531, -0.4496936798095703, -0.6247406005859375, 0.9994735717773438, 0.21614837646484375, -1.2766342163085938, -0.2983245849609375, 2.2263031005859375, 0.7266292572021484, 1.0355072021484375, 1.11212158203125, 0.5020847320556641, 1.5937786102294922, 2.242410659790039, 0.8679122924804688, 1.03704833984375, 0.28035736083984375, 3.750164031982422, 0.29901695251464844, 1.5312728881835938, 1.2810516357421875, 0.6978607177734375, 1.3738136291503906, 3.0877227783203125, 0.1719970703125, -0.16077423095703125, 1.0062332153320312, 1.0570831298828125, 0.10776519775390625, 0.9889793395996094, 0.7741546630859375, 0.32696533203125, 4.240966796875, 0.13640594482421875, 0.18084716796875, 1.1862258911132812, 3.807535171508789, 1.7800979614257812, 2.8107833862304688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000213.npy"}
{"epoch": 0.3219954648526077, "step": 214, "batch_size": 64, "mean": 0.9342004060745239, "std": 1.3737304210662842, "min": -2.7562255859375, "p10": -0.645408821105957, "median": 0.6593132019042969, "p90": 2.5796077728271487, "max": 5.330413818359375, "pos_frac": 0.78125, "sample": [1.8026657104492188, 0.5585708618164062, -0.93792724609375, 1.0374469757080078, 0.41770172119140625, 1.7009124755859375, 0.48439788818359375, 0.3732872009277344, 1.4388656616210938, 1.2704544067382812, 2.8060264587402344, 1.4692440032958984, 0.3299293518066406, 1.86822509765625, 0.0570220947265625, 3.84686279296875, 1.06103515625, 1.6213455200195312, 1.7357635498046875, 0.38548851013183594, -0.5107536315917969, 1.6547355651855469, 0.423065185546875, 0.63934326171875, -0.13261795043945312, 1.6009712219238281, -0.9168548583984375, 0.5103988647460938, 2.6081504821777344, 1.712890625, 0.8281936645507812, 0.21762466430664062, -1.1319198608398438, 0.7584686279296875, 1.4527435302734375, 1.6988525390625, -2.7562255859375, -0.00197601318359375, 2.9700050354003906, 5.330413818359375, 0.39589691162109375, -0.9407520294189453, 2.4473342895507812, 0.8073959350585938, 3.538799285888672, 4.0498199462890625, 0.5000953674316406, 0.32291412353515625, 0.21474456787109375, -0.6875591278076172, -0.54705810546875, 2.5130081176757812, 0.6580696105957031, -0.3256263732910156, 0.6994037628173828, -0.37279510498046875, 1.96209716796875, 0.6605567932128906, -0.864837646484375, 0.6259307861328125, 1.58843994140625, 2.405193328857422, -0.3962211608886719, 0.2511463165283203], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000214.npy"}
{"epoch": 0.3235071806500378, "step": 215, "batch_size": 64, "mean": 1.0707132816314697, "std": 1.369892954826355, "min": -1.3231048583984375, "p10": -0.6166507720947265, "median": 0.8096437454223633, "p90": 3.1203884124755867, "max": 4.677562713623047, "pos_frac": 0.765625, "sample": [0.2931804656982422, 0.5086135864257812, -1.2395782470703125, 0.09360504150390625, 1.8012542724609375, 0.4192352294921875, 1.3352737426757812, -0.692169189453125, 2.9504432678222656, 2.2103958129882812, 0.14459991455078125, -0.3945045471191406, -0.6907958984375, 0.4767951965332031, 1.3422088623046875, 2.9421844482421875, -0.24036407470703125, 1.5974273681640625, 0.26522064208984375, -0.3542938232421875, 1.6053543090820312, 2.5958938598632812, -0.21928787231445312, 2.515289306640625, 2.0049667358398438, 0.3748455047607422, -0.1267242431640625, 0.5537338256835938, 0.7296600341796875, -0.5432472229003906, 0.077789306640625, 3.1932220458984375, 0.34978485107421875, 0.732391357421875, 0.5018310546875, -1.3231048583984375, 1.4406280517578125, 0.9084625244140625, 1.4775161743164062, 3.337493896484375, 0.8056888580322266, 0.8135986328125, -0.8580780029296875, -0.6481094360351562, 1.995269775390625, 3.553314208984375, 2.0960235595703125, 4.677562713623047, 2.5889720916748047, 2.578216552734375, 1.5720806121826172, 3.4125137329101562, 1.0883407592773438, -0.8749160766601562, 0.9639263153076172, 0.15796852111816406, 1.09088134765625, 3.7128448486328125, 1.93365478515625, -0.12573814392089844, 3.3276443481445312, 0.5285968780517578, 1.4792938232421875, -0.29913330078125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000215.npy"}
{"epoch": 0.3250188964474679, "step": 216, "batch_size": 64, "mean": 1.174886703491211, "std": 2.431396722793579, "min": -2.0684051513671875, "p10": -1.00770263671875, "median": 0.7755851745605469, "p90": 2.9515121459960945, "max": 15.363494873046875, "pos_frac": 0.734375, "sample": [1.1880645751953125, 2.254791259765625, 1.255523681640625, 4.766357421875, 2.209545135498047, 0.6306190490722656, 0.5970783233642578, 0.1030426025390625, 2.070171356201172, 3.0319366455078125, 2.2738876342773438, 1.2819900512695312, 0.7685470581054688, -0.70220947265625, 1.8940582275390625, -1.8123779296875, 0.5934200286865234, 1.3290557861328125, -1.0302505493164062, 0.7069091796875, 1.1443939208984375, 1.7194671630859375, -1.0095291137695312, 1.8834762573242188, 1.5272750854492188, 0.782623291015625, 2.3438758850097656, 0.20386886596679688, 0.5584754943847656, 4.466156005859375, 1.7196578979492188, -0.6837539672851562, 0.6624050140380859, 1.23992919921875, -2.0684051513671875, 2.038909912109375, -1.9962081909179688, 0.23943328857421875, 2.068531036376953, -0.5051746368408203, -0.556304931640625, 15.363494873046875, 6.537109375, -1.0034408569335938, 0.34004974365234375, -1.8855209350585938, 1.9152488708496094, 0.1535358428955078, -0.2318267822265625, 2.76385498046875, 3.227436065673828, -0.9374961853027344, -0.7022628784179688, 0.12690162658691406, 3.0970916748046875, -0.6056785583496094, 0.1705608367919922, 1.4747467041015625, -1.1420974731445312, 2.71337890625, -0.12047767639160156, 2.01519775390625, 0.19350433349609375, 2.5401763916015625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000216.npy"}
{"epoch": 0.32653061224489793, "step": 217, "batch_size": 64, "mean": 1.4241769313812256, "std": 1.6076887845993042, "min": -1.3749542236328125, "p10": -0.11366882324218744, "median": 0.9827289581298828, "p90": 3.1896741867065432, "max": 7.3515167236328125, "pos_frac": 0.859375, "sample": [1.0806655883789062, 4.578990936279297, -0.14080047607421875, 0.6660385131835938, 0.3712348937988281, -0.035991668701171875, 0.4650745391845703, 7.3515167236328125, 0.18707275390625, 1.0225830078125, 3.8750457763671875, 2.9699249267578125, 0.8832778930664062, 0.9255142211914062, 2.1819305419921875, 0.3801727294921875, 1.258575439453125, 0.6732597351074219, 0.520660400390625, 1.2418060302734375, 0.31534767150878906, 1.6417312622070312, 1.30718994140625, 2.069683074951172, 2.2215652465820312, 2.825559616088867, -1.095306396484375, 0.7579193115234375, 0.6714591979980469, 2.6309890747070312, 3.2326908111572266, 1.7944564819335938, 1.7061767578125, -0.39879608154296875, 1.9290924072265625, -0.5920124053955078, 0.6349678039550781, 3.640432357788086, 0.3357696533203125, 2.0410079956054688, -1.3749542236328125, 3.0893020629882812, 1.78863525390625, -0.7452774047851562, 0.6014404296875, 0.7373580932617188, 0.7672042846679688, 0.5484619140625, -0.05036163330078125, 6.7341156005859375, 0.6423568725585938, 2.699542999267578, 0.9428749084472656, 0.48839569091796875, 2.9149703979492188, 1.5478668212890625, 3.4287109375, 1.802337646484375, 0.6410598754882812, 3.0138931274414062, 0.5114898681640625, 1.6430511474609375, 1.6647758483886719, -1.0164031982421875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000217.npy"}
{"epoch": 0.328042328042328, "step": 218, "batch_size": 64, "mean": 0.6365566253662109, "std": 1.4976263046264648, "min": -3.8475112915039062, "p10": -0.9580259323120117, "median": 0.5344429016113281, "p90": 2.494676208496094, "max": 4.3049163818359375, "pos_frac": 0.671875, "sample": [0.6456222534179688, 0.5123672485351562, -0.8757171630859375, 1.1901378631591797, 0.244354248046875, 0.4581451416015625, -0.00350189208984375, 0.08209228515625, -0.7557830810546875, 1.341949462890625, 0.6187591552734375, 0.9844646453857422, 1.9314193725585938, -2.1980743408203125, -0.9733009338378906, -0.9223842620849609, 0.48924827575683594, 2.5081253051757812, -1.897918701171875, -0.08649444580078125, 1.5904541015625, 1.3671798706054688, 0.2512969970703125, 0.038120269775390625, 1.6682891845703125, 4.3049163818359375, 2.1710968017578125, 3.037353515625, 1.182382583618164, 1.8496246337890625, -0.41832542419433594, 2.50830078125, 0.7846298217773438, -1.6903724670410156, -0.5037212371826172, -0.11679840087890625, 1.2741470336914062, 0.3627204895019531, 0.5565185546875, 2.030242919921875, 3.6532020568847656, 1.3787689208984375, -0.19704437255859375, -0.2625007629394531, 0.5922203063964844, -0.46523475646972656, 1.7095813751220703, 0.4995594024658203, -1.3440780639648438, -0.8038253784179688, 0.8268680572509766, -1.7215347290039062, 2.4632949829101562, 0.14483642578125, 1.2589340209960938, 1.997039794921875, 1.5026741027832031, 4.133941650390625, -3.8475112915039062, -0.2987632751464844, 0.22055435180664062, -0.422576904296875, 1.33941650390625, 2.8402328491210938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000218.npy"}
{"epoch": 0.3295540438397581, "step": 219, "batch_size": 64, "mean": 0.6701398491859436, "std": 1.1450579166412354, "min": -1.78839111328125, "p10": -0.6420206069946288, "median": 0.6526336669921875, "p90": 2.158463287353516, "max": 3.526153564453125, "pos_frac": 0.6875, "sample": [2.0390472412109375, 1.5705223083496094, 1.696044921875, 0.9442996978759766, 0.28728485107421875, 0.14581298828125, -0.32769775390625, -0.3012046813964844, 1.9973907470703125, -1.5439453125, -0.09509086608886719, 2.8416671752929688, -0.252716064453125, -0.3164386749267578, -0.33252716064453125, 1.4490509033203125, 0.5804519653320312, 1.2665081024169922, 0.45407867431640625, 0.13062286376953125, 0.42840003967285156, 0.35109710693359375, -1.4534912109375, 0.5905113220214844, 0.8939361572265625, -0.6914634704589844, 0.7638702392578125, -0.5952091217041016, 0.8196868896484375, 1.1639480590820312, 0.8120956420898438, 2.4395523071289062, 2.196990966796875, 2.498302459716797, 1.5862312316894531, 2.0685653686523438, -0.110504150390625, 0.9003753662109375, -0.40093994140625, 0.1101226806640625, -0.10047531127929688, 0.4668426513671875, -0.5410232543945312, 1.0658416748046875, 1.1253433227539062, -0.9478683471679688, 1.5465927124023438, -1.78839111328125, 0.7147560119628906, 3.526153564453125, 0.5753326416015625, 1.1130867004394531, 1.121734619140625, -0.39171600341796875, -1.5681495666503906, -0.11132431030273438, 2.2988739013671875, 1.44683837890625, 1.5479278564453125, -0.6620826721191406, 1.4381256103515625, 0.101898193359375, 2.8997840881347656, 1.405609130859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000219.npy"}
{"epoch": 0.3310657596371882, "step": 220, "batch_size": 64, "mean": 0.8476879596710205, "std": 1.4051867723464966, "min": -1.9467811584472656, "p10": -0.7706426620483398, "median": 0.7268657684326172, "p90": 2.509078025817871, "max": 4.473411560058594, "pos_frac": 0.703125, "sample": [-0.5475082397460938, -0.0671234130859375, 0.34104156494140625, 0.8621063232421875, 0.5296535491943359, 2.4346694946289062, 3.728515625, 0.78533935546875, 1.3099594116210938, 1.3288955688476562, -0.6272430419921875, 4.473411560058594, -0.003662109375, -1.1735763549804688, -0.2978973388671875, 0.2942466735839844, 3.4162216186523438, -0.46253204345703125, -0.5382881164550781, 0.5855751037597656, 1.8124103546142578, 0.45844268798828125, -0.6238956451416016, -1.188608169555664, 1.032562255859375, -0.7931995391845703, 0.3356037139892578, -1.7314376831054688, 2.15625, 1.4802398681640625, 1.74774169921875, 0.07378387451171875, -0.161102294921875, 2.3777599334716797, 3.0255393981933594, 0.6683921813964844, 1.129425048828125, 2.491514205932617, 2.972017288208008, 0.225677490234375, 0.2763938903808594, 1.3933906555175781, 0.39313507080078125, 1.0428733825683594, 2.3519210815429688, -0.7180099487304688, 1.9722747802734375, -0.09499549865722656, 1.7417926788330078, 1.4497947692871094, 1.4941825866699219, -1.9467811584472656, 1.9165058135986328, 0.36435508728027344, 0.6037750244140625, 0.9011077880859375, -1.8469352722167969, 2.5166053771972656, 1.6985015869140625, -0.32526397705078125, 3.7305908203125, -1.1072463989257812, 1.6792716979980469, 0.9038677215576172], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000220.npy"}
{"epoch": 0.3325774754346183, "step": 221, "batch_size": 64, "mean": 0.948039174079895, "std": 1.477738618850708, "min": -1.9459075927734375, "p10": -0.6698572158813474, "median": 0.8728065490722656, "p90": 2.934378814697266, "max": 5.3140411376953125, "pos_frac": 0.75, "sample": [-0.37714576721191406, 0.6223201751708984, -0.23433685302734375, 0.460662841796875, -0.16524314880371094, 1.7306900024414062, 0.7937507629394531, -0.819580078125, 1.7964859008789062, -0.45345306396484375, 0.14113235473632812, 1.5161819458007812, 1.1683311462402344, 0.8797149658203125, -1.1113204956054688, 0.10149002075195312, -0.39252471923828125, 0.4870643615722656, 1.0494651794433594, 0.87042236328125, 1.6704692840576172, 1.17840576171875, 2.86053466796875, 1.6446533203125, 3.079132080078125, 1.7006072998046875, 1.3950614929199219, 4.134593963623047, -1.2029647827148438, 0.22128677368164062, 1.915069580078125, 2.9660263061523438, 0.06554985046386719, 1.4756507873535156, 4.8355865478515625, 0.8751907348632812, 2.25616455078125, -1.1224555969238281, 1.054311752319336, 1.025177001953125, 1.4490585327148438, -1.2945442199707031, 1.1528282165527344, -0.04834556579589844, -0.03938102722167969, 0.030261993408203125, 0.12935447692871094, 4.22039794921875, 0.445953369140625, -0.2522468566894531, -0.34393310546875, -1.9459075927734375, 0.3155975341796875, 4.2557373046875, 1.444122314453125, -0.7626018524169922, 1.0712966918945312, 1.2279815673828125, 5.3140411376953125, 0.206573486328125, 0.0551910400390625, 1.0151596069335938, 2.2500076293945312, 0.6857452392578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000221.npy"}
{"epoch": 0.3340891912320484, "step": 222, "batch_size": 64, "mean": 1.08461332321167, "std": 1.3748630285263062, "min": -2.1365737915039062, "p10": -0.8131027221679686, "median": 1.043710708618164, "p90": 2.54303092956543, "max": 4.3541259765625, "pos_frac": 0.8125, "sample": [-0.8641872406005859, -2.1365737915039062, 1.527994155883789, 2.44342041015625, 1.8177108764648438, 2.5528717041015625, 2.520069122314453, 0.4982948303222656, 2.2256011962890625, 3.190937042236328, 1.191925048828125, 0.8674182891845703, -0.5675201416015625, 0.060306549072265625, 1.7695560455322266, -0.7007904052734375, 1.5952682495117188, 1.0024299621582031, 0.03663825988769531, 1.6323051452636719, 0.9340438842773438, 0.2700634002685547, 1.4611358642578125, 0.731292724609375, -1.4790992736816406, 1.1999588012695312, 0.7876663208007812, 4.152984619140625, -0.3063621520996094, 1.2070083618164062, 1.2268524169921875, 1.001800537109375, 1.6428070068359375, 2.3105335235595703, 2.3892059326171875, 2.12042236328125, 0.8251266479492188, 0.18239593505859375, -0.861236572265625, 0.2822113037109375, -1.4094619750976562, 2.1129379272460938, 0.21239471435546875, 2.1534652709960938, 1.084991455078125, 2.50384521484375, 0.6845932006835938, -0.14962005615234375, -0.9444580078125, 0.8994369506835938, 0.8113174438476562, 0.3950939178466797, 2.8935546875, 3.426687240600586, 0.629638671875, 0.5873870849609375, 1.2908248901367188, -0.5067977905273438, 2.11322021484375, 3.9089508056640625, 1.8982391357421875, 1.1054668426513672, 4.3541259765625, -1.3830718994140625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000222.npy"}
{"epoch": 0.3356009070294785, "step": 223, "batch_size": 64, "mean": 1.1995022296905518, "std": 1.395774245262146, "min": -1.6523246765136719, "p10": -0.4955743789672852, "median": 1.3359479904174805, "p90": 2.6970088958740237, "max": 5.132236480712891, "pos_frac": 0.765625, "sample": [1.350748062133789, 1.7754383087158203, 2.0203933715820312, 1.9065780639648438, -0.772613525390625, 2.7119102478027344, -0.6796875, 0.7011566162109375, 0.9546585083007812, 1.3802165985107422, -0.22298049926757812, 4.4832611083984375, 1.3783206939697266, 1.6732254028320312, -1.6523246765136719, -0.4978065490722656, 0.21426963806152344, 1.241363525390625, 1.65045166015625, -0.19784927368164062, 1.4310646057128906, 1.911407470703125, -0.07803153991699219, 1.1014366149902344, 0.10646820068359375, 1.5651397705078125, 0.5761604309082031, 2.2902069091796875, -0.5172100067138672, -0.395111083984375, 1.1663932800292969, 2.2719192504882812, -0.22743988037109375, 1.3211479187011719, 1.8168487548828125, 3.8382911682128906, 2.6622390747070312, 2.442028045654297, 0.08333587646484375, -0.08626174926757812, -0.08990478515625, 0.0478973388671875, 1.60540771484375, 0.47383880615234375, 3.308868408203125, 0.5418624877929688, 4.3173828125, 2.463499069213867, 1.7722625732421875, 2.303365707397461, 5.132236480712891, -1.3150558471679688, 1.4274749755859375, 3.1828460693359375, 1.26190185546875, 1.8809967041015625, 1.63775634765625, 1.5068817138671875, 0.6897010803222656, -0.49036598205566406, -0.617340087890625, 2.5774993896484375, 0.18907928466796875, 0.26129150390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000223.npy"}
{"epoch": 0.3371126228269085, "step": 224, "batch_size": 64, "mean": 0.6505719423294067, "std": 1.1250476837158203, "min": -1.4904708862304688, "p10": -0.7208635330200194, "median": 0.4714698791503906, "p90": 2.275558853149415, "max": 4.156055450439453, "pos_frac": 0.703125, "sample": [0.7379570007324219, 2.6634292602539062, 2.442840576171875, 0.1920623779296875, 0.29583740234375, 2.4990501403808594, 1.4537506103515625, 0.60980224609375, 0.11254119873046875, 0.779022216796875, 1.5169868469238281, 0.7078647613525391, 1.9124526977539062, 2.893878936767578, -0.20727920532226562, 0.22520065307617188, 0.3216514587402344, 0.6729812622070312, 2.40191650390625, 0.526763916015625, 0.124603271484375, 1.8056793212890625, -0.0412139892578125, -1.2355918884277344, -0.771728515625, -1.0082740783691406, -0.03105926513671875, -1.3516082763671875, 0.968902587890625, 1.0422515869140625, 1.0436859130859375, -0.24050331115722656, -0.6021785736083984, -0.01111602783203125, -1.4904708862304688, -0.05621910095214844, 0.11263847351074219, -0.8223419189453125, 1.4046249389648438, 0.24692535400390625, -0.510284423828125, 0.9628982543945312, 0.8255424499511719, -0.4113597869873047, 0.4235382080078125, 0.5194015502929688, 2.0863819122314453, 0.3117561340332031, -0.4573516845703125, 0.2806282043457031, -0.30093955993652344, 0.9333744049072266, 1.890249252319336, 1.3649711608886719, 4.156055450439453, 0.39040374755859375, 0.78570556640625, 1.7514495849609375, -0.09685516357421875, 2.088176727294922, -0.956298828125, 2.355865478515625, 0.37359619140625, 1.0239830017089844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000224.npy"}
{"epoch": 0.3386243386243386, "step": 225, "batch_size": 64, "mean": 1.0904691219329834, "std": 1.5463502407073975, "min": -2.208465576171875, "p10": -0.8819355010986328, "median": 0.7603940963745117, "p90": 3.1672147750854496, "max": 5.6392669677734375, "pos_frac": 0.78125, "sample": [0.0511016845703125, 1.1262969970703125, -0.8820648193359375, -0.1891632080078125, 1.52435302734375, 3.4263916015625, 0.292877197265625, 0.028331756591796875, 0.7199344635009766, 1.8074951171875, 0.9692840576171875, 2.0689697265625, 0.577667236328125, 2.612640380859375, -0.8816337585449219, 0.7136726379394531, 0.9647254943847656, 2.3341903686523438, 0.35578346252441406, 2.608612060546875, 3.187082290649414, 1.8663101196289062, 3.1897354125976562, 0.8781051635742188, 5.6392669677734375, 0.5060386657714844, -0.4976806640625, 0.5344123840332031, 2.991422653198242, -0.5504913330078125, 0.47919464111328125, -2.208465576171875, 1.034515380859375, -0.8879280090332031, 2.8206558227539062, 1.6868133544921875, 0.22215652465820312, 4.5650634765625, 0.759185791015625, 0.08041954040527344, -0.476806640625, 0.5400314331054688, 3.2538185119628906, -0.07626152038574219, 3.7518768310546875, 0.4690093994140625, -0.92138671875, -0.9394721984863281, 3.018146514892578, 0.4276313781738281, 1.8442306518554688, 0.19410133361816406, 1.8247756958007812, 2.0791702270507812, 2.3889617919921875, -0.024965286254882812, 0.42726898193359375, 1.7966461181640625, 3.1208572387695312, 0.7616024017333984, -2.0251617431640625, 1.5083465576171875, 1.4031829833984375, -1.0808563232421875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000225.npy"}
{"epoch": 0.3401360544217687, "step": 226, "batch_size": 64, "mean": 0.9399595260620117, "std": 1.4341615438461304, "min": -2.1148681640625, "p10": -0.6890628814697264, "median": 0.6285438537597656, "p90": 3.0030303955078126, "max": 5.119178771972656, "pos_frac": 0.765625, "sample": [0.57196044921875, -0.9158477783203125, 1.2222900390625, 1.684286117553711, 0.41016387939453125, 0.49716949462890625, 5.119178771972656, 0.9655933380126953, 0.2868671417236328, -1.2749691009521484, 1.161691665649414, 0.5045337677001953, 1.0206375122070312, 2.067960739135742, -0.7524871826171875, 0.09349822998046875, 1.431386947631836, 0.47003173828125, 3.096982955932617, 2.531402587890625, 3.1365890502929688, 0.510833740234375, 1.0374069213867188, 0.09059906005859375, 0.280609130859375, 0.29946136474609375, 0.4463672637939453, 3.4877471923828125, 1.9364166259765625, 2.9805679321289062, 0.2217998504638672, 1.4725189208984375, 0.5792655944824219, 0.39479827880859375, 3.0126571655273438, -0.49735260009765625, 2.569629669189453, -2.1148681640625, -0.30712890625, -0.17002105712890625, 0.480926513671875, 1.922515869140625, -0.3967170715332031, -1.6298675537109375, 2.7516403198242188, 3.071746826171875, -1.8379974365234375, 1.9110107421875, -1.3046379089355469, 0.7948684692382812, 1.087310791015625, 0.6778221130371094, 0.4916648864746094, 1.4755859375, 1.364654541015625, 3.7499771118164062, 1.643310546875, 1.7038955688476562, -0.21019744873046875, -0.1819782257080078, 2.876068115234375, -0.5410728454589844, -0.14708328247070312, 0.8437347412109375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000226.npy"}
{"epoch": 0.3416477702191988, "step": 227, "batch_size": 64, "mean": 0.8698740005493164, "std": 1.521647334098816, "min": -2.713653564453125, "p10": -0.8364332199096677, "median": 0.6679573059082031, "p90": 2.755305862426758, "max": 4.682086944580078, "pos_frac": 0.6875, "sample": [2.4487457275390625, 0.9828453063964844, 0.17800140380859375, 2.1898651123046875, 0.36391448974609375, -0.4340324401855469, 1.88299560546875, 3.792165756225586, -1.2505645751953125, -0.9393939971923828, 2.7561721801757812, -0.08023834228515625, -0.5278110504150391, 2.753284454345703, 2.822643280029297, 0.9477653503417969, -0.10426139831542969, -0.04729461669921875, 2.4912872314453125, 0.6601104736328125, 0.675079345703125, 1.8284683227539062, 1.466522216796875, 0.7196712493896484, 3.3858261108398438, 1.3471221923828125, -0.42118072509765625, 1.3231048583984375, -0.39594078063964844, -0.03335762023925781, 1.9027557373046875, 3.662029266357422, 1.7263946533203125, -1.2223968505859375, -2.713653564453125, 1.5263099670410156, 4.682086944580078, -0.59619140625, 1.25439453125, 2.3047409057617188, 0.43474578857421875, 0.6608352661132812, 0.440399169921875, 0.18379974365234375, 1.7755584716796875, -0.12793731689453125, 0.17940521240234375, 1.026458740234375, -2.0013885498046875, -0.05986785888671875, -0.467041015625, 1.316732406616211, 2.34735107421875, 1.78460693359375, 0.343048095703125, 0.006072998046875, 0.1572723388671875, 0.3281822204589844, 1.7988052368164062, -2.1087188720703125, -0.12928009033203125, -1.7524795532226562, 3.737855911254883, 2.489532470703125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000227.npy"}
{"epoch": 0.3431594860166289, "step": 228, "batch_size": 64, "mean": 0.960975706577301, "std": 1.6266067028045654, "min": -2.7865447998046875, "p10": -1.1777320861816405, "median": 0.9455509185791016, "p90": 3.0976135253906256, "max": 4.890565872192383, "pos_frac": 0.703125, "sample": [-0.008859634399414062, 1.6195545196533203, 1.5803604125976562, 4.890565872192383, 1.5366287231445312, 0.4160308837890625, 0.9152507781982422, 3.436269760131836, 0.538421630859375, -1.0826873779296875, 1.0387401580810547, -0.2931671142578125, 1.9886474609375, -2.7865447998046875, 0.8100051879882812, 0.22800636291503906, 1.2892799377441406, -0.85345458984375, 1.8301124572753906, 0.9779872894287109, -0.17411422729492188, 2.5584564208984375, -0.2670440673828125, 1.958770751953125, -1.206390380859375, 0.9411582946777344, 1.5741748809814453, 4.6319732666015625, 0.6017093658447266, 2.2791061401367188, 0.5147571563720703, -1.1108627319335938, 0.13544845581054688, -0.24962615966796875, 1.8833980560302734, -1.398366928100586, 1.218597412109375, -0.17951393127441406, -2.496551513671875, -1.3603572845458984, 1.265655517578125, 1.3345794677734375, 4.01019287109375, -1.2130622863769531, -0.0806884765625, 0.5159416198730469, 2.3868026733398438, 3.1605377197265625, -1.8245105743408203, 1.7055282592773438, 0.9413909912109375, 0.4925498962402344, 4.1472320556640625, 2.9543495178222656, -0.33956146240234375, 2.0157527923583984, 3.1478118896484375, 2.9804840087890625, 1.814208984375, 0.8006248474121094, 1.4622726440429688, 0.9497108459472656, 1.6759986877441406, -0.7272262573242188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000228.npy"}
{"epoch": 0.34467120181405897, "step": 229, "batch_size": 64, "mean": 0.8716517090797424, "std": 1.384008765220642, "min": -3.036510467529297, "p10": -0.9056106567382809, "median": 0.8274326324462891, "p90": 2.684086608886719, "max": 3.950958251953125, "pos_frac": 0.765625, "sample": [1.9121246337890625, 2.283926010131836, 1.281494140625, 0.6792678833007812, 1.6616764068603516, 2.704315185546875, 1.023702621459961, 0.46733665466308594, 0.1452503204345703, -1.0613555908203125, 0.24408721923828125, -0.4107666015625, 1.6665802001953125, -1.2202224731445312, 0.1688690185546875, 0.9007530212402344, 0.1141815185546875, -0.5360679626464844, 2.010986328125, 1.2689323425292969, 0.5172538757324219, 0.19463539123535156, -0.3450889587402344, 0.7818679809570312, -0.1829376220703125, 1.9756278991699219, -1.10577392578125, 0.3900184631347656, 0.7042865753173828, -1.5335159301757812, 1.507476806640625, 3.950958251953125, -3.036510467529297, 3.4738292694091797, -0.542205810546875, 0.6057758331298828, 1.8110275268554688, -1.2889060974121094, 1.6136245727539062, 1.363250732421875, 3.105255126953125, 1.7163066864013672, 0.4919261932373047, -0.140045166015625, 2.0007476806640625, -0.40831756591796875, 2.9440059661865234, 1.689718246459961, 1.8210735321044922, 0.8729972839355469, 1.0692214965820312, -0.3345375061035156, 1.0072784423828125, 0.573638916015625, 2.6368865966796875, 2.0242919921875, 0.5375595092773438, 0.4238395690917969, 2.9505748748779297, 3.58203125, 1.991943359375, 1.1377029418945312, 0.05138397216796875, -2.119539260864258], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000229.npy"}
{"epoch": 0.34618291761148906, "step": 230, "batch_size": 64, "mean": 0.9182896614074707, "std": 1.6196167469024658, "min": -2.6916275024414062, "p10": -0.6324577331542969, "median": 0.8276462554931641, "p90": 2.6940984725952157, "max": 5.9725341796875, "pos_frac": 0.765625, "sample": [-0.4204559326171875, 1.23468017578125, 0.3743324279785156, 1.0145072937011719, 0.46282958984375, -2.674102783203125, -0.5075054168701172, -0.6174240112304688, 1.5596466064453125, 1.8872146606445312, 1.8266525268554688, 2.3092269897460938, 1.2648735046386719, 2.8034210205078125, 0.8271942138671875, 1.0927562713623047, 1.7355880737304688, 1.9661216735839844, -1.2203750610351562, 0.8563671112060547, -2.6916275024414062, 0.13305282592773438, 0.615997314453125, 2.2786598205566406, 1.72821044921875, -1.1112747192382812, 1.4718017578125, 1.2646713256835938, 3.3221969604492188, 1.5676956176757812, 2.4390125274658203, 0.885711669921875, -0.5249404907226562, 0.1463336944580078, -0.33001708984375, -0.6389007568359375, 0.8280982971191406, 0.04313087463378906, 5.9725341796875, 0.688934326171875, 0.6976299285888672, 1.3363189697265625, -0.0054168701171875, 0.1923370361328125, 3.6634063720703125, 2.0330276489257812, -2.059804916381836, 5.528572082519531, 0.0073699951171875, 0.6512794494628906, 4.5059356689453125, -2.1140594482421875, 1.3344249725341797, 2.866891860961914, 0.6488513946533203, 1.4650192260742188, 0.9001235961914062, -0.037136077880859375, 0.5682334899902344, 0.6162643432617188, 0.7365169525146484, 1.5012359619140625, -0.5259628295898438, 0.3946495056152344], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000230.npy"}
{"epoch": 0.3476946334089191, "step": 231, "batch_size": 64, "mean": 1.3306910991668701, "std": 1.3668016195297241, "min": -1.6827239990234375, "p10": -0.2606021881103515, "median": 1.3011131286621094, "p90": 3.1067817687988284, "max": 4.425609588623047, "pos_frac": 0.859375, "sample": [1.5527801513671875, -0.23825454711914062, 1.6469306945800781, 1.5004997253417969, 0.17116546630859375, 2.179973602294922, 1.1439247131347656, 2.328704833984375, -0.27017974853515625, 0.4149188995361328, 1.104959487915039, 0.3025665283203125, 0.07520294189453125, 0.18363189697265625, -1.5935821533203125, 1.2719955444335938, 1.5394134521484375, 2.9942665100097656, 1.4743804931640625, -0.8364105224609375, 3.8773345947265625, -0.3261528015136719, 0.8169784545898438, 2.0458412170410156, 0.6350955963134766, 3.0236053466796875, -0.6163787841796875, 0.04247093200683594, 4.425609588623047, 1.6013908386230469, 2.1566009521484375, 0.4017524719238281, 3.088287353515625, 3.61590576171875, 2.780193328857422, 2.394561767578125, 0.4112415313720703, 1.2961044311523438, 3.15179443359375, 0.29790496826171875, 3.091064453125, 1.306121826171875, 1.5251293182373047, 2.1878662109375, 3.1135177612304688, 0.46300506591796875, 2.3182144165039062, -0.22416114807128906, 0.5367012023925781, -0.6647300720214844, 2.0226306915283203, 1.824737548828125, 1.0708084106445312, 2.1427993774414062, 1.5706710815429688, 4.005683898925781, 1.257537841796875, 0.6864299774169922, 0.35565185546875, 0.3874664306640625, -1.6827239990234375, 0.05561065673828125, 2.0477657318115234, 3.6993961334228516], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000231.npy"}
{"epoch": 0.3492063492063492, "step": 232, "batch_size": 64, "mean": 1.062428593635559, "std": 1.6490226984024048, "min": -2.717742919921875, "p10": -1.0155879974365232, "median": 0.7918376922607422, "p90": 3.246869659423828, "max": 5.3212890625, "pos_frac": 0.734375, "sample": [0.43135833740234375, 3.282672882080078, 1.9264907836914062, 4.484886169433594, 1.189504623413086, 3.84185791015625, 0.01663970947265625, 3.260753631591797, 0.41623687744140625, -1.4589996337890625, -0.8423423767089844, 0.860870361328125, -0.12295150756835938, 0.9046554565429688, 0.8787193298339844, 0.55224609375, 0.5485286712646484, 1.1938533782958984, 0.8773269653320312, 0.7777442932128906, -0.2425079345703125, 4.47003173828125, 1.713531494140625, 2.2591552734375, 2.579803466796875, 0.8685111999511719, -1.3397941589355469, 3.2144737243652344, -0.6634330749511719, 1.7791824340820312, 0.11685371398925781, 2.7920379638671875, -1.3161182403564453, -0.1118621826171875, 2.081085205078125, 0.421173095703125, 0.5770072937011719, -1.0898361206054688, 2.3583526611328125, -0.044952392578125, 1.1528968811035156, 3.044769287109375, 0.03923797607421875, 5.3212890625, 2.6701812744140625, 4.427825927734375, -1.1466922760009766, -0.15821075439453125, -0.0655975341796875, 0.6436233520507812, 0.8059310913085938, 2.2335433959960938, 2.7921905517578125, 1.2333526611328125, 0.7337226867675781, -1.8478164672851562, -0.08484268188476562, -0.171417236328125, 0.158966064453125, 0.6067352294921875, 2.0381927490234375, -2.717742919921875, 2.200592041015625, 0.6419563293457031], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000232.npy"}
{"epoch": 0.3507180650037793, "step": 233, "batch_size": 64, "mean": 0.7533849477767944, "std": 1.626981258392334, "min": -1.9990234375, "p10": -1.0078712463378907, "median": 0.3811817169189453, "p90": 3.0085388183593764, "max": 7.2542724609375, "pos_frac": 0.703125, "sample": [7.2542724609375, 0.30214691162109375, 3.315704345703125, -0.058246612548828125, 0.8048820495605469, 2.314363479614258, 1.3354415893554688, 0.7081165313720703, 0.7815742492675781, -1.0078659057617188, 1.0800094604492188, 3.96746826171875, 0.010135650634765625, -0.18753814697265625, -1.3852005004882812, 2.023345947265625, 1.5997676849365234, 0.3403797149658203, -0.41442108154296875, 0.9195690155029297, 1.67681884765625, 1.2435760498046875, 1.3521270751953125, 0.45269012451171875, -1.2919921875, 1.1372756958007812, -1.7841606140136719, 1.4116783142089844, -1.6747970581054688, -1.1818313598632812, 2.628204345703125, 0.39142608642578125, 0.8865127563476562, 0.3226661682128906, 3.171539306640625, -1.9990234375, 1.7096939086914062, 0.1633625030517578, 0.12776947021484375, 0.12256431579589844, 1.247772216796875, -0.768585205078125, 4.583404541015625, -0.51556396484375, 0.23506927490234375, 0.043670654296875, -1.00787353515625, 0.0389251708984375, 2.0679931640625, 1.12835693359375, -0.7535247802734375, -0.2711372375488281, 0.2655487060546875, 2.5714035034179688, 3.471160888671875, 3.3359527587890625, 0.3838081359863281, -0.5563087463378906, 0.3785552978515625, 0.1879730224609375, -0.12318038940429688, -0.5623512268066406, -0.17908477783203125, 0.44464874267578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000233.npy"}
{"epoch": 0.35222978080120937, "step": 234, "batch_size": 64, "mean": 0.575881838798523, "std": 1.4742646217346191, "min": -2.2451705932617188, "p10": -1.3583438873291016, "median": 0.533172607421875, "p90": 2.4988830566406253, "max": 4.2462158203125, "pos_frac": 0.609375, "sample": [0.03810882568359375, -1.5404205322265625, -0.9994583129882812, -0.30279541015625, 0.4936962127685547, -0.8703460693359375, 0.6927299499511719, -1.3291015625, 1.551055908203125, -1.7328224182128906, 1.449249267578125, 0.6423664093017578, 0.9948348999023438, 1.7449874877929688, 2.1401290893554688, -0.4445915222167969, -1.3708763122558594, -0.7975654602050781, 1.8934478759765625, -0.22110939025878906, 0.6485004425048828, 0.5267143249511719, 3.5393524169921875, 4.2462158203125, -0.3681755065917969, -0.9628753662109375, 1.9748497009277344, -0.23601341247558594, -2.2451705932617188, -0.45423126220703125, 2.4567031860351562, 1.5071334838867188, 3.0311241149902344, 2.5169601440429688, -0.0071544647216796875, -1.4884681701660156, 0.798797607421875, 0.03643035888671875, 0.36051368713378906, -0.8284149169921875, 2.3386993408203125, 1.6877365112304688, 0.5396308898925781, 3.0721511840820312, 2.1506805419921875, 0.5964584350585938, 0.18352127075195312, 2.1427268981933594, 2.541412353515625, -0.5124530792236328, -0.1224212646484375, 0.5131378173828125, -2.1478195190429688, 3.3031654357910156, -0.604705810546875, 0.6619052886962891, 1.0724639892578125, 1.3176345825195312, -1.7820358276367188, 0.6303253173828125, -0.26630401611328125, 1.3194084167480469, 1.5198020935058594, -0.38299560546875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000234.npy"}
{"epoch": 0.35374149659863946, "step": 235, "batch_size": 64, "mean": 1.2960199117660522, "std": 1.20405912399292, "min": -1.0869483947753906, "p10": -0.18640594482421874, "median": 1.082275390625, "p90": 2.9561157226562504, "max": 4.336881637573242, "pos_frac": 0.84375, "sample": [1.2663917541503906, 2.3540725708007812, 0.8886184692382812, 2.4340991973876953, 3.0099945068359375, 0.396148681640625, 0.675994873046875, 3.812530517578125, 1.5000457763671875, 2.3892669677734375, -0.19077301025390625, 0.8903579711914062, 0.6965904235839844, 1.0800437927246094, -0.08160400390625, 2.2408084869384766, 2.350341796875, -0.3550071716308594, 0.9731502532958984, 0.5445938110351562, 1.2840499877929688, 1.0845069885253906, -0.74090576171875, 0.9553985595703125, 3.0697708129882812, 1.2769622802734375, -0.330474853515625, -0.14948272705078125, 4.336881637573242, 0.4620246887207031, 2.634593963623047, 0.5901622772216797, 1.6971874237060547, 3.7049560546875, 1.6995620727539062, 0.542266845703125, 2.266864776611328, 0.3407440185546875, 3.2899627685546875, -0.333251953125, 0.1890125274658203, 2.6388816833496094, 0.5072021484375, 2.018688201904297, 2.313751220703125, 1.39825439453125, 0.8376998901367188, 2.245899200439453, 2.8303985595703125, 0.7875804901123047, 1.1512985229492188, 0.899993896484375, 3.3993072509765625, 0.38802337646484375, 0.50750732421875, -0.4375038146972656, 1.5381851196289062, 1.4258918762207031, 1.585540771484375, -0.17621612548828125, 2.4810867309570312, 0.6372451782226562, 0.3070487976074219, -1.0869483947753906], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000235.npy"}
{"epoch": 0.35525321239606955, "step": 236, "batch_size": 64, "mean": 1.062842607498169, "std": 1.8277244567871094, "min": -2.9657440185546875, "p10": -0.8905277252197266, "median": 0.8220357894897461, "p90": 3.1517974853515627, "max": 7.9812774658203125, "pos_frac": 0.734375, "sample": [1.2951927185058594, -1.0684738159179688, -0.5480575561523438, 2.9510765075683594, 1.8523120880126953, -0.897247314453125, 1.3388328552246094, 3.2398223876953125, 0.388580322265625, 0.8772926330566406, 1.3585891723632812, 1.0289306640625, -0.19016647338867188, 1.3397216796875, -0.6222114562988281, 0.69830322265625, 0.17452621459960938, 0.1069488525390625, 2.1896133422851562, 3.6289520263671875, 0.4329948425292969, 0.06140899658203125, 0.6416091918945312, 0.3267021179199219, 2.3291549682617188, 1.0612335205078125, 2.1186485290527344, 3.159332275390625, 3.7105865478515625, 2.480926513671875, 3.13421630859375, -1.1950912475585938, -0.593658447265625, 1.321807861328125, 5.6190338134765625, -0.6732635498046875, -0.593231201171875, 7.9812774658203125, 0.8126087188720703, 0.8314628601074219, -1.5137653350830078, 0.19537734985351562, 2.093158721923828, 1.9606208801269531, 0.8743515014648438, 0.7259292602539062, 0.6887130737304688, -0.8830642700195312, 2.1518707275390625, 0.07850837707519531, -0.01653289794921875, -2.89801025390625, -2.9657440185546875, 3.490337371826172, 2.8064193725585938, 2.9055404663085938, 3.0055389404296875, -0.8937263488769531, -0.19521331787109375, -0.3122062683105469, 2.287891387939453, 0.6531829833984375, 1.2282581329345703, 0.44419097900390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000236.npy"}
{"epoch": 0.35676492819349964, "step": 237, "batch_size": 64, "mean": 0.9867702722549438, "std": 1.70320725440979, "min": -3.50689697265625, "p10": -1.3750694274902342, "median": 0.9371910095214844, "p90": 3.242818450927735, "max": 6.4207916259765625, "pos_frac": 0.78125, "sample": [0.8443374633789062, 1.5106735229492188, 1.4515380859375, 2.7381019592285156, 0.6579132080078125, -1.8632049560546875, 0.7420578002929688, 3.138458251953125, 0.7266159057617188, 1.7567806243896484, 1.5145721435546875, 0.7305755615234375, -1.1968917846679688, -1.7129974365234375, 1.3661727905273438, 1.6211013793945312, -0.1797466278076172, 3.4995193481445312, -1.665496826171875, 1.035369873046875, -1.6811904907226562, 1.6447772979736328, 2.6428070068359375, 1.9227447509765625, 3.7048492431640625, -0.6966609954833984, -0.780914306640625, 1.5929031372070312, 1.0828857421875, 0.15176010131835938, 1.4013080596923828, 0.7111053466796875, 0.6383514404296875, -1.4514312744140625, 0.4170246124267578, 0.3795204162597656, 0.4823951721191406, -2.239166259765625, 4.061065673828125, -0.11838722229003906, 0.8513298034667969, 0.9621047973632812, 0.5800933837890625, 0.18445587158203125, 0.9122772216796875, 0.03169822692871094, 2.542001724243164, 1.3194122314453125, 4.5472412109375, 3.2875442504882812, 1.7057418823242188, 1.1901702880859375, 3.7052993774414062, 6.4207916259765625, -3.50689697265625, 1.2066841125488281, -0.31842613220214844, 2.5210647583007812, 1.4327583312988281, 0.6733245849609375, 0.127288818359375, 0.984588623046875, -0.277801513671875, 1.4893531799316406], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000237.npy"}
{"epoch": 0.35827664399092973, "step": 238, "batch_size": 64, "mean": 0.903631329536438, "std": 1.4803690910339355, "min": -1.8264904022216797, "p10": -1.0633178710937496, "median": 0.8500118255615234, "p90": 2.5964637756347657, "max": 4.3355865478515625, "pos_frac": 0.71875, "sample": [-0.5577583312988281, 0.0523223876953125, 1.7007865905761719, -1.5943832397460938, 1.9289436340332031, 4.3355865478515625, 0.7161865234375, 0.2929534912109375, 1.7808761596679688, 0.2611827850341797, -0.70733642578125, 0.349761962890625, 1.6902923583984375, 1.122528076171875, 1.6497764587402344, 2.1673545837402344, -0.3333549499511719, -0.4878559112548828, 2.2218246459960938, 1.9203815460205078, 2.3486557006835938, 4.116310119628906, 1.8889999389648438, 1.5622138977050781, -0.16884422302246094, 0.8873405456542969, 1.1418914794921875, 1.20220947265625, -1.4049415588378906, 2.5929336547851562, 1.763763427734375, 2.222837448120117, 0.2126026153564453, 0.560302734375, -0.4407463073730469, 0.30524444580078125, -0.647796630859375, -0.11578941345214844, 2.5979766845703125, 3.304767608642578, 1.1814384460449219, -1.1704349517822266, 0.9905624389648438, -0.5193634033203125, 2.4738693237304688, 0.81268310546875, 3.9418487548828125, 3.2367782592773438, 0.09241294860839844, -0.82440185546875, -1.16571044921875, 1.6887664794921875, 2.467855453491211, -1.8264904022216797, -1.577310562133789, 0.1643810272216797, -1.5581512451171875, 1.3707046508789062, 2.83905029296875, 0.4858856201171875, 2.2073974609375, 0.2977142333984375, -0.6101112365722656, 0.3930320739746094], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000238.npy"}
{"epoch": 0.35978835978835977, "step": 239, "batch_size": 64, "mean": 1.072688341140747, "std": 1.6260581016540527, "min": -3.6605224609375, "p10": -0.7466999053955078, "median": 0.9453887939453125, "p90": 3.3503173828125, "max": 4.795440673828125, "pos_frac": 0.75, "sample": [1.2468986511230469, 3.3276519775390625, -0.16094970703125, 0.6915130615234375, 2.4884185791015625, 3.5344772338867188, -0.7108993530273438, 0.31366729736328125, 0.43869781494140625, -0.1752471923828125, -1.24993896484375, 1.5982666015625, 0.5562171936035156, 2.250640869140625, 3.3600311279296875, -0.534149169921875, -0.5409126281738281, -1.646188735961914, 3.733673095703125, 1.68292236328125, 0.5573062896728516, 0.46471405029296875, 2.222513198852539, 3.2641220092773438, 3.236034393310547, 0.4893951416015625, 2.0597896575927734, 0.6692237854003906, 0.7486534118652344, 1.3645401000976562, 1.8054389953613281, 1.157857894897461, -0.5541839599609375, -0.22602272033691406, 1.0941276550292969, 4.08233642578125, 0.2492961883544922, 0.3801155090332031, -3.6605224609375, 1.8649444580078125, -0.7620429992675781, 1.6438255310058594, 1.1917953491210938, 0.88214111328125, -1.4376068115234375, 1.7781257629394531, 3.4463577270507812, 4.795440673828125, 3.6195220947265625, -0.5976982116699219, -0.30678749084472656, 1.008636474609375, -2.1663265228271484, 2.8868637084960938, -0.826904296875, 1.1366729736328125, 2.157564163208008, 0.6700000762939453, 0.3862762451171875, 1.83453369140625, 0.8552646636962891, 2.0081634521484375, 2.61962890625, 0.3541412353515625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000239.npy"}
{"epoch": 0.36130007558578986, "step": 240, "batch_size": 64, "mean": 1.2287919521331787, "std": 1.7610327005386353, "min": -3.2599411010742188, "p10": -0.7645828247070312, "median": 0.9985980987548828, "p90": 3.2620079040527346, "max": 7.101320266723633, "pos_frac": 0.828125, "sample": [0.9958457946777344, -0.75592041015625, 0.11297607421875, 1.0013504028320312, 0.5555877685546875, 3.81658935546875, 0.62884521484375, 3.1776504516601562, -0.7682952880859375, 0.07599258422851562, 2.526214599609375, -0.3820362091064453, 1.2142372131347656, 4.048887252807617, 4.667423248291016, 4.843330383300781, 1.7076835632324219, -2.1974563598632812, 2.4849929809570312, 2.2546348571777344, -1.1781387329101562, 0.845916748046875, 0.7701549530029297, -0.9353866577148438, 1.6945953369140625, 7.101320266723633, 1.23760986328125, 1.2844696044921875, 3.5467147827148438, 2.3869476318359375, 2.6997222900390625, 2.4402008056640625, 0.43491363525390625, 1.35015869140625, 1.1559295654296875, 1.1771087646484375, 1.6582412719726562, 0.990386962890625, 1.4186782836914062, 0.411712646484375, 0.9733734130859375, 0.3714256286621094, -1.4199256896972656, 1.3997802734375, 2.683990478515625, -3.2599411010742188, 0.5089454650878906, 0.8280868530273438, 0.980682373046875, 0.5900039672851562, 1.983713150024414, 1.6486930847167969, -0.2114715576171875, 0.511077880859375, 0.08478546142578125, 0.5016880035400391, 2.8604698181152344, 3.2291793823242188, -0.656768798828125, 0.19444847106933594, 3.2760772705078125, 0.34090423583984375, -2.202892303466797, 2.9265670776367188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000240.npy"}
{"epoch": 0.36281179138321995, "step": 241, "batch_size": 64, "mean": 0.5624747276306152, "std": 1.390181064605713, "min": -2.403900146484375, "p10": -1.0651851654052735, "median": 0.39249610900878906, "p90": 2.3079929351806645, "max": 4.1405487060546875, "pos_frac": 0.625, "sample": [-1.6593475341796875, -0.469635009765625, 1.9544010162353516, -0.6197872161865234, 0.21887969970703125, -1.6868743896484375, 1.0952568054199219, 2.064350128173828, 1.6092910766601562, 4.1405487060546875, -0.8193283081054688, 2.344768524169922, -1.1411514282226562, 0.2927227020263672, 0.44506072998046875, 2.5807266235351562, 0.03173065185546875, 0.9991321563720703, -1.6804122924804688, -2.403900146484375, 1.9478111267089844, 2.1787872314453125, -0.7878570556640625, 0.9038963317871094, 1.2853164672851562, -0.8832874298095703, 1.778594970703125, 0.27222633361816406, -0.0325775146484375, -0.43487548828125, 2.2221832275390625, 1.5083160400390625, 3.12567138671875, 0.3854408264160156, -0.2207794189453125, -1.58349609375, -0.6082229614257812, 2.030517578125, -0.742645263671875, 1.0860862731933594, 0.87066650390625, 0.27367401123046875, -0.0706024169921875, 1.8134078979492188, 0.6950912475585938, 2.5007400512695312, -1.0625762939453125, 3.787078857421875, 1.1729354858398438, -0.7267532348632812, -0.77642822265625, 1.0689926147460938, 1.5010604858398438, -0.3328857421875, 2.59136962890625, 0.060649871826171875, -1.0663032531738281, 0.9444866180419922, 0.3995513916015625, 0.6046657562255859, 0.1982097625732422, 1.094228744506836, -0.19256591796875, -0.07784843444824219], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000241.npy"}
{"epoch": 0.36432350718065004, "step": 242, "batch_size": 64, "mean": 1.2643779516220093, "std": 1.6315840482711792, "min": -1.8927230834960938, "p10": -0.47511405944824203, "median": 0.9411067962646484, "p90": 3.081828308105469, "max": 7.1363525390625, "pos_frac": 0.78125, "sample": [-1.8927230834960938, 1.0243911743164062, -0.00958251953125, 0.850006103515625, 2.4028244018554688, -1.2491645812988281, 3.0749282836914062, 2.907665252685547, 0.5551109313964844, 2.204803466796875, -0.5290374755859375, 1.5537853240966797, 0.28957557678222656, 2.9643020629882812, 3.031238555908203, -0.086334228515625, 3.5582714080810547, -0.05139923095703125, 0.9802932739257812, 0.8886070251464844, 0.6261367797851562, 2.9446640014648438, 3.411008834838867, 0.7697334289550781, 0.6508712768554688, 2.0123214721679688, -0.70269775390625, 0.2760009765625, 2.2834701538085938, 1.6401214599609375, 1.5241050720214844, 0.162689208984375, 5.7827301025390625, 1.2568435668945312, 1.5131359100341797, 3.3945999145507812, 0.39646148681640625, -0.3492927551269531, 7.1363525390625, 0.6865005493164062, 0.02338409423828125, 1.7522773742675781, 0.6628494262695312, -0.2630157470703125, 1.5342445373535156, -0.048290252685546875, 1.796966552734375, 2.5673370361328125, 2.024871826171875, 2.2544326782226562, 4.1293182373046875, 1.7433128356933594, 3.0847854614257812, -1.097097396850586, -0.041622161865234375, 1.24072265625, 0.7932281494140625, 0.8233928680419922, 1.3259124755859375, -0.9668197631835938, 0.01325225830078125, 0.3393669128417969, -1.5578575134277344, 0.9019203186035156], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000242.npy"}
{"epoch": 0.36583522297808013, "step": 243, "batch_size": 64, "mean": 1.0429232120513916, "std": 1.522492527961731, "min": -1.9838409423828125, "p10": -0.6094781875610351, "median": 0.9249496459960938, "p90": 2.986466979980469, "max": 5.0324249267578125, "pos_frac": 0.734375, "sample": [-0.11956405639648438, 2.6651268005371094, 0.927276611328125, -1.9838409423828125, 4.302637100219727, 0.5348396301269531, -0.4842376708984375, 0.1334686279296875, 0.9560699462890625, -0.4498138427734375, 5.0324249267578125, 1.1240692138671875, 0.207550048828125, 1.3857269287109375, -0.30230712890625, 2.8313217163085938, 0.05904388427734375, 0.63671875, 1.3347415924072266, 0.915924072265625, 0.18651580810546875, 3.9483489990234375, 1.017547607421875, -1.3112468719482422, -0.77093505859375, 2.266782760620117, -0.05242156982421875, 1.3652267456054688, 1.7043838500976562, -0.081085205078125, 0.2025909423828125, -1.1864280700683594, 1.7358169555664062, -0.33953857421875, 1.5983943939208984, 2.7224960327148438, 1.8281707763671875, 2.7963638305664062, 0.157379150390625, 0.62542724609375, 3.515655517578125, 0.9226226806640625, 0.43688201904296875, 2.739866256713867, 2.9520416259765625, -0.12345504760742188, -0.2354869842529297, 1.2731761932373047, 3.231964111328125, 2.6846237182617188, 0.1764068603515625, 0.4296112060546875, 1.0101737976074219, 3.6899948120117188, -0.519378662109375, 2.4856338500976562, 1.5334510803222656, 1.1994514465332031, -0.6480922698974609, -1.4728431701660156, -1.6613616943359375, 3.001220703125, 1.618133544921875, 0.385833740234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000243.npy"}
{"epoch": 0.3673469387755102, "step": 244, "batch_size": 64, "mean": 1.2712348699569702, "std": 1.3493722677230835, "min": -1.7318496704101562, "p10": -0.08661060333251953, "median": 1.0394649505615234, "p90": 2.8977550506591805, "max": 6.856697082519531, "pos_frac": 0.859375, "sample": [2.5202484130859375, -1.7318496704101562, 1.5530776977539062, 0.5795669555664062, 2.2949981689453125, 0.9899101257324219, -0.5560417175292969, 2.5044174194335938, 0.9418067932128906, -0.051074981689453125, 0.7317943572998047, 1.3975181579589844, -1.056732177734375, 3.195343017578125, 1.19683837890625, 2.692718505859375, 0.6142921447753906, 4.314365386962891, -0.5025405883789062, 1.5353813171386719, 1.4185943603515625, 1.089019775390625, -0.8578834533691406, 1.5021991729736328, 1.5003471374511719, 1.3106613159179688, 0.853485107421875, 2.6942977905273438, 1.6525497436523438, 0.4002513885498047, 3.3124923706054688, 0.7868881225585938, -0.15146827697753906, 0.5831298828125, -0.08790206909179688, 0.3545398712158203, -0.08359718322753906, 1.2978782653808594, 2.02777099609375, 6.856697082519531, 0.3475151062011719, 1.332427978515625, 0.7849845886230469, 0.5462112426757812, 0.054775238037109375, 2.9849510192871094, 1.774139404296875, 3.1944751739501953, 0.5376205444335938, 0.4804649353027344, 0.25240135192871094, 3.0257797241210938, 1.9523162841796875, 1.2986373901367188, 2.162872314453125, 0.7901382446289062, 0.35880279541015625, 2.6202545166015625, 0.9622306823730469, 0.9882354736328125, 0.48638916015625, 0.8817214965820312, 2.628498077392578, 1.2891998291015625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000244.npy"}
{"epoch": 0.3688586545729403, "step": 245, "batch_size": 64, "mean": 1.3489388227462769, "std": 1.7353373765945435, "min": -3.779571533203125, "p10": -0.49880867004394525, "median": 1.2143049240112305, "p90": 3.5949485778808596, "max": 5.662422180175781, "pos_frac": 0.796875, "sample": [0.7288131713867188, -0.630645751953125, 1.1625862121582031, 5.3232574462890625, 1.9974327087402344, 0.15557289123535156, 0.11219406127929688, 1.1314411163330078, 1.2589645385742188, 0.6203994750976562, 0.9965438842773438, 1.6620635986328125, 2.2164974212646484, 1.8814353942871094, 3.8026466369628906, 4.09063720703125, 0.030740737915039062, 2.0880680084228516, -0.800079345703125, 5.003726959228516, 3.6267929077148438, 1.1505355834960938, -0.112518310546875, 2.041706085205078, 1.8904304504394531, 2.2852630615234375, 0.5391845703125, -0.2585906982421875, 0.75250244140625, -0.4407005310058594, 3.5206451416015625, -1.2265682220458984, -0.047027587890625, 1.5001296997070312, 0.7737197875976562, -0.3544349670410156, -2.072662353515625, 1.56109619140625, 2.115863800048828, 4.307792663574219, 1.1375808715820312, 0.556427001953125, 5.662422180175781, 1.1454544067382812, -0.40924644470214844, 1.118124008178711, 3.419771194458008, 3.36474609375, -0.523712158203125, 1.24603271484375, 1.748077392578125, 1.1957664489746094, -3.779571533203125, 3.420106887817383, 3.0648956298828125, 1.5946159362792969, -1.8923110961914062, 2.0270137786865234, 1.470947265625, 1.2328433990478516, 1.8934745788574219, 1.56494140625, 0.5618896484375, 1.1263427734375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000245.npy"}
{"epoch": 0.37037037037037035, "step": 246, "batch_size": 64, "mean": 1.3396670818328857, "std": 1.702824592590332, "min": -1.9295501708984375, "p10": -0.6131196975708006, "median": 1.2122259140014648, "p90": 3.7914127349853537, "max": 5.676116943359375, "pos_frac": 0.765625, "sample": [2.2656288146972656, 2.1844310760498047, 3.244182586669922, 4.178068161010742, 0.3563385009765625, 0.6438140869140625, -1.5895652770996094, 4.5873870849609375, 0.7166404724121094, -0.6544780731201172, 2.4246673583984375, 1.88238525390625, 0.11745452880859375, -0.3933372497558594, 2.77001953125, 1.3809471130371094, 1.2392215728759766, -0.7864780426025391, -1.24249267578125, -0.2120361328125, 0.920562744140625, 2.2720565795898438, 1.2957000732421875, 2.9274425506591797, 4.976531982421875, -0.9149932861328125, 1.7514801025390625, 5.501617431640625, 0.4146575927734375, 1.6060333251953125, 0.1724987030029297, 1.6899795532226562, 0.9660491943359375, 0.3806915283203125, 4.161712646484375, 1.141632080078125, 0.3641357421875, 3.066953659057617, 4.02593994140625, -0.216827392578125, 1.1852302551269531, -1.9295501708984375, -1.5725326538085938, 2.1024932861328125, 2.203632354736328, -0.1700897216796875, 1.3707122802734375, 2.5581283569335938, 0.98370361328125, -0.4071044921875, 0.7384033203125, -0.036041259765625, 1.7351913452148438, 5.676116943359375, 0.8429946899414062, 0.6680526733398438, 1.8500175476074219, 3.223846435546875, -0.5166168212890625, 2.267303466796875, 1.4974231719970703, 0.2857704162597656, 1.803802490234375, -0.2388458251953125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000246.npy"}
{"epoch": 0.37188208616780044, "step": 247, "batch_size": 64, "mean": 0.6254061460494995, "std": 1.6183052062988281, "min": -4.246677398681641, "p10": -1.1742364883422851, "median": 0.6720008850097656, "p90": 2.3311519622802743, "max": 5.9844970703125, "pos_frac": 0.6875, "sample": [-0.3067474365234375, 0.6554336547851562, -1.1965713500976562, 0.9729385375976562, 1.5973243713378906, -2.3793869018554688, 1.0448760986328125, 2.7143096923828125, 3.233875274658203, 0.577545166015625, 2.8022918701171875, 0.8983688354492188, 1.8615398406982422, 1.6725387573242188, 0.1879425048828125, -0.436767578125, 1.0973033905029297, 2.162616729736328, 0.3178577423095703, 0.822296142578125, 5.9844970703125, 1.508087158203125, -2.8713722229003906, -1.122121810913086, 0.3731231689453125, 0.5579299926757812, 1.75225830078125, -0.2628593444824219, -1.370361328125, 0.183258056640625, 2.114593505859375, 1.1022758483886719, 0.13588714599609375, 0.321563720703125, 0.688568115234375, 0.49178123474121094, 1.046356201171875, -4.246677398681641, -1.0105133056640625, -0.7744388580322266, -0.45819091796875, 0.8508796691894531, -1.102142333984375, 3.0027999877929688, 4.1060791015625, -0.033168792724609375, 1.7415733337402344, 0.3020439147949219, -0.1756134033203125, 1.2315597534179688, -0.394622802734375, 1.3803157806396484, -0.07006072998046875, 0.24827003479003906, -1.6585311889648438, 1.7267704010009766, 0.9820384979248047, 2.0576953887939453, 1.1929550170898438, -0.5155029296875, -2.0177001953125, 1.3951797485351562, 2.40338134765625, 0.9285621643066406], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000247.npy"}
{"epoch": 0.37339380196523053, "step": 248, "batch_size": 64, "mean": 1.0933786630630493, "std": 1.5615214109420776, "min": -1.755615234375, "p10": -0.8020988464355469, "median": 0.985600471496582, "p90": 3.066625213623047, "max": 5.248016357421875, "pos_frac": 0.765625, "sample": [1.3116531372070312, 0.6245651245117188, -1.755615234375, -1.3186187744140625, 3.7784957885742188, 1.3132553100585938, 2.8151702880859375, 1.4420223236083984, 1.0109119415283203, -1.140512466430664, 0.1250019073486328, 2.3064708709716797, 1.7609176635742188, -0.728790283203125, 1.6647109985351562, 0.8647842407226562, 3.04931640625, -0.6220779418945312, -1.671783447265625, 0.39626312255859375, -0.61871337890625, 0.35968780517578125, 1.3086624145507812, 1.1822967529296875, 3.03900146484375, 1.1063117980957031, 1.4023666381835938, 0.33173179626464844, 1.5521049499511719, 2.70391845703125, 0.5229110717773438, 3.7407455444335938, 0.05171012878417969, -0.4452953338623047, 2.931488037109375, 0.8033561706542969, -0.1708984375, 1.9802398681640625, 0.33046722412109375, 0.9602890014648438, 2.1257705688476562, 3.885019302368164, 0.6221389770507812, 5.248016357421875, 3.0740432739257812, 1.3430023193359375, 0.5332279205322266, -0.4433708190917969, 0.37944984436035156, 2.5295181274414062, 4.781211853027344, 0.8250083923339844, 1.9478607177734375, 0.48938751220703125, -1.1948280334472656, 1.5490169525146484, 1.06646728515625, 3.0971221923828125, 2.6309127807617188, -0.8102035522460938, 0.19627952575683594, -0.240875244140625, -1.17327880859375, -0.7831878662109375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000248.npy"}
{"epoch": 0.3749055177626606, "step": 249, "batch_size": 64, "mean": 0.4600982069969177, "std": 1.6151224374771118, "min": -3.491912841796875, "p10": -1.9617694854736327, "median": 0.7012519836425781, "p90": 2.120216751098633, "max": 4.544563293457031, "pos_frac": 0.703125, "sample": [2.1894989013671875, -1.7984371185302734, 0.6974563598632812, 0.30279541015625, -0.1293621063232422, -1.9206390380859375, -1.9793968200683594, 1.6863021850585938, 0.652618408203125, 1.5408706665039062, 0.7812747955322266, 1.5890426635742188, 0.09229278564453125, 1.0192947387695312, -1.761138916015625, -2.3348541259765625, 0.16497802734375, -2.6701812744140625, -0.2669830322265625, -1.1519699096679688, 0.07782745361328125, -0.57232666015625, 0.768829345703125, 1.1996822357177734, 1.0240516662597656, 0.6165733337402344, 0.027189254760742188, -0.3444862365722656, 4.544563293457031, -0.7776870727539062, -0.95965576171875, 0.7687244415283203, 0.4841728210449219, 1.2926197052001953, 2.1353721618652344, -3.491912841796875, 0.6493453979492188, -0.43927001953125, 1.4813785552978516, -3.3914947509765625, 1.0661430358886719, 2.4492340087890625, 1.6410064697265625, 3.0836105346679688, 1.8192214965820312, 1.2988052368164062, -2.0729827880859375, 1.2295265197753906, 2.6125717163085938, 0.705047607421875, 0.2978401184082031, 1.4408378601074219, 2.028350830078125, 0.48372459411621094, 1.6918067932128906, 2.0848541259765625, 0.8445549011230469, 1.2672176361083984, 3.2382431030273438, -2.66766357421875, 0.3730621337890625, -0.6993026733398438, 1.5568313598632812, 1.8767852783203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000249.npy"}
{"epoch": 0.3764172335600907, "step": 250, "batch_size": 64, "mean": 1.2496273517608643, "std": 1.8470813035964966, "min": -3.7891845703125, "p10": -0.5717639923095702, "median": 0.9798851013183594, "p90": 3.5321687698364266, "max": 6.685604095458984, "pos_frac": 0.78125, "sample": [1.2537670135498047, -0.44420814514160156, -0.31637001037597656, 1.1568527221679688, 0.559234619140625, 3.372314453125, 1.8146495819091797, 3.6843414306640625, 2.46014404296875, -0.01557159423828125, 1.1970252990722656, 0.6111679077148438, 3.5940933227539062, 0.15877914428710938, 1.561676025390625, 0.5613994598388672, 0.845916748046875, -3.7891845703125, 1.330678939819336, 0.6291275024414062, 0.8309135437011719, -2.2232818603515625, 1.2659187316894531, -0.7822608947753906, 0.3458080291748047, -0.414215087890625, 3.754131317138672, 2.405832290649414, 0.11589813232421875, 5.0216064453125, -0.4567680358886719, 4.444793701171875, 3.3876781463623047, 0.41733551025390625, 2.4797306060791016, 0.15624237060546875, 0.2240142822265625, 1.5634078979492188, 6.685604095458984, 2.4146575927734375, -0.8674087524414062, -1.3011741638183594, 0.11285018920898438, 0.05157470703125, 3.2234649658203125, 0.8536167144775391, 6.420806884765625, 3.2835311889648438, 1.8617668151855469, 2.303434371948242, 0.962493896484375, -0.9291915893554688, 1.8646392822265625, -0.1848888397216797, -0.6210479736328125, 1.6871490478515625, 2.69635009765625, 1.6090774536132812, 2.424419403076172, 0.14596176147460938, -0.4069175720214844, 0.9972763061523438, 0.9071025848388672, 1.0183868408203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000250.npy"}
{"epoch": 0.3779289493575208, "step": 251, "batch_size": 64, "mean": 0.9481081962585449, "std": 1.7119187116622925, "min": -2.2786788940429688, "p10": -0.8613883972167968, "median": 0.5766134262084961, "p90": 3.64650802612305, "max": 5.828330993652344, "pos_frac": 0.71875, "sample": [0.1691722869873047, 4.360660552978516, 3.9604034423828125, -0.06354713439941406, 0.576202392578125, -0.06283950805664062, -1.5655288696289062, 1.8648796081542969, 0.38277435302734375, 4.940765380859375, -0.36049652099609375, -0.8710861206054688, 1.8358001708984375, -0.8740386962890625, 0.3472423553466797, -1.235586166381836, 0.4661407470703125, 4.412628173828125, 0.5770244598388672, -0.8387603759765625, 1.2522201538085938, 5.828330993652344, 1.22955322265625, -1.886871337890625, 0.767791748046875, -0.6306228637695312, 2.9140853881835938, 1.0838165283203125, -0.39007568359375, 0.149810791015625, 0.5563812255859375, 2.371795654296875, 0.15554428100585938, 0.7931251525878906, 1.2838211059570312, 1.31903076171875, 2.533782958984375, 0.23938751220703125, -0.6856746673583984, -0.47727203369140625, 2.823659896850586, 1.1806488037109375, 0.0924224853515625, 5.034021377563477, 0.5955352783203125, 0.5155487060546875, 1.3160476684570312, -0.4958038330078125, 0.9225006103515625, 1.24005126953125, 1.778228759765625, 2.00421142578125, 4.087635040283203, -1.005035400390625, 0.36052703857421875, -0.6635398864746094, 2.0153236389160156, -2.2786788940429688, 1.4582347869873047, -0.6865310668945312, 0.01282501220703125, 1.29400634765625, 2.145427703857422, 0.5018882751464844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000251.npy"}
{"epoch": 0.3794406651549509, "step": 252, "batch_size": 64, "mean": 0.98729407787323, "std": 1.7898131608963013, "min": -5.814056396484375, "p10": -0.6704027175903319, "median": 1.001999855041504, "p90": 3.1752021789550784, "max": 4.50982666015625, "pos_frac": 0.765625, "sample": [1.437570571899414, -4.14947509765625, -0.5269374847412109, -2.402587890625, 1.7466201782226562, 0.38077545166015625, 1.5981330871582031, 1.6678314208984375, 0.46407127380371094, 2.4193115234375, 4.3639678955078125, 1.2647590637207031, 0.01070404052734375, 2.8540420532226562, 3.474395751953125, 1.1524200439453125, 3.5660667419433594, -0.2445240020751953, 0.8200836181640625, -0.04368782043457031, -1.427621841430664, 0.1467876434326172, 3.2026519775390625, 1.6524829864501953, 1.5963287353515625, 1.2587966918945312, 0.9691009521484375, -0.7318878173828125, 2.690397262573242, -0.9090728759765625, 1.1417865753173828, 2.6299285888671875, 2.3251113891601562, 2.1817855834960938, 0.8150825500488281, 0.6841259002685547, 3.098175048828125, 1.0524749755859375, 0.8443202972412109, 0.020626068115234375, 0.4034080505371094, -0.23344039916992188, -0.052959442138671875, 0.5090675354003906, 1.3523178100585938, 4.3416748046875, -0.03749847412109375, 1.7063064575195312, -5.814056396484375, -0.28235435485839844, -0.46712684631347656, 0.52020263671875, 0.16152572631835938, 1.0348987579345703, 0.6802520751953125, 0.351165771484375, 2.5086517333984375, 3.3628158569335938, 1.0750885009765625, 1.5643653869628906, 4.50982666015625, 3.1111526489257812, -0.7388210296630859, 0.5254364013671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000252.npy"}
{"epoch": 0.38095238095238093, "step": 253, "batch_size": 64, "mean": 1.0598115921020508, "std": 1.5687987804412842, "min": -1.6081066131591797, "p10": -0.746432876586914, "median": 0.9173717498779297, "p90": 3.5113136291503912, "max": 5.53265380859375, "pos_frac": 0.71875, "sample": [0.9230308532714844, -0.9245376586914062, 3.5682411193847656, 0.15651321411132812, -0.36353492736816406, 1.7801399230957031, 0.592041015625, 2.5218353271484375, 1.3298149108886719, -0.12227821350097656, 1.5975475311279297, -1.09906005859375, 3.3784828186035156, 0.6952896118164062, 0.5502891540527344, -0.7509498596191406, -0.39947509765625, 1.5160655975341797, -1.3206787109375, 2.2582550048828125, 5.53265380859375, 1.1751880645751953, -0.4481964111328125, 1.1375999450683594, 0.6505546569824219, 0.638763427734375, 2.028566360473633, 1.69866943359375, 0.0260162353515625, 2.8203811645507812, 2.2697296142578125, 1.1465835571289062, 1.685302734375, 0.021915435791015625, 1.0326919555664062, 4.523468017578125, 0.3131866455078125, -1.2000770568847656, 3.765411376953125, 0.9335727691650391, 0.911712646484375, -0.36878204345703125, -0.1304779052734375, -1.6081066131591797, 2.0849838256835938, -0.28978538513183594, 0.2637615203857422, 1.3405723571777344, 4.56437873840332, -0.16045379638671875, 1.433380126953125, 3.8469619750976562, 4.168678283691406, 0.4763641357421875, 1.9275989532470703, 0.3576202392578125, 1.1728477478027344, -0.06780052185058594, 1.9700393676757812, -0.72821044921875, -0.7358932495117188, -0.8928928375244141, 0.6412391662597656, 2.0111923217773438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000253.npy"}
{"epoch": 0.382464096749811, "step": 254, "batch_size": 64, "mean": 1.184436559677124, "std": 1.8647323846817017, "min": -3.02679443359375, "p10": -1.1104373931884766, "median": 0.8932113647460938, "p90": 3.586948966979981, "max": 6.141794204711914, "pos_frac": 0.734375, "sample": [-0.3781280517578125, 0.954559326171875, 5.0911102294921875, 0.423004150390625, 0.21254730224609375, 0.6422805786132812, 2.598705291748047, 1.9227371215820312, 0.06669235229492188, -0.0259857177734375, -0.8019790649414062, 1.7130775451660156, 3.4383087158203125, 0.8691844940185547, 2.0194091796875, 2.075885772705078, 3.3426971435546875, 2.1039657592773438, -1.0729598999023438, 1.4748916625976562, 2.7858505249023438, 6.141794204711914, -1.392547607421875, 4.732372283935547, 0.915283203125, -1.4062118530273438, 3.72760009765625, 1.4500102996826172, 3.6506519317626953, 0.07541275024414062, 3.9563369750976562, -1.5228424072265625, 0.7222061157226562, -0.5323810577392578, -1.1264991760253906, 1.2303695678710938, -0.3344688415527344, 0.5320053100585938, -0.1598682403564453, 0.7932472229003906, 2.4943695068359375, 0.33391761779785156, 2.690563201904297, -1.3488197326660156, 0.7722377777099609, 2.8521957397460938, -1.0042915344238281, 3.3942489624023438, 1.761566162109375, 0.24180030822753906, 1.1249237060546875, -0.8265228271484375, 0.36618804931640625, 0.9462432861328125, 5.0780487060546875, 1.9999008178710938, 0.30005645751953125, 0.8711395263671875, 3.1762008666992188, 3.2456207275390625, 1.1060295104980469, -3.02679443359375, -0.36865234375, -1.2845611572265625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000254.npy"}
{"epoch": 0.3839758125472411, "step": 255, "batch_size": 64, "mean": 1.044198751449585, "std": 1.654715895652771, "min": -2.980823516845703, "p10": -0.7420028686523437, "median": 0.9365997314453125, "p90": 3.590581512451172, "max": 4.984153747558594, "pos_frac": 0.71875, "sample": [4.001729965209961, -0.9269485473632812, 3.2227783203125, 0.19073486328125, 2.271636962890625, -1.628021240234375, -0.21954727172851562, 1.12469482421875, -0.22498512268066406, 1.3156280517578125, -0.772613525390625, -0.21763229370117188, 0.04149627685546875, 1.9891281127929688, -0.6705780029296875, 1.1268653869628906, 1.3017654418945312, 1.8101348876953125, 0.45520782470703125, -2.980823516845703, 3.3367462158203125, -0.26102447509765625, 1.075164794921875, 0.6288604736328125, 2.2620391845703125, 1.5219879150390625, -0.19530487060546875, 0.1287078857421875, 3.5739669799804688, 0.8924636840820312, 4.7677764892578125, 2.3850326538085938, -1.1778945922851562, 0.8611793518066406, 1.337249755859375, 1.1325416564941406, 1.9690685272216797, 3.6529006958007812, 3.5977020263671875, 1.0833797454833984, -0.019540786743164062, 1.1300888061523438, 1.5577239990234375, -2.2472457885742188, 0.6827678680419922, -0.5805187225341797, 0.5496826171875, 4.322608947753906, 0.35906219482421875, 0.6825027465820312, 0.7313880920410156, 2.3085098266601562, 0.8094024658203125, 4.984153747558594, 0.9807357788085938, 1.408132553100586, 0.38373565673828125, 2.450092315673828, 3.813201904296875, -0.6376934051513672, -1.0743865966796875, -0.20980072021484375, 1.2536773681640625, -0.5927505493164062], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000255.npy"}
{"epoch": 0.3854875283446712, "step": 256, "batch_size": 64, "mean": 1.1683729887008667, "std": 1.7781788110733032, "min": -4.211494445800781, "p10": -0.4627708435058594, "median": 0.9006595611572266, "p90": 3.2868144989013675, "max": 5.6181640625, "pos_frac": 0.8125, "sample": [0.13138580322265625, 0.7564239501953125, 1.6273384094238281, 1.0809135437011719, 4.1352081298828125, 0.657501220703125, 0.07527351379394531, 3.0973434448242188, 0.9136619567871094, 0.7227554321289062, 0.07352066040039062, 0.18493080139160156, -0.101959228515625, 3.2993240356445312, 3.89141845703125, 5.4575347900390625, -0.28847503662109375, 0.8876571655273438, -4.211494445800781, 2.124042510986328, 1.0652122497558594, 0.1217498779296875, -0.7132453918457031, 1.1958847045898438, -1.7536544799804688, 0.5175666809082031, 1.0583362579345703, -0.45743560791015625, 1.8853607177734375, 1.0528221130371094, 1.0749588012695312, 2.4607887268066406, -0.01653289794921875, 1.8910160064697266, 2.2195663452148438, 1.6659774780273438, 2.605907440185547, 3.2570648193359375, -1.8705902099609375, 2.4143543243408203, -0.9695358276367188, 1.1482048034667969, 4.802734375, 0.1131591796875, 3.145254135131836, 0.062496185302734375, 1.770416259765625, 0.5723114013671875, 0.0941925048828125, 5.6181640625, 0.6304473876953125, 2.3680038452148438, -0.465057373046875, 0.19610595703125, 2.41864013671875, 0.2918128967285156, 4.6854248046875, 0.00548553466796875, 0.3047027587890625, 3.2576255798339844, -0.31598854064941406, 1.7458534240722656, -1.3251419067382812, 0.4311485290527344], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000256.npy"}
{"epoch": 0.3869992441421013, "step": 257, "batch_size": 64, "mean": 1.5996819734573364, "std": 2.1583168506622314, "min": -4.361202239990234, "p10": -0.5483734130859373, "median": 1.2583627700805664, "p90": 4.576708984375, "max": 7.396759033203125, "pos_frac": 0.796875, "sample": [-0.6209335327148438, 1.191436767578125, 1.2604827880859375, 7.105010986328125, 4.59271240234375, 3.4709510803222656, -0.16521835327148438, 3.098360061645508, 0.1587066650390625, 2.649831771850586, 1.6986160278320312, 1.1537704467773438, 1.2562427520751953, -0.6799507141113281, 3.6361083984375, 4.660686492919922, 4.53936767578125, -0.37906646728515625, -2.58538818359375, 0.7800140380859375, 0.020111083984375, 0.9778404235839844, 1.98583984375, 0.3854198455810547, 1.2039566040039062, 0.221771240234375, 1.4131393432617188, 4.243354797363281, 1.93792724609375, 0.38117218017578125, 2.279176712036133, 4.201969146728516, 0.9870681762695312, 1.1329841613769531, 2.1824569702148438, 0.80999755859375, 1.977874755859375, 2.429656982421875, 4.642967224121094, 0.7020378112792969, -4.361202239990234, 7.396759033203125, 0.9597740173339844, 0.61505126953125, -0.051361083984375, -0.025934219360351562, -0.03650665283203125, 0.05719757080078125, 1.5032272338867188, 2.7483444213867188, 2.5011348724365234, 1.341156005859375, 2.79241943359375, -0.2306976318359375, -0.7236328125, 6.861812591552734, 0.5011558532714844, -1.00042724609375, 5.225433349609375, 2.12835693359375, 1.9943408966064453, 1.7881088256835938, 1.646148681640625, -2.1894760131835938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000257.npy"}
{"epoch": 0.3885109599395314, "step": 258, "batch_size": 64, "mean": 1.1468994617462158, "std": 1.3903664350509644, "min": -2.23541259765625, "p10": -0.5590906143188474, "median": 1.0214691162109375, "p90": 2.905860900878908, "max": 4.894378662109375, "pos_frac": 0.84375, "sample": [1.64666748046875, 0.6431617736816406, -0.11257171630859375, -0.7356529235839844, 1.2636642456054688, 4.696170806884766, 3.4724502563476562, 1.377197265625, 3.3861236572265625, 0.1882915496826172, 0.7456264495849609, 0.6105422973632812, 0.2016143798828125, 1.9785499572753906, 0.1512451171875, 1.9207305908203125, 0.6789703369140625, -0.38930702209472656, 0.6033477783203125, -1.437246322631836, 0.7950706481933594, 0.7262115478515625, -0.94000244140625, 1.8165283203125, 1.210205078125, 1.0816726684570312, 1.5757598876953125, 1.3071956634521484, 0.8131675720214844, 0.1187896728515625, 1.43359375, 1.2702217102050781, 0.9612655639648438, 2.0726585388183594, 0.39374446868896484, 0.051239013671875, 1.3685321807861328, -0.6318550109863281, -1.109649658203125, 4.894378662109375, 1.5135574340820312, -0.6693191528320312, 2.4041061401367188, 1.7984466552734375, 3.1055145263671875, 1.7807502746582031, 3.595184326171875, 1.2044429779052734, -0.22380828857421875, 2.164827346801758, 0.5108108520507812, 1.4914169311523438, 0.23201751708984375, 0.5770034790039062, 0.5848007202148438, 0.733917236328125, 2.44000244140625, -2.23541259765625, 2.273387908935547, 0.48160743713378906, 2.3177928924560547, 2.2946701049804688, 4.409013748168945, 0.5185356140136719], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000258.npy"}
{"epoch": 0.3900226757369615, "step": 259, "batch_size": 64, "mean": 1.8398234844207764, "std": 1.9336974620819092, "min": -1.631063461303711, "p10": -0.7948665618896484, "median": 2.006075859069824, "p90": 4.3827880859375, "max": 6.8597412109375, "pos_frac": 0.78125, "sample": [0.05812835693359375, 1.20050048828125, -0.8169708251953125, 2.6394615173339844, 1.530242919921875, 2.3631515502929688, 4.167182922363281, 4.3180389404296875, 3.2178955078125, -1.1635284423828125, 3.6031417846679688, -1.0604629516601562, 0.3688812255859375, 2.89288330078125, 2.706756591796875, 1.0792007446289062, -1.4862041473388672, -0.12660789489746094, 4.8544921875, 3.9888763427734375, 0.43315887451171875, 3.2983169555664062, 4.53386116027832, 2.786884307861328, 3.0119895935058594, -0.7432899475097656, 0.48163604736328125, -0.20557022094726562, 0.7864456176757812, 1.347635269165039, 2.1633834838867188, 0.8242950439453125, -1.631063461303711, 3.557098388671875, -0.4861297607421875, 1.6413116455078125, 3.7026596069335938, 1.938232421875, 3.9326629638671875, 2.9365692138671875, 3.617755889892578, 2.0739192962646484, 0.28882598876953125, 6.8597412109375, 1.6063079833984375, 1.7749786376953125, -0.9322433471679688, -0.96783447265625, -0.4661102294921875, 0.06124114990234375, 4.4105377197265625, 4.587772369384766, 2.2227020263671875, -0.088348388671875, 0.07767486572265625, 4.763984680175781, 1.4258270263671875, 2.4721298217773438, -0.4005279541015625, 2.8394012451171875, 2.0831146240234375, 4.5482025146484375, 2.80023193359375, 3.4442691802978516], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000259.npy"}
{"epoch": 0.3915343915343915, "step": 260, "batch_size": 64, "mean": 1.292044997215271, "std": 1.8331257104873657, "min": -2.1224327087402344, "p10": -0.9692523956298827, "median": 1.207188606262207, "p90": 3.5848859786987304, "max": 6.151771545410156, "pos_frac": 0.703125, "sample": [2.8679676055908203, -1.3042831420898438, 2.2317066192626953, -0.9426002502441406, -0.9293251037597656, 6.151771545410156, 0.9423751831054688, 2.2626495361328125, 3.1264724731445312, -0.7606658935546875, 3.248779296875, 0.9247665405273438, 2.7489471435546875, 3.0284500122070312, 1.6511917114257812, -0.9197196960449219, 3.9913101196289062, 0.6640529632568359, 2.6912155151367188, 4.755531311035156, 4.867118835449219, -0.12938690185546875, 1.8027572631835938, 3.8829727172851562, -0.8101654052734375, -0.3721160888671875, 3.581026077270508, 3.5865402221679688, -0.781707763671875, 1.77490234375, 2.402271270751953, 2.602508544921875, 2.1623687744140625, 0.6121482849121094, 1.5051021575927734, 0.6286506652832031, -0.29761505126953125, 0.5807723999023438, -2.1224327087402344, 1.9013633728027344, 0.18681716918945312, 2.028034210205078, 0.2958660125732422, -0.3145599365234375, 2.145050048828125, 1.6513042449951172, 3.1104774475097656, -0.9806747436523438, 3.5542640686035156, -1.190765380859375, -0.2530040740966797, 0.8700580596923828, 0.4022674560546875, 1.19342041015625, 1.220956802368164, 1.4893035888671875, -1.176513671875, 1.554351806640625, 4.41162109375, -1.371002197265625, 1.0940589904785156, -1.2113113403320312, -0.8446731567382812, 1.0178604125976562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000260.npy"}
{"epoch": 0.3930461073318216, "step": 261, "batch_size": 64, "mean": 1.174607753753662, "std": 1.8421928882598877, "min": -3.4385986328125, "p10": -1.2132545471191405, "median": 1.0385570526123047, "p90": 3.5181564331054695, "max": 5.4355926513671875, "pos_frac": 0.734375, "sample": [-0.17953109741210938, 1.807464599609375, 1.759613037109375, -0.26143646240234375, -1.3564910888671875, 3.149394989013672, -0.1808757781982422, 2.9202518463134766, 2.8174190521240234, -0.03490638732910156, -2.5557785034179688, -0.43157958984375, -1.3110809326171875, 3.3538055419921875, 0.3959217071533203, 0.2411041259765625, 0.32854461669921875, 3.2547607421875, 0.31519126892089844, 4.525291442871094, 1.0959930419921875, 1.8184986114501953, 0.7206077575683594, -1.856781005859375, 3.588592529296875, 1.1015853881835938, 5.4355926513671875, 1.5749320983886719, -0.050872802734375, 0.9193439483642578, 1.6234607696533203, 1.6987972259521484, 4.211851119995117, 3.0464324951171875, 0.8601398468017578, -0.4620037078857422, 5.42218017578125, 0.50469970703125, -0.9849929809570312, 0.7252655029296875, 1.4028778076171875, 0.2366008758544922, 1.2470788955688477, 1.8785018920898438, 1.3618297576904297, -1.4901199340820312, 0.21137237548828125, -3.4385986328125, -0.7863883972167969, 0.9811210632324219, 2.012939453125, 0.46929168701171875, 3.69219970703125, 4.819097518920898, 0.8545913696289062, -1.3520126342773438, 1.5923576354980469, 1.9149703979492188, 0.7793655395507812, 3.0007781982421875, -0.024627685546875, 1.1143951416015625, 3.239288330078125, 1.9075794219970703], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000261.npy"}
{"epoch": 0.3945578231292517, "step": 262, "batch_size": 64, "mean": 1.274604320526123, "std": 1.7239521741867065, "min": -1.8100128173828125, "p10": -0.5419954299926757, "median": 0.8512344360351562, "p90": 3.8990047454833987, "max": 5.870574951171875, "pos_frac": 0.8125, "sample": [-1.2753829956054688, 2.021392822265625, 0.8711090087890625, 0.8833484649658203, -1.8100128173828125, 1.3065071105957031, 1.1255340576171875, 0.007965087890625, 0.4692230224609375, 3.7491989135742188, -0.29449462890625, 3.0973281860351562, 0.37128448486328125, 0.03444099426269531, 4.348453521728516, -1.0376834869384766, 0.6699714660644531, 5.372474670410156, 4.067291259765625, 0.5791721343994141, 0.6459121704101562, 0.14957046508789062, 0.68963623046875, -0.4020843505859375, 2.1638565063476562, 4.912086486816406, 0.209259033203125, 2.9253921508789062, 1.05743408203125, 0.026067733764648438, 3.9363021850585938, -0.12651443481445312, 1.0442962646484375, 1.6071643829345703, -1.0067367553710938, 2.4631881713867188, 0.9886894226074219, 3.725238800048828, 0.514007568359375, 1.1544265747070312, -0.6019573211669922, 2.2403564453125, -0.22494125366210938, 2.3520126342773438, 5.870574951171875, 0.6817817687988281, 0.35457611083984375, 0.5608673095703125, -1.3807830810546875, 1.4160575866699219, 1.1803741455078125, -0.14513778686523438, 4.535888671875, 0.26745033264160156, 3.8119773864746094, 0.3928337097167969, 2.34893798828125, 2.8634109497070312, 0.83135986328125, 1.91162109375, 0.3317413330078125, 0.13753509521484375, 1.4684066772460938, -0.8645858764648438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000262.npy"}
{"epoch": 0.3960695389266818, "step": 263, "batch_size": 64, "mean": 1.031212329864502, "std": 1.779293417930603, "min": -2.6142711639404297, "p10": -1.3339553833007811, "median": 1.230093002319336, "p90": 2.8896100997924807, "max": 5.929609298706055, "pos_frac": 0.6875, "sample": [1.1365394592285156, 2.0822219848632812, -2.0605010986328125, -0.8589553833007812, -1.2806243896484375, -0.09132194519042969, -0.7415924072265625, 1.2152023315429688, 1.9311580657958984, 5.372453689575195, 2.170867919921875, -0.5476284027099609, -0.24735260009765625, 2.173351287841797, 0.5392227172851562, -1.6311721801757812, 2.3308982849121094, 1.311431884765625, 1.433624267578125, -1.3938140869140625, 0.9446334838867188, 1.2575149536132812, 2.4218826293945312, 1.2449836730957031, -0.012371063232421875, 0.34180259704589844, 3.6256637573242188, 1.6626434326171875, 2.709808349609375, 1.6410484313964844, 4.455934524536133, 1.6146011352539062, -1.2458419799804688, -0.00958251953125, -1.2697925567626953, -0.3441162109375, -1.7865142822265625, 3.839488983154297, 1.8760948181152344, 1.8478851318359375, 1.6948699951171875, -0.1877593994140625, 0.554962158203125, 5.929609298706055, -1.3568115234375, 0.7364959716796875, 2.3301849365234375, 1.9866390228271484, 2.0906219482421875, 0.5885696411132812, -0.6184234619140625, 4.0678558349609375, 0.13134765625, 0.5791358947753906, 2.50787353515625, 0.6316413879394531, 2.9088058471679688, 1.879251480102539, 1.3543586730957031, -1.7365837097167969, 0.4398345947265625, -2.6142711639404297, 2.844820022583008, 1.5947895050048828], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000263.npy"}
{"epoch": 0.3975812547241119, "step": 264, "batch_size": 64, "mean": 1.3868227005004883, "std": 2.2119553089141846, "min": -3.8345489501953125, "p10": -1.1861618041992188, "median": 1.3943958282470703, "p90": 4.273367309570313, "max": 6.165071487426758, "pos_frac": 0.71875, "sample": [4.280059814453125, 0.2093048095703125, 0.19194793701171875, 0.5615329742431641, 0.5591506958007812, -0.8116703033447266, 1.497283935546875, 0.03130340576171875, 2.32427978515625, 3.2320556640625, -0.4887714385986328, 1.2915077209472656, -3.01263427734375, 2.8200931549072266, -0.31536102294921875, 3.9326629638671875, 3.3820571899414062, 0.3844757080078125, -1.1627273559570312, 3.4208908081054688, 1.618408203125, 1.6179065704345703, 2.612762451171875, 4.5749359130859375, 1.7751922607421875, 6.137462615966797, -0.23879051208496094, 2.428924560546875, -0.47393798828125, 0.6040077209472656, 1.9322280883789062, -0.169525146484375, 4.152320861816406, 1.0318336486816406, -0.06707382202148438, 0.36090850830078125, 4.575603485107422, -1.2726364135742188, 1.0024032592773438, 2.5485076904296875, -1.1962051391601562, -2.576305389404297, 1.5071029663085938, 2.0333099365234375, 4.822959899902344, 3.1517486572265625, -1.62127685546875, 2.78118896484375, -3.8345489501953125, 1.0523757934570312, -1.2995147705078125, 2.6583709716796875, 6.165071487426758, 1.7216758728027344, -1.1052970886230469, -0.7656841278076172, 0.403533935546875, 4.25775146484375, 3.7612762451171875, 0.002422332763671875, -1.13067626953125, 5.864288330078125, 1.9732208251953125, 3.0509796142578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000264.npy"}
{"epoch": 0.39909297052154197, "step": 265, "batch_size": 64, "mean": 1.4208089113235474, "std": 1.9875743389129639, "min": -4.2475738525390625, "p10": -0.6097343444824218, "median": 1.2175636291503906, "p90": 3.8416736602783206, "max": 5.1387786865234375, "pos_frac": 0.765625, "sample": [1.3801193237304688, 0.9761199951171875, -0.4858055114746094, -2.770843505859375, 1.3495502471923828, 1.1131362915039062, -0.05081748962402344, 3.4834518432617188, -1.6172943115234375, 4.0768280029296875, 0.25086402893066406, 0.8887596130371094, 1.8981208801269531, 2.484344482421875, -4.2475738525390625, 1.0409622192382812, 0.8369731903076172, 2.130247116088867, 3.115764617919922, -2.0276756286621094, 0.7388381958007812, 3.5037841796875, 2.4271392822265625, 1.0865955352783203, 0.13378524780273438, 0.48394012451171875, 5.11436653137207, 1.2344589233398438, 2.0173568725585938, -0.5310802459716797, 3.8574676513671875, 3.562408447265625, -0.09384918212890625, 4.266731262207031, 1.7383804321289062, 5.1387786865234375, 1.3180980682373047, 0.5246772766113281, -0.018911361694335938, -0.5436553955078125, 0.7039756774902344, 1.1489925384521484, 1.2771434783935547, 2.8727798461914062, 4.89813232421875, 5.120574951171875, 3.659564971923828, 1.2801437377929688, 0.6384658813476562, -1.8087024688720703, 2.9662628173828125, 0.8415985107421875, -0.6380538940429688, 3.4353981018066406, 3.5081024169921875, 3.804821014404297, 3.332916259765625, -2.2572402954101562, 0.8880558013916016, -0.03437042236328125, 2.3646469116210938, -0.30754852294921875, 2.2509002685546875, 1.2006683349609375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000265.npy"}
{"epoch": 0.40060468631897206, "step": 266, "batch_size": 64, "mean": 1.299785852432251, "std": 2.1949498653411865, "min": -2.82177734375, "p10": -1.0908180236816405, "median": 1.2037477493286133, "p90": 3.818754577636719, "max": 9.148567199707031, "pos_frac": 0.71875, "sample": [0.2546234130859375, 3.7783050537109375, 5.93707275390625, 0.9253978729248047, 1.9189987182617188, -0.1685943603515625, -1.678497314453125, -1.1731491088867188, 1.2187671661376953, -0.8003616333007812, 2.52520751953125, 1.5731201171875, 0.18778610229492188, 1.5312824249267578, 1.302572250366211, 1.5690765380859375, -0.2169342041015625, 9.148567199707031, -0.34996795654296875, 0.78973388671875, 5.686370849609375, 2.2073440551757812, 0.0171661376953125, 4.428106307983398, 1.8207111358642578, 2.5770111083984375, -1.7697906494140625, 3.836090087890625, 1.5731945037841797, 0.298431396484375, 2.6081905364990234, 0.1212158203125, 2.7767486572265625, -0.5310134887695312, 1.3156185150146484, -0.898712158203125, 1.7146453857421875, 2.90863037109375, 1.0542984008789062, -0.014553070068359375, -0.12616729736328125, -0.6304779052734375, 0.886474609375, 2.2044410705566406, -2.2894668579101562, 0.8348464965820312, 0.8066787719726562, 1.3473052978515625, 1.8007354736328125, 1.1887283325195312, -2.82177734375, 0.3060894012451172, -2.2385406494140625, -2.3563232421875, 2.3151588439941406, 1.0564308166503906, 7.02191162109375, -0.15959930419921875, 4.18988037109375, 1.902435302734375, 3.5942249298095703, 1.942556381225586, 2.7453956604003906, -0.3373565673828125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000266.npy"}
{"epoch": 0.4021164021164021, "step": 267, "batch_size": 64, "mean": 1.0033204555511475, "std": 1.9762498140335083, "min": -5.462589263916016, "p10": -1.1329080581665039, "median": 1.187638282775879, "p90": 3.215518379211427, "max": 6.203582763671875, "pos_frac": 0.65625, "sample": [0.6054916381835938, 1.1354694366455078, -0.044647216796875, -2.886148452758789, 2.1140594482421875, -0.8901710510253906, 2.348377227783203, 2.457611083984375, 2.2582855224609375, 0.18265533447265625, -0.3976917266845703, 1.9592437744140625, 0.42376136779785156, 6.203582763671875, -0.0450592041015625, -2.379863739013672, -0.06754302978515625, -0.24207019805908203, 1.316986083984375, -2.25433349609375, -1.1409664154052734, -2.0931243896484375, 0.49721527099609375, 1.3342018127441406, 4.747711181640625, 3.3401851654052734, -0.9889602661132812, -0.023525238037109375, 3.4337158203125, -1.7192726135253906, 0.9976425170898438, 1.092935562133789, 3.881620407104492, 1.820852279663086, -0.15894317626953125, 1.8036537170410156, 2.2573089599609375, 1.657073974609375, 1.0469131469726562, 5.7266998291015625, 1.7351455688476562, -0.18449783325195312, 3.36676025390625, 0.11730384826660156, 2.1213531494140625, 1.6757183074951172, 2.9246292114257812, 1.7125701904296875, -5.462589263916016, 2.357818603515625, 2.5019607543945312, 1.23980712890625, -0.5422859191894531, 0.3050422668457031, -1.114105224609375, -0.09246063232421875, 1.8732624053955078, 2.3619556427001953, 1.63653564453125, -0.1343841552734375, 1.7133140563964844, 2.7705116271972656, 2.0262222290039062, -0.008012771606445312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000267.npy"}
{"epoch": 0.4036281179138322, "step": 268, "batch_size": 64, "mean": 1.2268199920654297, "std": 1.848544955253601, "min": -2.8935165405273438, "p10": -0.7733455657958984, "median": 0.860595703125, "p90": 3.6207557678222657, "max": 6.7309417724609375, "pos_frac": 0.75, "sample": [1.2986907958984375, 5.467872619628906, 0.09285926818847656, -0.570526123046875, 0.813720703125, 2.851776123046875, 1.9192733764648438, 0.8194122314453125, -0.8212184906005859, 4.906551361083984, 1.5283355712890625, 0.1759796142578125, -0.957489013671875, 0.977813720703125, 0.41916656494140625, 0.8140830993652344, 1.2944984436035156, -0.536468505859375, 0.17123794555664062, 0.42517852783203125, 2.2207984924316406, 2.6460723876953125, -0.16411399841308594, 0.482269287109375, -2.8935165405273438, -0.449615478515625, 1.82666015625, 0.43701171875, 1.5392532348632812, 1.0144195556640625, 3.0894317626953125, 0.7803955078125, 0.4942131042480469, 2.2146034240722656, -0.2755889892578125, 4.023773193359375, 0.433868408203125, -1.0638313293457031, -1.7030029296875, 1.235565185546875, 1.2272262573242188, 0.453887939453125, -0.747344970703125, 6.7309417724609375, 2.621826171875, 1.686981201171875, 3.2887039184570312, -0.2843284606933594, 0.9017791748046875, -0.6090660095214844, -1.0381317138671875, -0.7844886779785156, 3.0800933837890625, 0.09642982482910156, 5.79461669921875, 1.9598941802978516, 2.7440338134765625, 3.570709228515625, 4.1707305908203125, 1.0259475708007812, 0.5044345855712891, 3.6422042846679688, 1.6404495239257812, -0.14046287536621094], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000268.npy"}
{"epoch": 0.4051398337112623, "step": 269, "batch_size": 64, "mean": 1.4018274545669556, "std": 1.807553768157959, "min": -2.1738662719726562, "p10": -1.014002227783203, "median": 1.0919342041015625, "p90": 3.8505287170410156, "max": 5.0216064453125, "pos_frac": 0.765625, "sample": [0.97247314453125, -1.5571670532226562, -0.1405181884765625, 2.5798873901367188, 1.11724853515625, -0.7076416015625, -0.7521209716796875, 4.209043502807617, 2.3886032104492188, 0.5971603393554688, -2.1738662719726562, 2.7974700927734375, 1.3370513916015625, 1.9637985229492188, 3.938629150390625, 3.8239059448242188, 0.7590122222900391, 2.2503814697265625, 2.8468704223632812, -1.4229202270507812, 0.9241180419921875, -1.71112060546875, 3.687040328979492, 0.9287109375, -1.08831787109375, 4.839988708496094, 0.14570999145507812, 3.8619384765625, 0.0294036865234375, 3.1380386352539062, 1.7667160034179688, 1.0146217346191406, -0.29381370544433594, 2.4845542907714844, 1.066619873046875, 0.7003650665283203, 0.6236038208007812, 0.8589019775390625, -0.8405990600585938, 2.4025840759277344, 4.185396194458008, 3.0629940032958984, 0.73724365234375, 2.3723373413085938, 3.5212860107421875, -0.5034713745117188, 1.6750717163085938, 0.6267929077148438, -1.2233657836914062, 0.241119384765625, 5.0216064453125, -0.10471153259277344, 3.6442108154296875, -0.06402587890625, 1.6422653198242188, -2.130035400390625, 4.536468505859375, 3.4020004272460938, 0.5473480224609375, 2.0992889404296875, 1.7430953979492188, 1.0241622924804688, 2.8426132202148438, 1.4509010314941406], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000269.npy"}
{"epoch": 0.40665154950869237, "step": 270, "batch_size": 64, "mean": 1.0275520086288452, "std": 1.7323219776153564, "min": -4.43701171875, "p10": -0.7240415573120117, "median": 1.1022491455078125, "p90": 3.0242263793945323, "max": 6.391334533691406, "pos_frac": 0.765625, "sample": [3.1224803924560547, 1.9026508331298828, -0.7245502471923828, 4.724891662597656, 0.6948890686035156, 3.1152801513671875, 1.5887203216552734, -0.6489906311035156, 3.56707763671875, -0.400604248046875, 6.391334533691406, 1.4879226684570312, 0.8675613403320312, 1.1506423950195312, 0.51788330078125, -1.9075775146484375, 1.0618743896484375, 1.2172698974609375, 0.5209178924560547, 2.409191131591797, -0.3300628662109375, 0.9006805419921875, 0.5672874450683594, -0.19673538208007812, 0.9583816528320312, 1.8271636962890625, 1.7357959747314453, -4.43701171875, 2.588115692138672, 0.8833446502685547, 1.50042724609375, 1.290435791015625, 0.7024765014648438, 3.34808349609375, -0.18509674072265625, 2.1668624877929688, 1.136138916015625, 0.11972808837890625, 1.3916854858398438, -3.179119110107422, 2.811767578125, 3.9458694458007812, 2.6859512329101562, -1.5489063262939453, 1.6388378143310547, 1.2978057861328125, -0.4530181884765625, 0.8400421142578125, 1.5080642700195312, 0.9829463958740234, 2.4958324432373047, 0.1740131378173828, -0.9910430908203125, 1.7376785278320312, 0.6743927001953125, -0.7228546142578125, -0.6350173950195312, -2.0356674194335938, 1.63165283203125, 1.2115650177001953, 1.068359375, 0.1058197021484375, 1.9489212036132812, 1.94287109375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000270.npy"}
{"epoch": 0.40816326530612246, "step": 271, "batch_size": 64, "mean": 0.8730854988098145, "std": 2.2127342224121094, "min": -3.5730743408203125, "p10": -1.6413597106933593, "median": 0.7840394973754883, "p90": 3.48338680267334, "max": 7.895660400390625, "pos_frac": 0.625, "sample": [2.18896484375, 1.9659957885742188, -2.5855789184570312, 1.536651611328125, 3.876129150390625, 1.8824195861816406, 0.8485908508300781, -0.24845504760742188, 3.0095481872558594, -1.4924163818359375, 3.6002349853515625, -2.5307445526123047, 3.49957275390625, -0.13220596313476562, 2.361034393310547, 4.63873291015625, 0.5493869781494141, 0.906982421875, 0.04703521728515625, 6.410209655761719, 1.192779541015625, 1.3982315063476562, 2.2455978393554688, -1.082010269165039, -0.7441558837890625, -0.6135025024414062, -0.7579231262207031, 2.1238250732421875, 0.9425811767578125, 0.7194881439208984, 0.16382408142089844, 2.195606231689453, -0.559967041015625, -1.2972297668457031, 0.31414794921875, 3.0189743041992188, 1.6705474853515625, -0.5746307373046875, 1.3467636108398438, 3.445619583129883, 0.5883636474609375, -2.4009857177734375, 7.895660400390625, -1.4482879638671875, 1.270294189453125, -0.7445831298828125, -0.44858551025390625, -3.093975067138672, -3.5730743408203125, -1.7051925659179688, 0.24100112915039062, -0.14948272705078125, 1.4528350830078125, 2.6670665740966797, 2.5877132415771484, 2.2142333984375, -2.335540771484375, 2.2754287719726562, -0.3435516357421875, 0.6077079772949219, -0.6603527069091797, -1.4499053955078125, 5.222618103027344, 1.7274093627929688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000271.npy"}
{"epoch": 0.40967498110355255, "step": 272, "batch_size": 64, "mean": 1.613443374633789, "std": 1.8451019525527954, "min": -0.9204330444335938, "p10": -0.4045207977294921, "median": 1.3522911071777344, "p90": 4.101523208618165, "max": 6.9940948486328125, "pos_frac": 0.78125, "sample": [0.4033851623535156, -0.7487106323242188, -0.8754043579101562, 1.7600650787353516, 0.5125503540039062, 4.131381988525391, 1.010650634765625, 3.533050537109375, 4.031852722167969, 1.1174240112304688, 1.9352798461914062, 1.9036216735839844, -0.1841716766357422, -0.5637435913085938, 2.4947280883789062, 0.7669219970703125, 2.57635498046875, -0.8667068481445312, 3.397369384765625, 5.0197296142578125, 1.8274574279785156, 0.8800144195556641, 1.7880687713623047, 1.8501014709472656, 5.181634902954102, 0.820526123046875, 1.7743568420410156, -0.6957931518554688, 2.0567169189453125, 4.732673645019531, 0.32413482666015625, 6.519382476806641, 1.859130859375, 0.22995758056640625, 2.749969482421875, -0.3409461975097656, -0.22507095336914062, 0.6208095550537109, 2.9036865234375, -0.3441963195800781, 2.4982376098632812, 5.654022216796875, 2.8916397094726562, 0.6184349060058594, 0.0743408203125, -0.25104713439941406, 6.9940948486328125, -0.4303741455078125, 2.2486610412597656, 2.7131271362304688, 0.6171836853027344, 3.233673095703125, -0.9204330444335938, 1.4896392822265625, 1.2149429321289062, 1.18505859375, 2.3043441772460938, 2.3767929077148438, -0.16207504272460938, -0.25099754333496094, 0.31365966796875, 0.40273284912109375, 2.1953163146972656, 0.38115692138671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000272.npy"}
{"epoch": 0.41118669690098264, "step": 273, "batch_size": 64, "mean": 1.4361896514892578, "std": 2.0160410404205322, "min": -3.4556617736816406, "p10": -0.8875942230224608, "median": 1.2713470458984375, "p90": 3.4199960708618167, "max": 8.00396728515625, "pos_frac": 0.8125, "sample": [-0.9709701538085938, 1.5058517456054688, 1.93023681640625, -1.8878936767578125, 3.8101806640625, 1.279052734375, 0.81744384765625, 1.65374755859375, -1.7293472290039062, 0.9255218505859375, -0.8107833862304688, -0.5198822021484375, 1.6347198486328125, 0.27864646911621094, 2.577178955078125, 2.0664443969726562, 2.7613983154296875, 0.6092567443847656, 3.3899593353271484, 0.9139175415039062, 2.2745895385742188, 1.2807273864746094, -0.22222900390625, -0.18884658813476562, -1.7010955810546875, 2.8368453979492188, 2.9023208618164062, 1.836639404296875, -0.0503997802734375, 0.5287971496582031, 1.02569580078125, 8.00396728515625, 1.3346939086914062, -3.4556617736816406, 0.41739654541015625, 1.5913848876953125, 2.603046417236328, 2.8885498046875, 0.690399169921875, 1.017242431640625, -1.0316848754882812, 0.8370361328125, 2.42498779296875, 0.3103179931640625, 0.4016609191894531, 6.793975830078125, 6.478143692016602, 2.510211944580078, 2.434539794921875, 2.5393218994140625, 0.6126651763916016, 0.054080963134765625, 0.009988784790039062, 0.286529541015625, 1.263641357421875, 3.4328689575195312, 1.6456947326660156, 2.3432693481445312, 0.6160888671875, 0.8325653076171875, 4.2568359375, 5.831686019897461, -0.9205131530761719, 2.1034774780273438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000273.npy"}
{"epoch": 0.4126984126984127, "step": 274, "batch_size": 64, "mean": 1.3664849996566772, "std": 1.9967597723007202, "min": -3.6913604736328125, "p10": -0.8516019821166992, "median": 1.3093948364257812, "p90": 3.576737976074219, "max": 6.5279541015625, "pos_frac": 0.71875, "sample": [2.532001495361328, -0.897308349609375, -0.31803321838378906, 1.76953125, -0.100433349609375, -3.6913604736328125, 3.5484542846679688, 3.1627197265625, 0.5279045104980469, 0.6276702880859375, 2.9357032775878906, 1.649261474609375, 0.5866165161132812, -0.780426025390625, 1.2394084930419922, 4.7781524658203125, 5.08210563659668, -2.2892227172851562, 1.239614486694336, 1.4544296264648438, 3.259521484375, 2.5551509857177734, 3.2010154724121094, 2.1740589141845703, 1.2984771728515625, -0.14060211181640625, 4.25933837890625, 6.5279541015625, 0.7825126647949219, -1.5517120361328125, -0.12923240661621094, 0.21514511108398438, 2.2655105590820312, 0.28202056884765625, -0.8622455596923828, 3.5888595581054688, -1.518463134765625, -0.8267669677734375, 1.6384048461914062, 1.3203125, -0.2802715301513672, 2.2755279541015625, 0.5084724426269531, 2.071807861328125, 0.25604820251464844, 2.1336746215820312, 0.7506980895996094, -3.070709228515625, 2.2797088623046875, -0.016483306884765625, 4.669677734375, 2.7969894409179688, 1.5455074310302734, -0.22344970703125, 3.2278900146484375, -0.24805641174316406, 1.0105705261230469, 0.7901592254638672, 5.582695007324219, 3.1126766204833984, -0.11550521850585938, 1.521636962890625, 2.8277053833007812, 2.652017593383789], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000274.npy"}
{"epoch": 0.41421012849584277, "step": 275, "batch_size": 64, "mean": 1.5252916812896729, "std": 2.085993528366089, "min": -3.0422325134277344, "p10": -0.492416763305664, "median": 1.086029052734375, "p90": 4.258318901062013, "max": 6.766651153564453, "pos_frac": 0.765625, "sample": [2.307220458984375, 1.1794929504394531, 0.2638206481933594, 3.6912918090820312, 4.392635345458984, 0.8848361968994141, 0.502716064453125, -3.0422325134277344, 6.3017120361328125, 2.92291259765625, -0.23134613037109375, 1.27410888671875, 0.62823486328125, 6.766651153564453, -0.17833900451660156, 2.803956985473633, 1.7416915893554688, 1.7602405548095703, 6.033769607543945, -0.994049072265625, -2.4198455810546875, 5.627101898193359, 1.5053253173828125, 1.0620880126953125, 2.6266403198242188, 0.36896514892578125, -0.2670021057128906, 0.4017181396484375, -1.1598358154296875, 0.504364013671875, 1.1099700927734375, 5.804603576660156, -0.12703704833984375, 3.0114974975585938, 3.8148841857910156, 3.5249862670898438, 0.2999706268310547, 1.0534915924072266, -0.02745819091796875, 0.005157470703125, 2.925048828125, 0.6075363159179688, -0.5193252563476562, -0.24883651733398438, 1.1642475128173828, 2.7974090576171875, 3.0091018676757812, 1.8294219970703125, -1.5223865509033203, 0.9618873596191406, 0.9118576049804688, -0.007884979248046875, -0.4296302795410156, 1.7212142944335938, 3.944913864135742, 4.676544189453125, 0.06454849243164062, 1.510009765625, 0.3781108856201172, 3.3653030395507812, 3.5832862854003906, -1.0926666259765625, 0.11783027648925781, 2.1422119140625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000275.npy"}
{"epoch": 0.41572184429327286, "step": 276, "batch_size": 64, "mean": 1.5846753120422363, "std": 1.8144917488098145, "min": -2.8390045166015625, "p10": -0.4171321868896483, "median": 1.56597900390625, "p90": 3.833950042724611, "max": 6.894536972045898, "pos_frac": 0.765625, "sample": [2.2658557891845703, -0.19430160522460938, -0.7506771087646484, 3.114990234375, 1.3915061950683594, 1.8173751831054688, 5.979841232299805, -1.0443115234375, 0.9961566925048828, 1.2729911804199219, 2.4070968627929688, -0.0713958740234375, 4.134136199951172, 2.2544403076171875, 5.627593994140625, 2.1897850036621094, 0.9571723937988281, -0.2802772521972656, 0.8853912353515625, 1.0854549407958984, -0.16322708129882812, 2.4657745361328125, 2.986888885498047, 1.937692642211914, 0.0007781982421875, -0.21191787719726562, 2.32733154296875, -0.4757843017578125, 2.7552452087402344, 3.276388168334961, 1.7342567443847656, 1.7306747436523438, 0.7365951538085938, -1.2896709442138672, 4.146087646484375, -0.71044921875, 6.894536972045898, -0.22036361694335938, 0.32781982421875, 2.4194259643554688, -2.8390045166015625, 0.90374755859375, 3.2321929931640625, 0.6152706146240234, 0.95538330078125, -1.711334228515625, 3.3750076293945312, 2.619964599609375, 2.9820404052734375, 1.6921615600585938, -0.233184814453125, 1.720001220703125, 2.9933929443359375, 0.7268486022949219, 2.6440277099609375, 4.0306396484375, 2.248760223388672, 0.8805274963378906, 1.2712326049804688, 4.178749084472656, 0.9404239654541016, 1.4397964477539062, 2.1176223754882812, -0.07195281982421875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000276.npy"}
{"epoch": 0.41723356009070295, "step": 277, "batch_size": 64, "mean": 1.2154929637908936, "std": 2.2260913848876953, "min": -3.2322311401367188, "p10": -1.0814422607421874, "median": 1.0338897705078125, "p90": 3.992561531066897, "max": 8.55181884765625, "pos_frac": 0.765625, "sample": [1.3318099975585938, -1.649627685546875, 2.144969940185547, 0.3375511169433594, 1.5405082702636719, 0.3255577087402344, 1.089935302734375, 0.18976593017578125, 2.497915267944336, 3.024852752685547, -1.1009254455566406, -1.0473861694335938, 1.024566650390625, 0.1405487060546875, -0.0411834716796875, 1.1829776763916016, 0.8477554321289062, -0.71783447265625, 0.5341567993164062, 0.35799407958984375, 1.3241424560546875, 1.64569091796875, 1.0622024536132812, 1.078155517578125, 0.31903076171875, 6.091552734375, 0.3858795166015625, 0.3728485107421875, -1.0573883056640625, 2.4478416442871094, -0.28754615783691406, 1.48638916015625, -2.8554344177246094, 3.4030380249023438, 0.9325008392333984, 2.6505279541015625, -1.0154800415039062, -1.1447219848632812, 1.0950851440429688, 1.0912551879882812, 1.9799041748046875, -0.42174530029296875, 8.55181884765625, 5.370307922363281, 2.4429397583007812, 1.2352333068847656, 2.4664688110351562, 0.4930419921875, -3.2322311401367188, 5.578094482421875, 1.2003402709960938, -0.17897796630859375, 5.506561279296875, 2.006317138671875, -1.0917510986328125, 0.14756202697753906, 4.245214462280273, -2.438152313232422, 1.043212890625, 7.687129974365234, 0.8401222229003906, 0.4587249755859375, 0.14295005798339844, 2.718984603881836], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000277.npy"}
{"epoch": 0.41874527588813304, "step": 278, "batch_size": 64, "mean": 1.5503500699996948, "std": 1.9997109174728394, "min": -4.046714782714844, "p10": -0.682019805908203, "median": 1.4989166259765625, "p90": 4.361032104492187, "max": 5.824190139770508, "pos_frac": 0.796875, "sample": [-0.8414745330810547, 1.5883026123046875, 3.3142166137695312, 1.5778732299804688, 1.5916156768798828, -0.46002960205078125, -2.7622604370117188, -0.202789306640625, 4.428375244140625, 5.078468322753906, 1.4382247924804688, -0.5962448120117188, 3.8565444946289062, 5.824190139770508, 4.361320495605469, -0.33529090881347656, 0.79876708984375, 0.5376071929931641, 0.49080657958984375, 3.248676300048828, 0.6849498748779297, 2.088104248046875, 1.7928695678710938, 0.4862823486328125, -4.046714782714844, 4.950897216796875, 0.5659637451171875, 3.042795181274414, 4.325958251953125, 1.5668258666992188, 1.2933120727539062, 1.1531906127929688, -0.718780517578125, -0.4821434020996094, 4.333648681640625, 1.0317153930664062, 1.5305557250976562, 2.151092529296875, 1.9026679992675781, 2.2571487426757812, 5.573966979980469, 4.360359191894531, -1.2191810607910156, 1.4672775268554688, 0.9317398071289062, 3.4325942993164062, 1.8793411254882812, 0.8113880157470703, -1.8010711669921875, 0.5065536499023438, 2.4064064025878906, 2.1986541748046875, -1.272979736328125, -0.4699363708496094, 0.5458755493164062, 3.281219482421875, 4.3894195556640625, 2.972564697265625, 1.0064888000488281, 0.6234207153320312, 0.4034156799316406, 0.22587203979492188, 2.341228485107422, 1.7805519104003906], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000278.npy"}
{"epoch": 0.42025699168556313, "step": 279, "batch_size": 64, "mean": 1.7702279090881348, "std": 1.7881921529769897, "min": -3.3963680267333984, "p10": -0.26242256164550776, "median": 1.6339635848999023, "p90": 3.9136680603027347, "max": 6.696163177490234, "pos_frac": 0.875, "sample": [0.05702400207519531, 2.0218772888183594, 6.696163177490234, 1.1466407775878906, -3.3963680267333984, 3.1287384033203125, 0.2784614562988281, 0.9759521484375, 0.2294921875, 1.5031394958496094, 2.4046401977539062, 3.4074249267578125, 0.6956558227539062, 3.1805992126464844, 2.757617950439453, 5.9716796875, 1.3841400146484375, 3.781787872314453, -0.7326164245605469, -0.22139358520507812, -0.3942680358886719, 1.86688232421875, 0.6527481079101562, 0.6604537963867188, 1.5500106811523438, 1.500579833984375, 1.7371997833251953, 0.3836402893066406, 0.016815185546875, 4.05181884765625, 1.0391693115234375, 2.536235809326172, 2.8249664306640625, 0.6760711669921875, -0.430267333984375, 2.892169952392578, 3.5180206298828125, 2.708953857421875, 0.2704887390136719, 5.2897796630859375, 3.9701881408691406, 3.1367244720458984, 4.254787445068359, 1.00946044921875, -1.3523025512695312, 1.7964286804199219, 0.7000579833984375, 1.6552009582519531, 3.462982177734375, 3.1623077392578125, 1.7822704315185547, -0.28000640869140625, 1.6127262115478516, 3.0029869079589844, 0.6030445098876953, 0.7774810791015625, 0.949920654296875, 1.844390869140625, 1.9791984558105469, 1.7510261535644531, 2.4306678771972656, -0.6680908203125, 5.494434356689453, 1.5965728759765625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000279.npy"}
{"epoch": 0.4217687074829932, "step": 280, "batch_size": 64, "mean": 1.806646704673767, "std": 1.7506554126739502, "min": -2.1275253295898438, "p10": -0.1439743041992187, "median": 1.5605087280273438, "p90": 4.519846725463867, "max": 6.820709228515625, "pos_frac": 0.859375, "sample": [2.6838111877441406, 6.820709228515625, 1.831064224243164, 1.8282852172851562, 1.4934425354003906, 5.150232315063477, 0.21512794494628906, -0.497283935546875, 0.4423656463623047, 2.5132369995117188, 2.0870513916015625, 2.305553436279297, 0.7658843994140625, 2.1788787841796875, 3.31658935546875, 3.3857421875, 1.0317306518554688, 1.1215744018554688, 2.7787017822265625, 0.39267730712890625, 1.007232666015625, 1.5087032318115234, -0.3603973388671875, 1.1739788055419922, 3.842376708984375, 0.08203125, 2.2224903106689453, 1.6033248901367188, 4.576019287109375, 4.728271484375, 1.1658763885498047, -1.166351318359375, 1.7828826904296875, -0.0074310302734375, 1.0265312194824219, 2.702972412109375, 6.39349365234375, -0.3605194091796875, 4.473781585693359, 2.114898681640625, 0.0279083251953125, -2.1275253295898438, -0.16699981689453125, 0.384765625, 0.5345268249511719, 3.105510711669922, 1.249643325805664, -0.32822418212890625, -0.09024810791015625, 1.9063472747802734, 4.830165863037109, 0.9383029937744141, 1.4377861022949219, 3.5095348358154297, 2.3266353607177734, 1.3432769775390625, 1.7480125427246094, 0.9760818481445312, 0.28782081604003906, 2.0715866088867188, 1.5176925659179688, 2.1520919799804688, 4.539588928222656, 3.0955657958984375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000280.npy"}
{"epoch": 0.42328042328042326, "step": 281, "batch_size": 64, "mean": 1.6353683471679688, "std": 2.139556884765625, "min": -2.7988452911376953, "p10": -1.5945384979248045, "median": 1.6437921524047852, "p90": 4.244819641113282, "max": 6.5676422119140625, "pos_frac": 0.78125, "sample": [-1.7275314331054688, 3.592611312866211, 0.623779296875, 0.03204345703125, 5.644512176513672, 3.841796875, 1.116598129272461, 2.7961578369140625, 1.4525604248046875, 6.5676422119140625, 0.898895263671875, 2.7250499725341797, 1.7941265106201172, 1.5212249755859375, 0.502593994140625, 1.3330116271972656, 1.7226734161376953, 5.203086853027344, -0.7409095764160156, 2.2339324951171875, 3.2628173828125, 1.6168575286865234, 4.3198699951171875, 3.8776168823242188, 2.0725555419921875, 1.1882095336914062, 1.8760433197021484, 0.3941078186035156, 0.8620529174804688, -2.7988452911376953, 2.7562294006347656, -0.23447608947753906, -1.39752197265625, 1.652984619140625, 3.6780166625976562, -2.096170425415039, 4.0697021484375, 5.2720184326171875, -0.11627197265625, 0.0006961822509765625, 5.099891662597656, 2.0784759521484375, 5.422906875610352, 2.772064208984375, 1.9821414947509766, 3.2765884399414062, 0.7068977355957031, 0.6734085083007812, 2.3635501861572266, 1.6345996856689453, -0.2913970947265625, 0.1325531005859375, 3.5987930297851562, -2.1545982360839844, 2.152252197265625, 3.8534698486328125, 3.173625946044922, -2.54473876953125, 1.347869873046875, 1.8318557739257812, -0.3556709289550781, -1.6886920928955078, -0.11364555358886719, -1.6789741516113281], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000281.npy"}
{"epoch": 0.42479213907785335, "step": 282, "batch_size": 64, "mean": 1.5590956211090088, "std": 2.1133594512939453, "min": -2.366230010986328, "p10": -1.1045642852783202, "median": 1.4331865310668945, "p90": 4.391258430480958, "max": 7.690273284912109, "pos_frac": 0.765625, "sample": [1.4515228271484375, -1.629547119140625, -0.8357944488525391, 0.43972015380859375, 0.06374359130859375, 2.262714385986328, 1.7128791809082031, 0.843475341796875, -0.3557262420654297, 5.0778656005859375, 4.062736511230469, 4.818706512451172, 0.9672489166259766, 0.490081787109375, 4.1005401611328125, 2.268341064453125, 4.515851974487305, 0.20129013061523438, -0.033538818359375, 1.8985614776611328, -0.089019775390625, -0.6696319580078125, 2.5293636322021484, 3.481048583984375, 0.5171642303466797, 0.6964263916015625, 0.78009033203125, 2.5864830017089844, 7.690273284912109, 2.2424888610839844, -0.9349250793457031, -1.4680976867675781, -2.366230010986328, 3.2111072540283203, 0.3276844024658203, -1.4048995971679688, 3.261425018310547, 0.43585205078125, -1.3543167114257812, 1.4671173095703125, 1.03875732421875, 3.600017547607422, 1.6693115234375, 2.1470184326171875, 4.8292694091796875, 6.4380645751953125, 1.7523155212402344, 0.8514518737792969, 1.013458251953125, 2.372650146484375, -0.9954376220703125, 0.30035400390625, -0.3256378173828125, 0.19649505615234375, -1.3142166137695312, 5.759355545043945, 3.606475830078125, 1.4148502349853516, -1.1513328552246094, 2.7689208984375, 2.432863235473633, 2.714141845703125, 3.8167762756347656, 1.5861167907714844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000282.npy"}
{"epoch": 0.42630385487528344, "step": 283, "batch_size": 64, "mean": 1.3614988327026367, "std": 2.2736306190490723, "min": -3.9004669189453125, "p10": -0.8225013732910156, "median": 0.7803573608398438, "p90": 4.578487014770509, "max": 7.445060729980469, "pos_frac": 0.734375, "sample": [3.615142822265625, -2.178438186645508, -1.967926025390625, -0.24442291259765625, 1.4865798950195312, 0.8173141479492188, 0.9030475616455078, 0.22507476806640625, 6.3202972412109375, -0.8626174926757812, -0.6411895751953125, -0.0066680908203125, 0.8968048095703125, 0.147918701171875, 1.824310302734375, 1.7338981628417969, -0.0097503662109375, 3.1818084716796875, -3.9004669189453125, -0.021759033203125, 4.699481964111328, 3.1428451538085938, 4.296165466308594, 0.35801124572753906, 2.7295684814453125, 4.797817230224609, 7.445060729980469, 1.9878730773925781, 5.637687683105469, 0.19297409057617188, 1.7908554077148438, 3.864837646484375, -0.0648193359375, 0.9211807250976562, -0.44516754150390625, 3.6953277587890625, 0.041748046875, 0.082733154296875, 2.7095184326171875, 3.19482421875, 5.105131149291992, -1.7411727905273438, 3.8721580505371094, 5.538352966308594, 0.23845672607421875, 0.06866455078125, -0.44437408447265625, -2.1590576171875, 0.26523399353027344, 0.118408203125, 1.9535713195800781, 0.7434005737304688, 1.3590087890625, 0.5324878692626953, 2.425567626953125, 2.8499832153320312, -0.01792144775390625, 0.421783447265625, 0.3030529022216797, 0.0723114013671875, -2.34307861328125, 3.9626922607421875, -0.7288970947265625, 2.3426761627197266], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000283.npy"}
{"epoch": 0.42781557067271353, "step": 284, "batch_size": 64, "mean": 1.663050651550293, "std": 2.1989142894744873, "min": -2.9397811889648438, "p10": -0.46555290222167967, "median": 1.2972888946533203, "p90": 4.191741943359375, "max": 8.779582977294922, "pos_frac": 0.8125, "sample": [2.47406005859375, 0.3370361328125, 1.5055732727050781, 2.830230712890625, 4.251533508300781, 1.019012451171875, 0.19773101806640625, -1.5300445556640625, 3.4866943359375, 0.7375297546386719, 0.7552700042724609, 3.4292831420898438, 0.6520938873291016, 2.365264892578125, 0.5888423919677734, -0.906646728515625, 0.9027843475341797, 2.3479557037353516, 0.494720458984375, 8.734039306640625, 0.132568359375, 3.8060684204101562, 1.7146759033203125, -0.9092254638671875, -0.2601318359375, 2.7249908447265625, 1.5458145141601562, 2.1966476440429688, 1.9468402862548828, 1.0339164733886719, 1.3668785095214844, 2.1550827026367188, 2.0828475952148438, -0.4544525146484375, 1.211944580078125, -0.14827346801757812, 2.4704437255859375, 8.779582977294922, 2.54754638671875, 0.7349014282226562, 1.4556503295898438, 6.240409851074219, 2.7752227783203125, 1.9115257263183594, 0.4739570617675781, 0.1432018280029297, 1.90582275390625, 3.8553600311279297, 1.1555938720703125, 4.13043212890625, 0.1303234100341797, -0.4703102111816406, 4.218017578125, -0.9921112060546875, 1.2276992797851562, -0.1532135009765625, 5.925254821777344, 0.15399169921875, 2.900127410888672, -2.4059715270996094, 4.867362976074219, -2.9397811889648438, -0.36431121826171875, 0.9393520355224609], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000284.npy"}
{"epoch": 0.4293272864701436, "step": 285, "batch_size": 64, "mean": 1.3046571016311646, "std": 1.9343936443328857, "min": -2.1572113037109375, "p10": -1.1719945907592773, "median": 0.9579763412475586, "p90": 3.9665000915527355, "max": 7.212657928466797, "pos_frac": 0.75, "sample": [1.5635910034179688, 0.6190357208251953, 0.685028076171875, 3.318490982055664, 0.206695556640625, -1.216135025024414, -1.2177276611328125, 0.9603958129882812, 0.3852500915527344, -2.1572113037109375, 1.9900588989257812, 5.397724151611328, 2.9310245513916016, 1.7146835327148438, 4.1678009033203125, -1.4161090850830078, 2.6414031982421875, 7.212657928466797, -0.0582275390625, 1.38531494140625, -0.27751922607421875, -0.03376579284667969, -1.3511085510253906, -0.83721923828125, 1.1236343383789062, 0.6924514770507812, -0.62994384765625, 1.1511688232421875, 2.1180419921875, -0.15236663818359375, -0.5078353881835938, 2.905853271484375, -1.9230117797851562, 4.099029541015625, 3.6572647094726562, 1.4803695678710938, 1.2373466491699219, 2.606689453125, 2.5778274536132812, 1.3243522644042969, 1.9253005981445312, 1.6256561279296875, -1.5991897583007812, 5.309135437011719, 3.6314697265625, 0.7500076293945312, 0.4393196105957031, 5.2154541015625, 0.44171142578125, 0.9555568695068359, -0.8039093017578125, 2.3397979736328125, 0.7961006164550781, 1.832122802734375, 0.9388427734375, 0.3496971130371094, 4.688304901123047, 3.0999069213867188, -1.069000244140625, 0.6050567626953125, 0.42606353759765625, 0.6624755859375, 0.60467529296875, 1.95849609375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000285.npy"}
{"epoch": 0.4308390022675737, "step": 286, "batch_size": 64, "mean": 2.14786434173584, "std": 2.3842339515686035, "min": -5.534339904785156, "p10": -0.09195251464843741, "median": 1.92718505859375, "p90": 4.811384582519532, "max": 8.590728759765625, "pos_frac": 0.859375, "sample": [2.629913330078125, 0.5118122100830078, 0.2715415954589844, 2.6417465209960938, 3.1522903442382812, 1.7600326538085938, 1.2530364990234375, 2.9197216033935547, 3.5706214904785156, -0.2362823486328125, 4.5906829833984375, 1.89276123046875, 1.47705078125, 2.7553367614746094, 2.1849594116210938, 7.1474761962890625, 2.645862579345703, 0.5784206390380859, -2.8622512817382812, 1.5799179077148438, 1.5395431518554688, -0.6789340972900391, -0.3876838684082031, 6.376201629638672, 0.3368415832519531, -2.4208831787109375, 4.02655029296875, 3.0820274353027344, -0.0078277587890625, 2.6898040771484375, 1.96160888671875, 3.8055458068847656, 4.484546661376953, 4.146537780761719, 1.7000846862792969, 4.8342132568359375, 0.7269668579101562, 4.060018539428711, -0.1280059814453125, 8.590728759765625, 4.983100891113281, 4.6339111328125, -0.00554656982421875, 0.8932037353515625, 2.068695068359375, 1.7518653869628906, 0.21732711791992188, 1.642486572265625, 5.087226867675781, 3.9548187255859375, 4.75811767578125, 1.2665519714355469, 0.4481697082519531, 0.30590248107910156, 0.06531143188476562, 3.8999481201171875, 6.77032470703125, 0.7001762390136719, -5.534339904785156, 4.3697357177734375, 2.0354957580566406, 0.3903846740722656, 2.059539794921875, 1.4983673095703125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000286.npy"}
{"epoch": 0.4323507180650038, "step": 287, "batch_size": 64, "mean": 1.9180021286010742, "std": 2.1308658123016357, "min": -3.1395645141601562, "p10": -0.5154443740844725, "median": 1.4831457138061523, "p90": 5.144898796081544, "max": 8.293869018554688, "pos_frac": 0.859375, "sample": [3.057220458984375, 1.8543548583984375, 0.9042892456054688, 1.6676292419433594, 5.341102600097656, 5.30230712890625, 0.9881668090820312, 1.4286212921142578, 1.4479141235351562, -0.1463165283203125, 1.756439208984375, 2.396512985229492, 0.8440189361572266, 2.4232940673828125, 5.653739929199219, 6.127708435058594, 5.344917297363281, 0.33417320251464844, 2.407695770263672, 1.2238082885742188, 2.1797866821289062, 1.992095947265625, 4.0431976318359375, 1.398122787475586, 1.4458198547363281, 3.230266571044922, 0.27228736877441406, -1.4877090454101562, 1.9015655517578125, 1.0011177062988281, 2.9071426391601562, 0.6687393188476562, -2.8527584075927734, -0.5580215454101562, -0.8110446929931641, 4.158180236816406, 1.4518241882324219, 0.4344367980957031, 0.94683837890625, 3.618162155151367, 3.7279129028320312, 0.3813934326171875, 3.7080039978027344, 2.763601303100586, -3.1395645141601562, 0.50042724609375, 8.293869018554688, 1.5143547058105469, 2.8806629180908203, 3.644083023071289, -0.41609764099121094, 0.38056182861328125, 1.6482601165771484, 1.4519367218017578, -0.6945457458496094, 0.9664382934570312, 5.686290740966797, 2.188884735107422, 0.9385223388671875, 0.8744926452636719, -0.8059616088867188, 4.65399169921875, 4.777612686157227, 0.5293617248535156], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000287.npy"}
{"epoch": 0.43386243386243384, "step": 288, "batch_size": 64, "mean": 1.345275640487671, "std": 2.3748199939727783, "min": -3.404083251953125, "p10": -1.5623580932617185, "median": 1.009073257446289, "p90": 4.35995330810547, "max": 8.476432800292969, "pos_frac": 0.703125, "sample": [2.388296127319336, 0.9322280883789062, -1.24639892578125, -2.05181884765625, 4.481636047363281, 0.9637355804443359, 2.2244739532470703, -0.05278778076171875, 2.34576416015625, 0.6949386596679688, 2.0329360961914062, 1.5411624908447266, -0.009113311767578125, 3.704814910888672, -1.025604248046875, 4.8358154296875, -2.272979736328125, 1.3519821166992188, 0.493316650390625, 2.0389041900634766, -0.12282562255859375, 5.977180480957031, 8.476432800292969, 7.827972412109375, -0.9340057373046875, 4.767303466796875, 1.0544109344482422, 2.620868682861328, -2.3739547729492188, 0.41327476501464844, 5.17901611328125, 0.4245147705078125, 0.72314453125, 0.76055908203125, 3.4757232666015625, 1.3887100219726562, 2.0970001220703125, -1.0464935302734375, 1.907012939453125, -1.6977691650390625, -1.8073654174804688, -3.404083251953125, 4.076026916503906, 0.7552719116210938, 0.962615966796875, -2.6891326904296875, 1.9583358764648438, -1.09521484375, 1.6409835815429688, 2.04608154296875, 2.5398101806640625, 0.07665252685546875, 0.575958251953125, 3.294158935546875, -0.6500434875488281, -0.3383159637451172, 1.9625091552734375, 3.5830001831054688, -0.12320709228515625, 3.3582305908203125, -0.20137977600097656, 1.2313995361328125, 4.0261993408203125, 0.029773712158203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000288.npy"}
{"epoch": 0.43537414965986393, "step": 289, "batch_size": 64, "mean": 1.5024845600128174, "std": 2.53011417388916, "min": -3.00030517578125, "p10": -1.2431524276733399, "median": 1.1821422576904297, "p90": 4.39915828704834, "max": 9.341903686523438, "pos_frac": 0.75, "sample": [0.7167224884033203, 1.5747032165527344, 9.341903686523438, -1.2483348846435547, -1.0799484252929688, -2.1980133056640625, -0.07324790954589844, 1.3038864135742188, 4.6790771484375, 2.4477767944335938, 0.3695526123046875, 1.1119537353515625, 0.8987064361572266, 2.478729248046875, 3.0297164916992188, -0.05560493469238281, 1.8941650390625, -3.00030517578125, -1.0893287658691406, 4.0348358154296875, 0.996063232421875, 6.394144058227539, -0.8437004089355469, 0.610107421875, 2.2140731811523438, 0.5465412139892578, 3.489307403564453, 0.20684432983398438, 0.3359642028808594, 6.893749237060547, 1.8712081909179688, 1.2523307800292969, -0.9216880798339844, -2.24798583984375, 8.191970825195312, -0.8356437683105469, 0.24578475952148438, 8.119766235351562, 1.7385101318359375, 1.5343132019042969, 4.3502960205078125, 3.6620101928710938, -0.8412857055664062, 0.6715908050537109, 1.4133434295654297, -1.4703388214111328, 1.5975093841552734, 0.6339035034179688, 0.08872222900390625, -1.4931907653808594, 3.0368804931640625, 3.72454833984375, 1.0641136169433594, 1.0068511962890625, 3.1311187744140625, 4.420099258422852, 2.1356277465820312, 2.2721939086914062, 1.579254150390625, 1.4780826568603516, 0.7080039978027344, -2.216278076171875, 1.5084095001220703, -1.2310600280761719], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000289.npy"}
{"epoch": 0.436885865457294, "step": 290, "batch_size": 64, "mean": 1.7088818550109863, "std": 2.414438247680664, "min": -5.035011291503906, "p10": -0.8762199401855468, "median": 1.18988037109375, "p90": 5.51025619506836, "max": 6.46204948425293, "pos_frac": 0.6875, "sample": [4.0404205322265625, 4.8153839111328125, 2.262928009033203, 6.42742919921875, 1.9774627685546875, 1.6215591430664062, -1.0582733154296875, 0.5461483001708984, 0.7631072998046875, 3.6007003784179688, 1.00457763671875, -0.5165462493896484, -0.873779296875, -1.3810195922851562, 3.4120635986328125, 1.1523590087890625, -0.13399505615234375, -0.8772659301757812, 0.9658889770507812, -0.23842620849609375, 2.285543441772461, 5.551963806152344, 5.2802276611328125, 3.6254806518554688, 0.6955451965332031, 0.19687652587890625, 1.7319889068603516, -1.04327392578125, 2.4409122467041016, -0.72198486328125, 5.526298522949219, 3.6443252563476562, 2.1318302154541016, -0.000881195068359375, 1.216827392578125, 2.6101913452148438, 5.4728240966796875, -0.2984809875488281, 0.7662124633789062, -1.3365478515625, 5.6216278076171875, 6.46204948425293, -0.3997459411621094, -5.035011291503906, 1.4586334228515625, 0.728851318359375, 0.07063865661621094, 6.044013977050781, 2.3541717529296875, 3.394502639770508, -0.16186141967773438, -0.63555908203125, -0.3073692321777344, 0.9005355834960938, -0.7376747131347656, 5.928375244140625, 3.2146835327148438, 4.748790740966797, 4.825103759765625, -0.13178253173828125, 1.6930580139160156, 1.793426513671875, 1.162933349609375, -0.910552978515625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000290.npy"}
{"epoch": 0.4383975812547241, "step": 291, "batch_size": 64, "mean": 1.7776010036468506, "std": 2.4238593578338623, "min": -4.315387725830078, "p10": -1.0927429199218746, "median": 1.5226726531982422, "p90": 5.223172760009767, "max": 7.0482330322265625, "pos_frac": 0.765625, "sample": [4.0790863037109375, 3.6344223022460938, 0.36748504638671875, -1.85504150390625, 1.3790740966796875, 5.89117431640625, -0.11180496215820312, 1.1358222961425781, 5.329010009765625, -0.34865570068359375, 1.8224716186523438, 5.38191032409668, -1.6845741271972656, -1.3223800659179688, 4.589748382568359, 3.244548797607422, -0.45052337646484375, 0.9461822509765625, 3.421142578125, 1.5470352172851562, -0.13220977783203125, 0.5406150817871094, 2.5721664428710938, 2.6698150634765625, 3.3045501708984375, 2.3282699584960938, 7.0482330322265625, -0.0050144195556640625, 4.675319671630859, 1.6862030029296875, 1.48785400390625, 6.330436706542969, 0.4169349670410156, 0.5158576965332031, -2.9056167602539062, 0.8743801116943359, 4.075359344482422, 0.9210968017578125, 2.533447265625, 0.4454917907714844, 1.8876113891601562, 6.897834777832031, -4.315387725830078, 1.4983100891113281, 2.8884429931640625, 1.4778289794921875, 2.7889175415039062, 4.699975967407227, 1.7772960662841797, 6.397552490234375, -0.6309165954589844, 1.0040435791015625, -2.8616180419921875, 1.9072723388671875, -1.2906684875488281, 0.7417793273925781, 0.19967079162597656, 4.976219177246094, 2.1102294921875, 2.6202049255371094, 2.2233734130859375, 0.8948879241943359, -0.21245574951171875, -0.2932586669921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000291.npy"}
{"epoch": 0.4399092970521542, "step": 292, "batch_size": 64, "mean": 1.4120711088180542, "std": 2.1144566535949707, "min": -5.3042449951171875, "p10": -1.1959747314453124, "median": 1.3643083572387695, "p90": 3.9407423019409182, "max": 7.408599853515625, "pos_frac": 0.765625, "sample": [-5.3042449951171875, -1.2444992065429688, 1.9808521270751953, -0.29006385803222656, 4.599199295043945, 4.193872451782227, 0.266845703125, 7.408599853515625, 2.1986083984375, 2.203094482421875, 2.8166275024414062, 0.8240566253662109, 3.853708267211914, 2.0346450805664062, -0.3420257568359375, 0.5680961608886719, 3.0057449340820312, 1.137481689453125, 2.193267822265625, -0.08746337890625, 1.356475830078125, -0.031139373779296875, 0.9895133972167969, -1.412078857421875, 1.5352325439453125, 0.37530517578125, -0.4523963928222656, 3.9780426025390625, 0.1692047119140625, 3.6928157806396484, 1.4772491455078125, 2.0862483978271484, 1.042886734008789, 2.1729507446289062, -0.03812599182128906, 0.004150390625, 2.1631088256835938, -1.3859329223632812, 0.5349502563476562, 2.03753662109375, 0.3538818359375, 2.3176651000976562, 3.047945022583008, -2.165437698364258, 7.191864013671875, 3.1350784301757812, 1.7624969482421875, 0.8861007690429688, 1.372140884399414, 1.0031280517578125, 1.750244140625, 2.6253738403320312, 0.6390247344970703, 1.8631591796875, 1.8899364471435547, -0.08358001708984375, 2.11590576171875, 0.80419921875, 6.020282745361328, 1.1775741577148438, -1.1124267578125, -1.231781005859375, -1.7110366821289062, 4.408409118652344], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000292.npy"}
{"epoch": 0.4414210128495843, "step": 293, "batch_size": 64, "mean": 2.0112390518188477, "std": 2.432154893875122, "min": -5.25494384765625, "p10": -1.0138633728027342, "median": 2.2579193115234375, "p90": 4.796032714843752, "max": 8.784698486328125, "pos_frac": 0.78125, "sample": [0.042385101318359375, 1.495758056640625, 2.6939048767089844, 3.463411331176758, 2.1647262573242188, 2.092784881591797, -1.651742935180664, 1.5784435272216797, 1.1617660522460938, 4.9969024658203125, 2.757049560546875, -0.3175926208496094, 3.4719314575195312, 2.4396514892578125, 2.7611846923828125, 0.9314422607421875, 3.9881629943847656, 3.7970428466796875, 3.1175765991210938, 2.5511550903320312, 4.054721832275391, -1.1670894622802734, -0.611572265625, -0.7853775024414062, 4.107311248779297, 1.0180778503417969, 2.3620223999023438, -0.6346015930175781, 0.8379745483398438, 7.5788726806640625, 0.5994625091552734, 5.4093017578125, 4.98126220703125, -1.388723373413086, 2.9081573486328125, -1.111785888671875, 4.36383056640625, -5.25494384765625, 1.6782951354980469, -0.05776214599609375, 3.3928146362304688, 7.6433868408203125, -2.1357994079589844, -0.15982818603515625, 0.229400634765625, -0.7309226989746094, 1.36676025390625, 2.3511123657226562, 8.784698486328125, 5.725486755371094, 3.213468551635742, 0.968414306640625, 2.379037857055664, 1.8098335266113281, 2.519927978515625, -1.1432876586914062, 2.6277313232421875, 1.211090087890625, 1.9587669372558594, 4.193359375, 2.79254150390625, 3.122081756591797, 1.1136016845703125, 3.0622406005859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000293.npy"}
{"epoch": 0.4429327286470144, "step": 294, "batch_size": 64, "mean": 1.932664155960083, "std": 2.366183042526245, "min": -1.4300804138183594, "p10": -0.5598020553588866, "median": 1.5318002700805664, "p90": 4.937440109252931, "max": 9.685951232910156, "pos_frac": 0.796875, "sample": [5.7874755859375, 1.6899032592773438, 4.1264801025390625, 2.34234619140625, 4.366657257080078, 2.0551223754882812, 4.732471466064453, 2.8438491821289062, -1.0563201904296875, 1.4348068237304688, 2.0396499633789062, -0.15008544921875, -1.1476631164550781, 5.0252838134765625, 1.1174163818359375, 0.8637065887451172, 5.589742660522461, 2.2809600830078125, 3.107044219970703, 3.6311721801757812, 2.9724960327148438, -0.2971916198730469, 1.5319843292236328, 1.8627128601074219, -0.42083740234375, 8.257938385009766, -1.4300804138183594, 1.5316162109375, -1.2558746337890625, 9.685951232910156, 5.50921630859375, 0.275909423828125, -0.4861888885498047, 8.219993591308594, -0.5913505554199219, 0.0092620849609375, 1.4380264282226562, -0.2293415069580078, 0.45446014404296875, 3.2171173095703125, 0.7215957641601562, 0.192626953125, 2.700429916381836, 0.4537944793701172, 2.8596553802490234, 3.470184326171875, 1.0833053588867188, 2.073518753051758, 0.7279739379882812, 4.493011474609375, -1.1945438385009766, 3.8496017456054688, 2.740631103515625, 0.4410400390625, 0.6711616516113281, 2.328126907348633, 0.45319366455078125, -0.6039485931396484, 0.2976970672607422, 0.7956390380859375, 2.2354507446289062, -0.37602996826171875, 2.1482810974121094, 0.1922760009765625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000294.npy"}
{"epoch": 0.4444444444444444, "step": 295, "batch_size": 64, "mean": 1.5732043981552124, "std": 2.4603800773620605, "min": -2.5975494384765625, "p10": -1.362171173095703, "median": 1.296072006225586, "p90": 4.345140075683594, "max": 9.619842529296875, "pos_frac": 0.765625, "sample": [3.6287994384765625, -0.46681976318359375, -0.20703506469726562, 1.3049888610839844, 0.7885818481445312, 3.475494384765625, 3.0261154174804688, 0.3855152130126953, 2.32635498046875, 1.8440399169921875, 9.619842529296875, 0.2232074737548828, 7.9625091552734375, -1.115692138671875, 4.085920333862305, 3.3871192932128906, 0.5159454345703125, 1.2871551513671875, -1.410614013671875, 0.6627159118652344, 4.265560150146484, -1.2491378784179688, 0.3162345886230469, 1.3582515716552734, 3.8349609375, 0.1784820556640625, 1.1372528076171875, -2.5975494384765625, 3.9521636962890625, 1.3949127197265625, -1.494171142578125, -1.2421646118164062, 5.433219909667969, 1.0815887451171875, 1.9392318725585938, -0.6331882476806641, 2.7287940979003906, 1.4899654388427734, 3.708606719970703, 3.5624923706054688, 1.349822998046875, -2.2847366333007812, 1.0197601318359375, 2.103607177734375, 5.1405181884765625, 4.379245758056641, -0.9231414794921875, -0.7431106567382812, -2.44146728515625, 1.369516372680664, 0.18645095825195312, 1.5435905456542969, 0.8804168701171875, 0.7461318969726562, 0.9099159240722656, 1.4265403747558594, 4.085359573364258, 0.02050018310546875, 0.4595794677734375, -1.6166763305664062, 2.251659393310547, 4.568817138671875, 7.20012092590332, -1.4369926452636719], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000295.npy"}
{"epoch": 0.4459561602418745, "step": 296, "batch_size": 64, "mean": 2.288297653198242, "std": 2.437836170196533, "min": -1.824066162109375, "p10": -0.6467330932617187, "median": 2.3744306564331055, "p90": 5.696950912475586, "max": 7.476375579833984, "pos_frac": 0.78125, "sample": [5.224571228027344, 4.880622863769531, 3.1600570678710938, 2.693958282470703, -1.1772537231445312, -0.11163330078125, 0.5886554718017578, 6.6264801025390625, 1.370147705078125, 2.982654571533203, 3.4353179931640625, 5.687351226806641, -1.6293373107910156, 5.879682540893555, 0.7494354248046875, 4.6974029541015625, 4.889183044433594, 3.351675033569336, 1.1449432373046875, 4.593696594238281, 2.5833568572998047, 0.5844688415527344, 5.7010650634765625, -1.824066162109375, -0.6702117919921875, 0.4177093505859375, 1.093109130859375, 1.8590717315673828, 1.3956756591796875, 2.8935623168945312, 0.27375221252441406, -0.7310962677001953, -0.591949462890625, 0.3013153076171875, 7.1692962646484375, -0.519866943359375, -0.5068130493164062, -0.4134178161621094, 3.328706741333008, -0.4000396728515625, 2.0933685302734375, -0.5399398803710938, 7.476375579833984, 1.26837158203125, 5.3858489990234375, 2.698883056640625, 2.4731483459472656, 2.2842159271240234, 2.522052764892578, 4.65985107421875, 2.4646453857421875, 0.7082061767578125, 3.5152149200439453, 1.8331470489501953, 6.698326110839844, 2.637969970703125, 0.5732498168945312, 5.363792419433594, 0.0853271484375, -1.1401596069335938, -0.76629638671875, 5.9114837646484375, 4.6313323974609375, 2.6313934326171875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000296.npy"}
{"epoch": 0.4474678760393046, "step": 297, "batch_size": 64, "mean": 2.027672290802002, "std": 2.034177303314209, "min": -1.44561767578125, "p10": -0.6275093078613279, "median": 1.8741044998168945, "p90": 4.772409439086915, "max": 7.2500152587890625, "pos_frac": 0.8125, "sample": [1.1399116516113281, -0.7972335815429688, 2.2918319702148438, 3.749481201171875, 3.7906494140625, 4.934185028076172, 2.197162628173828, 3.9840469360351562, -1.1277236938476562, -1.3673534393310547, 1.196502685546875, 1.8697357177734375, 1.8784732818603516, 1.9269447326660156, 1.2084636688232422, 4.929708480834961, 3.7004165649414062, 3.653125762939453, -0.2718486785888672, 0.20013427734375, 2.4475860595703125, 0.7780609130859375, 0.298370361328125, -0.8956527709960938, -0.19800567626953125, 3.3208160400390625, 1.6878585815429688, 1.0650482177734375, 4.657707214355469, 0.778656005859375, 3.492124557495117, 4.027427673339844, 0.717803955078125, -1.0508460998535156, -0.7259101867675781, 2.363340377807617, 0.5563201904296875, 1.5662689208984375, 5.847785949707031, 0.99139404296875, -1.44561767578125, -0.3338432312011719, 3.5327529907226562, 4.821567535400391, 1.4974479675292969, 3.0031890869140625, 0.40238189697265625, -0.14502716064453125, 1.9779739379882812, 4.2498016357421875, 1.1765556335449219, 3.221099853515625, 2.247112274169922, 3.4099884033203125, -0.3979072570800781, 0.5066909790039062, 4.959197998046875, 7.2500152587890625, 1.0393524169921875, 5.326057434082031, 0.0399169921875, 3.9328479766845703, 4.610054016113281, 4.0766448974609375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000297.npy"}
{"epoch": 0.4489795918367347, "step": 298, "batch_size": 64, "mean": 1.7846052646636963, "std": 2.612809419631958, "min": -4.4212188720703125, "p10": -1.6084888458251954, "median": 1.605210304260254, "p90": 5.234486770629883, "max": 8.385875701904297, "pos_frac": 0.734375, "sample": [2.4808425903320312, -4.4212188720703125, 8.385875701904297, 0.4339141845703125, 5.167171478271484, 1.0187911987304688, 2.4560680389404297, -0.27974700927734375, -0.40411376953125, 1.923788070678711, 5.738771438598633, 5.804790496826172, -1.7794342041015625, 5.263336181640625, -1.7713699340820312, 2.8261260986328125, 3.2633056640625, -0.8109588623046875, 2.6229476928710938, 1.9238739013671875, 0.4928550720214844, -1.6232414245605469, 3.6989822387695312, 3.834136962890625, 0.9000625610351562, 2.38555908203125, -2.1193370819091797, 2.3720836639404297, 3.6907501220703125, 4.604990005493164, 1.0359153747558594, 5.043567657470703, 0.9217453002929688, 4.968482971191406, 0.2999591827392578, 1.3858108520507812, 0.650177001953125, 5.355634689331055, 1.8246097564697266, 1.0424156188964844, -0.7500076293945312, -0.7036285400390625, -1.2587032318115234, 3.889862060546875, 0.7892475128173828, 4.642303466796875, 5.8130340576171875, 2.5385665893554688, 0.5637893676757812, 3.790863037109375, 1.9574317932128906, -0.42706298828125, 1.063934326171875, -2.0004444122314453, 0.0057220458984375, 2.0754623413085938, -0.29613494873046875, -0.0295257568359375, -1.574066162109375, -3.147237777709961, 4.299068450927734, 4.277553558349609, 1.2376365661621094, 6.849149703979492], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000298.npy"}
{"epoch": 0.4504913076341648, "step": 299, "batch_size": 64, "mean": 1.517815351486206, "std": 2.562458038330078, "min": -3.2376327514648438, "p10": -1.8122249603271483, "median": 1.0052051544189453, "p90": 4.597237968444825, "max": 8.674667358398438, "pos_frac": 0.75, "sample": [1.9838676452636719, 2.55340576171875, 0.27452659606933594, -2.5557327270507812, -0.8756027221679688, 3.7163124084472656, 2.6194610595703125, 1.460306167602539, 7.1617279052734375, -0.06380271911621094, -1.9121685028076172, 0.858367919921875, -3.1469039916992188, 0.30568695068359375, -2.241769790649414, 0.6055526733398438, 0.9347801208496094, 4.885625839233398, 4.67327880859375, -1.0231857299804688, 4.403438568115234, -0.003116607666015625, 1.7607784271240234, 1.9066085815429688, 0.0874481201171875, 0.3227996826171875, 1.0700035095214844, 0.4960365295410156, 8.674667358398438, -0.3168678283691406, 3.818084716796875, 0.0124053955078125, 2.1888275146484375, -3.2376327514648438, 0.41895294189453125, 0.630645751953125, 2.80108642578125, 4.419809341430664, 0.6564254760742188, 2.0232772827148438, 3.6854324340820312, 0.9404067993164062, 2.8770713806152344, 3.956880569458008, 7.4849700927734375, -1.870758056640625, 3.5010318756103516, 6.099693298339844, 1.786224365234375, 4.03570556640625, 0.35506439208984375, -0.5705356597900391, 2.1363162994384766, -1.1026153564453125, 1.9907684326171875, 2.2413406372070312, -2.359832763671875, 4.164039611816406, -0.5068778991699219, 0.407196044921875, 0.009983062744140625, 1.5016746520996094, -1.6756477355957031, 5.705238342285156], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000299.npy"}
{"epoch": 0.4520030234315949, "step": 300, "batch_size": 64, "mean": 1.7839126586914062, "std": 2.5648248195648193, "min": -5.93292236328125, "p10": -0.9456542968749999, "median": 1.6926498413085938, "p90": 4.923440551757813, "max": 7.412025451660156, "pos_frac": 0.765625, "sample": [0.9116897583007812, 4.84783935546875, -0.30945587158203125, 4.49237060546875, 2.7308998107910156, 0.6075286865234375, 2.9146575927734375, 3.29681396484375, -0.15867233276367188, -2.901315689086914, 5.757080078125, 6.357810974121094, 2.9370365142822266, 3.52374267578125, 7.412025451660156, 3.0708789825439453, 3.291259765625, 0.22759628295898438, -0.1511077880859375, 0.7355880737304688, 0.8604564666748047, 4.308189392089844, 3.103790283203125, -0.3992900848388672, -1.758514404296875, 1.63226318359375, 0.8122367858886719, -0.901519775390625, 2.27545166015625, 2.891103744506836, 0.7144489288330078, -3.2525482177734375, 1.7530364990234375, 0.7567615509033203, -5.93292236328125, 1.6039810180664062, 0.26921653747558594, 6.977409362792969, 3.16412353515625, 4.966606140136719, 4.955841064453125, 2.658008575439453, -0.964569091796875, 3.7877960205078125, 2.296112060546875, -0.7920379638671875, 0.5902976989746094, 1.9435615539550781, -0.086669921875, 4.529426574707031, 3.1881256103515625, 1.0706920623779297, 1.4067840576171875, 0.7424354553222656, 1.4194602966308594, -1.7327327728271484, 4.076175689697266, 3.3496780395507812, 3.544170379638672, -3.3308582305908203, 0.509185791015625, 5.574947357177734, 2.87939453125, -0.88336181640625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000300.npy"}
{"epoch": 0.45351473922902497, "step": 301, "batch_size": 64, "mean": 1.7021512985229492, "std": 2.8650832176208496, "min": -3.4617538452148438, "p10": -1.481070327758789, "median": 1.2128105163574219, "p90": 5.800811195373536, "max": 9.673065185546875, "pos_frac": 0.71875, "sample": [2.7683067321777344, 2.802490234375, 2.6545028686523438, 1.2532978057861328, 4.53076171875, -0.3595733642578125, 5.549079895019531, 4.3287506103515625, 4.143396377563477, 0.35642242431640625, 0.7840251922607422, 0.09812164306640625, 2.7664566040039062, -1.4218673706054688, -0.6624221801757812, -0.51885986328125, -0.6439437866210938, 2.0932769775390625, 2.0806350708007812, 0.7682914733886719, -1.2667865753173828, 1.166116714477539, 7.803821563720703, -2.8291854858398438, 4.367912292480469, 0.1433563232421875, 1.172323226928711, 5.572847366333008, 3.402496337890625, -3.3758773803710938, -0.5408744812011719, -1.5064430236816406, 5.898509979248047, 6.60357666015625, -2.0222702026367188, -0.133575439453125, -2.9639434814453125, 9.673065185546875, -0.6001758575439453, 0.24113845825195312, -0.7659091949462891, 0.33251953125, 2.6177101135253906, -2.0185813903808594, 0.7201633453369141, 5.342075347900391, 2.902524948120117, 2.1480560302734375, 1.4207763671875, 6.721794128417969, 1.4487075805664062, 5.905364990234375, 1.5661239624023438, 0.24238967895507812, -1.117645263671875, 3.123401641845703, 3.1883621215820312, 6.678337097167969, 0.8179855346679688, 1.8716888427734375, 0.20660400390625, -3.4617538452148438, 4.6711578369140625, 0.1986541748046875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000301.npy"}
{"epoch": 0.455026455026455, "step": 302, "batch_size": 64, "mean": 1.140923023223877, "std": 2.3593590259552, "min": -4.490486145019531, "p10": -1.9700037002563473, "median": 1.088165283203125, "p90": 4.136277198791504, "max": 6.95654296875, "pos_frac": 0.703125, "sample": [0.621063232421875, 5.311519622802734, -1.0663909912109375, 6.95654296875, 1.957437515258789, 1.6100807189941406, 4.9894866943359375, -2.0730514526367188, 3.9974136352539062, 2.381732940673828, 1.38751220703125, 1.5263519287109375, 0.542022705078125, 1.2021770477294922, 5.180713653564453, 3.6591243743896484, 0.2030487060546875, 0.4929656982421875, 0.8805580139160156, -2.4345550537109375, 1.3784980773925781, -0.6644973754882812, -2.6839752197265625, 2.5506591796875, 3.7179718017578125, -2.3479175567626953, 0.5965423583984375, 1.8153228759765625, -0.8705024719238281, 0.6945419311523438, -0.0530548095703125, 2.8042755126953125, 4.155723571777344, 0.7167263031005859, -2.4057750701904297, 2.6307106018066406, 4.72906494140625, 0.5572738647460938, 1.1243820190429688, 1.3844261169433594, 1.8295154571533203, 4.090902328491211, -0.17879867553710938, 1.674346923828125, -1.44085693359375, 2.6161041259765625, -4.219455718994141, -0.6530075073242188, 2.8016204833984375, 1.7464752197265625, 3.2135696411132812, -1.2052688598632812, 2.02410888671875, -0.31899261474609375, 0.8025360107421875, -1.7295589447021484, -0.32354736328125, 6.48358154296875, -0.1727142333984375, 0.5653877258300781, -4.490486145019531, 0.2607879638671875, 1.0519485473632812, 1.4347209930419922], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000302.npy"}
{"epoch": 0.4565381708238851, "step": 303, "batch_size": 64, "mean": 2.498141050338745, "std": 3.0551769733428955, "min": -2.8364486694335938, "p10": -1.3841398239135743, "median": 2.393190383911133, "p90": 6.059007263183594, "max": 10.587730407714844, "pos_frac": 0.765625, "sample": [0.23248291015625, 0.8159236907958984, 3.9152984619140625, 3.0807247161865234, 10.106224060058594, 6.1174774169921875, 2.8672027587890625, -1.3247032165527344, 4.349893569946289, 1.385101318359375, 5.226739883422852, -2.0043869018554688, 2.6616783142089844, 1.2022151947021484, -1.0300216674804688, -1.2771644592285156, -1.9675521850585938, 4.227935791015625, -1.6740474700927734, -0.9574432373046875, -0.45888710021972656, 3.158071517944336, 6.34539794921875, 5.243770599365234, -1.7466506958007812, 1.2724418640136719, 4.3610076904296875, -2.8364486694335938, -0.9585132598876953, 4.7337493896484375, 2.4077224731445312, 2.26776123046875, 2.3786582946777344, -0.49875640869140625, 0.307373046875, 1.0003833770751953, 4.7349853515625, 1.4871368408203125, 3.0196800231933594, 4.083038330078125, 0.9531097412109375, 3.5774574279785156, 2.830221176147461, 3.6572418212890625, 1.2850494384765625, 4.565162658691406, 9.888076782226562, 1.60443115234375, 3.8986358642578125, 5.779388427734375, -2.060546875, 0.29487037658691406, -0.9005012512207031, 5.054290771484375, 5.922576904296875, 2.0236053466796875, 1.5631885528564453, 1.5587005615234375, 4.7816619873046875, 6.723466873168945, 10.587730407714844, -1.4096126556396484, 4.597869873046875, 6.849449157714844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000303.npy"}
{"epoch": 0.4580498866213152, "step": 304, "batch_size": 64, "mean": 2.341661214828491, "std": 2.2225170135498047, "min": -2.7184906005859375, "p10": -0.2656495094299315, "median": 2.139862060546875, "p90": 5.666485404968265, "max": 7.7034454345703125, "pos_frac": 0.84375, "sample": [3.7196693420410156, -0.8196907043457031, 3.8364410400390625, 3.4890899658203125, 1.2202072143554688, 1.7734317779541016, 2.721435546875, 4.224845886230469, 0.9061012268066406, 1.5377349853515625, 0.3283042907714844, 6.526908874511719, 3.6286163330078125, 2.0057296752929688, 2.935453414916992, 1.9671745300292969, -0.8476753234863281, 1.2628707885742188, 5.9794464111328125, -2.7184906005859375, 1.465118408203125, 3.988433837890625, 2.803436279296875, 1.6601200103759766, 2.2146377563476562, 0.7324981689453125, 0.276519775390625, 4.881793975830078, 2.7974014282226562, 4.936243057250977, 3.4778976440429688, 0.4720611572265625, 6.385406494140625, 2.7919769287109375, -0.09940338134765625, -1.2338104248046875, 1.366952896118164, 2.4821853637695312, 1.3936176300048828, 3.259490966796875, -0.3368978500366211, -1.1910648345947266, 7.22662353515625, 3.5257720947265625, 1.6946258544921875, 0.7926406860351562, 2.3268165588378906, 6.707366943359375, 3.649932861328125, 4.130268096923828, -0.0455474853515625, 0.271881103515625, 6.334037780761719, 0.6357746124267578, 2.0650863647460938, -0.7526893615722656, 0.6351375579833984, 7.7034454345703125, 2.9968338012695312, 2.0277938842773438, 3.3145294189453125, -0.000797271728515625, 3.973846435546875, 2.450725555419922], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000304.npy"}
{"epoch": 0.4595616024187453, "step": 305, "batch_size": 64, "mean": 1.872122883796692, "std": 2.9483909606933594, "min": -5.27618408203125, "p10": -1.5555946350097656, "median": 1.2071609497070312, "p90": 5.410926818847656, "max": 9.7940673828125, "pos_frac": 0.796875, "sample": [4.699851989746094, 1.24554443359375, -1.4652481079101562, -3.72216796875, -1.1994781494140625, -0.09946823120117188, 9.7940673828125, -2.007761001586914, 4.0739898681640625, 4.613746643066406, 1.0946407318115234, 5.739948272705078, 0.6800537109375, 2.293285369873047, 0.5425262451171875, -0.2884063720703125, 0.06937408447265625, 0.2768211364746094, -2.364501953125, 5.4486236572265625, 0.4268054962158203, 4.0304412841796875, 1.331817626953125, 1.6101837158203125, 1.2073211669921875, 1.207000732421875, 4.933433532714844, 0.5861968994140625, 2.755828857421875, -5.27618408203125, 2.959217071533203, 3.4414501190185547, 6.170166015625, 3.9701614379882812, 0.607940673828125, 1.1161766052246094, 7.562225341796875, 0.6868438720703125, 0.7394275665283203, 0.845123291015625, 1.0249786376953125, 0.19445037841796875, 4.74786376953125, 6.806177139282227, 5.322967529296875, 0.2880058288574219, 0.9512100219726562, -2.914215087890625, 3.734344482421875, 1.6086578369140625, 2.2128372192382812, 2.880126953125, 2.480510711669922, 8.721725463867188, 4.3445281982421875, 5.171966552734375, -1.5943145751953125, 0.8927001953125, -2.962879180908203, 3.709901809692383, -1.0071487426757812, 0.22512245178222656, 3.72479248046875, -1.0854644775390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000305.npy"}
{"epoch": 0.46107331821617537, "step": 306, "batch_size": 64, "mean": 1.6640576124191284, "std": 2.8380537033081055, "min": -4.462150573730469, "p10": -1.2092933654785152, "median": 1.064509391784668, "p90": 5.357369995117189, "max": 10.921524047851562, "pos_frac": 0.765625, "sample": [7.008689880371094, 5.043312072753906, 0.499420166015625, 5.491966247558594, 2.4399337768554688, 2.8125457763671875, 0.19393157958984375, 1.7790946960449219, 1.064535140991211, 3.8823699951171875, 3.801666259765625, 1.7376861572265625, -4.462150573730469, 1.1148681640625, -0.4688568115234375, -3.3419036865234375, 3.0632781982421875, 0.939483642578125, 0.8520736694335938, 0.48418235778808594, 3.3947219848632812, 9.54788589477539, 2.298675537109375, -0.4783744812011719, 2.9953460693359375, 0.6972808837890625, -0.68011474609375, 3.1716461181640625, -0.07421875, 0.631317138671875, -4.0543975830078125, 4.490478515625, -0.5387210845947266, 1.217000961303711, 1.064483642578125, 0.7660598754882812, 4.700885772705078, 6.8855438232421875, -0.8631439208984375, 0.3316993713378906, -0.03684234619140625, 0.9491481781005859, 0.4356803894042969, 1.5011062622070312, -1.3576431274414062, 0.3404083251953125, -0.3759117126464844, 5.601829528808594, 1.240203857421875, 5.620796203613281, 3.5748214721679688, 1.2141036987304688, 0.4726104736328125, 0.5779647827148438, 3.8302993774414062, 1.4848709106445312, 3.650684356689453, -1.4477767944335938, 0.46912384033203125, 10.921524047851562, -2.947042465209961, 0.652801513671875, 2.3775177001953125, -1.690774917602539], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000306.npy"}
{"epoch": 0.46258503401360546, "step": 307, "batch_size": 64, "mean": 1.5966609716415405, "std": 3.0036072731018066, "min": -4.7295379638671875, "p10": -2.0046113967895507, "median": 1.3832683563232422, "p90": 5.607156372070313, "max": 10.739212036132812, "pos_frac": 0.640625, "sample": [3.935272216796875, -1.2681159973144531, 1.8809280395507812, 0.12235450744628906, -4.7295379638671875, 2.982696533203125, 10.739212036132812, 0.5393600463867188, 5.15631103515625, 4.2202911376953125, 0.1759033203125, 3.571043014526367, 1.2294120788574219, 6.17193603515625, -2.4605865478515625, 5.32598876953125, -0.40936279296875, -1.1624679565429688, 2.894451141357422, -1.1755218505859375, 2.8196868896484375, -2.6737232208251953, -2.0546798706054688, -0.260955810546875, 2.049755096435547, -1.8841018676757812, -2.2764053344726562, 1.5825920104980469, 4.356170654296875, -0.4286670684814453, 1.5371246337890625, 3.9382705688476562, -0.3862953186035156, -1.259246826171875, -2.7066650390625, 2.3326416015625, 0.0974884033203125, 2.7900161743164062, 4.130975723266602, 0.045318603515625, 1.043539047241211, -0.4048004150390625, 0.22607040405273438, 5.9810943603515625, 5.562049865722656, -0.4591522216796875, 1.664712905883789, 4.17083740234375, 5.665855407714844, 3.4689559936523438, -2.94140625, 5.662223815917969, 4.214012145996094, -0.12072944641113281, 4.601226806640625, 4.57330322265625, 0.3774833679199219, 5.640037536621094, -0.713043212890625, -1.1157608032226562, -1.8877849578857422, 3.7186851501464844, 5.626487731933594, -1.8564605712890625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000307.npy"}
{"epoch": 0.46409674981103555, "step": 308, "batch_size": 64, "mean": 2.210214614868164, "std": 2.7334656715393066, "min": -4.6641845703125, "p10": -0.9575643539428711, "median": 1.9985933303833008, "p90": 5.654467010498047, "max": 9.4891357421875, "pos_frac": 0.78125, "sample": [3.582265853881836, 2.200359344482422, 2.017730712890625, -1.012674331665039, 4.9024505615234375, 4.450630187988281, -0.18114852905273438, 2.009002685546875, -1.098114013671875, 0.3956451416015625, 9.4891357421875, 3.545501708984375, 8.703865051269531, 3.202016830444336, 2.336883544921875, -0.9678897857666016, 2.6012954711914062, 1.08349609375, 4.1892547607421875, 1.1842460632324219, 3.3542022705078125, -1.717691421508789, 5.708763122558594, 3.326265335083008, 2.4241256713867188, 5.7197723388671875, -0.13322830200195312, 2.616058349609375, 2.46038818359375, -0.9334716796875, 5.678062438964844, -0.7252655029296875, 1.7787036895751953, -0.39507102966308594, 4.362480163574219, 0.9968185424804688, 3.549591064453125, 1.0959625244140625, 1.372589111328125, -4.6641845703125, 1.9322662353515625, 1.9881839752197266, 6.8964691162109375, 5.021389007568359, -0.0820770263671875, 1.0421829223632812, 2.8372650146484375, 0.6260223388671875, 3.7589263916015625, -0.04695892333984375, 5.476734161376953, 1.44061279296875, 0.0605316162109375, 5.017707824707031, 8.856369018554688, 0.285797119140625, 5.5994110107421875, -2.9415435791015625, 0.4911041259765625, 2.2003097534179688, 0.2757453918457031, -1.3264923095703125, 1.8851280212402344, 1.6498222351074219], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000308.npy"}
{"epoch": 0.4656084656084656, "step": 309, "batch_size": 64, "mean": 2.0769472122192383, "std": 2.762580156326294, "min": -2.998321533203125, "p10": -1.182487106323242, "median": 1.869100570678711, "p90": 5.525543975830078, "max": 10.48270034790039, "pos_frac": 0.8125, "sample": [1.7759628295898438, 10.48270034790039, 0.6481094360351562, 5.83259391784668, 3.483064651489258, 0.5631637573242188, 4.678077697753906, -1.8377151489257812, 0.8739242553710938, 1.563364028930664, 4.3868865966796875, 0.2354450225830078, -1.2473773956298828, 0.18717575073242188, 2.41070556640625, -1.8291587829589844, 2.434967041015625, 3.9675140380859375, -0.2386932373046875, 0.6547317504882812, 1.0001029968261719, 4.192493438720703, 4.350435256958008, -1.031076431274414, 3.867462158203125, 3.2896957397460938, 0.2645740509033203, 3.1713428497314453, -1.409515380859375, 0.26953125, 0.13200950622558594, -2.53094482421875, 0.2910919189453125, 2.297332763671875, 1.358154296875, 2.6314468383789062, 3.2615203857421875, 3.4317054748535156, -0.4120063781738281, 6.5225982666015625, 0.3886871337890625, 5.6339111328125, 5.256244659423828, 3.3602428436279297, 5.551963806152344, -1.489959716796875, 5.463897705078125, 2.9831809997558594, 0.3015880584716797, 3.5704612731933594, -1.0056610107421875, 0.9298019409179688, 2.034698486328125, 4.29339599609375, 9.324066162109375, 0.8708877563476562, 2.0308704376220703, 2.457406997680664, -0.959991455078125, -2.998321533203125, 0.16314697265625, 0.465789794921875, 1.9622383117675781, 8.3626708984375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000309.npy"}
{"epoch": 0.4671201814058957, "step": 310, "batch_size": 64, "mean": 1.2512991428375244, "std": 2.664551019668579, "min": -3.156888961791992, "p10": -1.8941452026367187, "median": 0.7184133529663086, "p90": 4.395466232299805, "max": 7.7552337646484375, "pos_frac": 0.625, "sample": [3.5505142211914062, 0.4278564453125, 2.8428916931152344, 0.8926639556884766, 4.454795837402344, -1.90765380859375, -0.6034812927246094, 3.5039520263671875, 3.5386009216308594, 0.0179290771484375, -0.06450080871582031, -3.156888961791992, 3.136016845703125, 6.7284393310546875, 2.739990234375, -0.7958869934082031, -0.5705585479736328, -0.093170166015625, 6.033172607421875, -1.8379554748535156, 0.3475990295410156, 2.5919151306152344, -1.6516914367675781, 7.7552337646484375, -1.5848541259765625, -1.3619003295898438, 3.2203826904296875, -0.0015010833740234375, 0.2925262451171875, 4.197614669799805, -1.0285263061523438, 4.2520294189453125, 3.501617431640625, 3.62872314453125, -1.8626251220703125, 0.68475341796875, 0.2801666259765625, 1.1489906311035156, 2.9636383056640625, 2.232452392578125, -0.5462112426757812, 2.8025360107421875, -2.1356353759765625, -2.5257568359375, 2.8282012939453125, 0.8392448425292969, 0.6019744873046875, 3.425748825073242, -1.2978248596191406, -2.86041259765625, -0.9329071044921875, 6.045936584472656, 2.075244903564453, -2.0444869995117188, 0.3560199737548828, 0.7520732879638672, -1.353363037109375, 4.257030487060547, 5.49437141418457, -2.381103515625, -1.7169418334960938, 6.6511383056640625, 1.2613086700439453, 2.0436859130859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000310.npy"}
{"epoch": 0.46863189720332576, "step": 311, "batch_size": 64, "mean": 1.790733814239502, "std": 3.0456743240356445, "min": -7.0464935302734375, "p10": -1.5201555252075194, "median": 1.5092716217041016, "p90": 5.682363510131838, "max": 10.44012451171875, "pos_frac": 0.75, "sample": [3.9175682067871094, 0.02875518798828125, 0.01572418212890625, -2.7852630615234375, -2.1107559204101562, -0.6617603302001953, 1.0097465515136719, 4.764427185058594, -1.1139354705810547, 5.822925567626953, 2.15838623046875, -7.0464935302734375, 2.265869140625, 3.3537635803222656, 3.5304718017578125, -0.6762237548828125, 2.5265655517578125, 1.6314544677734375, 6.269317626953125, 1.8021621704101562, 1.0566329956054688, 1.1505584716796875, 1.3870887756347656, 0.6402130126953125, 2.6758193969726562, 1.0454292297363281, -1.539529800415039, -0.9762687683105469, 0.63800048828125, 4.552459716796875, -2.061614990234375, 5.3543853759765625, 10.197799682617188, 0.7613391876220703, 2.036956787109375, 3.3815765380859375, 2.1649703979492188, -0.00147247314453125, 1.671173095703125, 1.259033203125, 10.44012451171875, 1.0086517333984375, -1.3276901245117188, 6.606422424316406, -2.0167694091796875, 1.157958984375, 1.240570068359375, -1.4749488830566406, 3.3519744873046875, 2.5729217529296875, 8.014163970947266, 4.10943603515625, 0.712677001953125, -3.415904998779297, 2.6632957458496094, 2.4785327911376953, 0.9841670989990234, -0.735260009765625, -0.8427047729492188, 3.0943756103515625, 2.126972198486328, 6.229804992675781, 5.312347412109375, 2.2185821533203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000311.npy"}
{"epoch": 0.47014361300075586, "step": 312, "batch_size": 64, "mean": 2.134312629699707, "std": 2.832718849182129, "min": -6.3168182373046875, "p10": -0.7801078796386718, "median": 1.7711143493652344, "p90": 5.954224395751954, "max": 9.6201171875, "pos_frac": 0.84375, "sample": [1.5287895202636719, 1.4792709350585938, 0.1963043212890625, 2.1056594848632812, 3.2992286682128906, 3.3454742431640625, 2.334705352783203, 0.09885025024414062, 1.0534591674804688, -0.025341033935546875, 7.8820648193359375, 4.82757568359375, 2.8246078491210938, 3.5801944732666016, 0.2324981689453125, 3.6448440551757812, 0.8425521850585938, 0.08076286315917969, 1.3865718841552734, 7.54486083984375, 6.379142761230469, 0.8917770385742188, -1.2910518646240234, 2.7633895874023438, 1.2830276489257812, 6.0360107421875, 1.3244171142578125, -0.8131103515625, 0.89544677734375, 4.545904159545898, 7.381994247436523, -3.3603668212890625, 4.385093688964844, 3.381561279296875, -3.8199234008789062, 2.774211883544922, 5.763389587402344, 1.8640213012695312, -6.3168182373046875, 4.8300933837890625, 9.6201171875, 1.1748847961425781, 0.8145523071289062, 2.3558502197265625, 4.0587615966796875, 1.2216567993164062, 1.3457794189453125, 4.2374420166015625, 3.5061187744140625, -0.7031021118164062, -0.9494400024414062, 2.473064422607422, 7.016613006591797, 1.2926406860351562, 1.6782073974609375, 1.2192840576171875, 2.2948570251464844, 3.7038917541503906, -3.2013931274414062, 2.9457473754882812, 0.9164009094238281, -0.5284461975097656, 0.8963794708251953, 2.0450057983398438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000312.npy"}
{"epoch": 0.47165532879818595, "step": 313, "batch_size": 64, "mean": 1.8233965635299683, "std": 3.170130729675293, "min": -3.9365196228027344, "p10": -1.8033386230468749, "median": 1.5274906158447266, "p90": 5.59923782348633, "max": 12.111068725585938, "pos_frac": 0.703125, "sample": [1.2973194122314453, 4.508644104003906, 4.2957763671875, 1.5279731750488281, 3.4625091552734375, 2.6449813842773438, 2.2925148010253906, 0.40042877197265625, 3.379535675048828, -2.1609153747558594, -0.2424182891845703, 4.326196670532227, 1.527008056640625, 1.718658447265625, 2.5462417602539062, -0.9620361328125, 1.6524124145507812, -0.4562568664550781, 0.90838623046875, -1.2747993469238281, 4.700950622558594, 3.323657989501953, -0.1701641082763672, 9.658500671386719, -3.9365196228027344, -3.4699344635009766, -0.49322509765625, 0.657989501953125, 4.056732177734375, 2.3164596557617188, 9.0540771484375, 5.818414688110352, -2.8784637451171875, -1.6520214080810547, 5.15277099609375, 5.790580749511719, 0.5463447570800781, -1.1442146301269531, 0.24362945556640625, 7.9057159423828125, 1.0537376403808594, 1.1703624725341797, 2.300933837890625, -1.1606063842773438, 12.111068725585938, 0.805084228515625, -0.1342315673828125, 0.2843780517578125, 3.456573486328125, 1.184173583984375, 5.1404876708984375, 2.6522598266601562, 3.015850067138672, 0.06763648986816406, -0.8466911315917969, -1.8242950439453125, 2.2667312622070312, -2.2328128814697266, 4.946964263916016, -1.7544403076171875, -2.8262367248535156, 2.03265380859375, 1.9343795776367188, 6.179971694946289], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000313.npy"}
{"epoch": 0.47316704459561604, "step": 314, "batch_size": 64, "mean": 2.476907253265381, "std": 2.7140121459960938, "min": -2.1597824096679688, "p10": -0.5562339782714842, "median": 2.0817108154296875, "p90": 5.879200363159181, "max": 10.095888137817383, "pos_frac": 0.84375, "sample": [4.630592346191406, 1.3387470245361328, 1.32818603515625, 9.416095733642578, 3.053081512451172, 3.3095245361328125, 1.504608154296875, 1.9777908325195312, -1.5648880004882812, 2.4526004791259766, 0.1091156005859375, -1.4772300720214844, 8.29425048828125, 0.1021575927734375, -0.3918628692626953, 7.5694122314453125, 2.1895179748535156, 1.8410358428955078, 5.182147979736328, 0.7502517700195312, 0.7189178466796875, -0.4438629150390625, 4.941413879394531, 6.604927062988281, 5.365325927734375, -2.1597824096679688, 2.0677337646484375, 5.617210388183594, 2.1109466552734375, 2.0956878662109375, 4.460878372192383, 0.34972381591796875, 0.7807579040527344, 1.3263969421386719, 0.5307350158691406, 0.6308975219726562, 4.373046875, 0.5979156494140625, 1.1207294464111328, 2.9297943115234375, 0.3052558898925781, -0.6043930053710938, 6.64697265625, 2.272899627685547, 3.04901123046875, 4.5444488525390625, 4.154796600341797, 10.095888137817383, 0.3238525390625, 5.42852783203125, 1.5200424194335938, 5.453319549560547, 3.4201812744140625, 0.5604038238525391, 3.4111862182617188, 2.4008026123046875, -0.78619384765625, 3.2761802673339844, 1.274322509765625, -0.4146728515625, 3.13275146484375, -1.1870193481445312, -1.3825035095214844, 5.991481781005859], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000314.npy"}
{"epoch": 0.47467876039304613, "step": 315, "batch_size": 64, "mean": 1.9778021574020386, "std": 2.622084379196167, "min": -4.235660552978516, "p10": -1.2525264739990232, "median": 2.2350454330444336, "p90": 5.285972404479982, "max": 8.26542854309082, "pos_frac": 0.765625, "sample": [-1.5532302856445312, 2.409881591796875, 5.990449905395508, 2.202932357788086, -0.3682994842529297, 2.9064769744873047, -4.235660552978516, -0.8237133026123047, -1.3298301696777344, 0.8692092895507812, -2.347900390625, 4.959480285644531, 0.09376907348632812, 4.8181915283203125, 0.000896453857421875, 2.9083404541015625, 2.33001708984375, -0.25677490234375, -0.2751426696777344, -0.13530349731445312, 2.6228389739990234, 5.6472015380859375, 0.7721176147460938, 1.1043853759765625, 1.5844268798828125, 2.6187076568603516, 7.275054931640625, -1.0721511840820312, 0.966217041015625, 3.083944320678711, 1.0944366455078125, 1.1893730163574219, 6.101493835449219, 3.4243316650390625, 1.8487701416015625, 3.624969482421875, 2.3225555419921875, 2.0560760498046875, -3.73101806640625, 1.7474517822265625, 3.4476699829101562, -0.011322021484375, 3.296539306640625, 3.773082733154297, 0.22386932373046875, 4.759025573730469, 4.094917297363281, 8.26542854309082, -2.8287811279296875, 1.5923576354980469, 4.596649169921875, 1.0125656127929688, 2.407623291015625, -0.6310958862304688, 2.7325439453125, 2.2671585083007812, 5.425897598266602, 3.3238677978515625, 4.783885955810547, -2.394855499267578, 4.147178649902344, 0.4043140411376953, 6.692413330078125, 2.7534332275390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000315.npy"}
{"epoch": 0.47619047619047616, "step": 316, "batch_size": 64, "mean": 2.473552942276001, "std": 2.9966440200805664, "min": -2.490863800048828, "p10": -1.0264680862426754, "median": 2.1167593002319336, "p90": 6.152440643310547, "max": 10.742691040039062, "pos_frac": 0.75, "sample": [0.0084686279296875, 0.9417343139648438, 3.518585205078125, -0.4194488525390625, -2.490863800048828, -0.5277633666992188, 1.4802398681640625, 4.31878662109375, 1.2709999084472656, 4.9896392822265625, 3.0850753784179688, 6.97906494140625, 2.2493038177490234, 4.891571044921875, 4.369140625, 0.2031707763671875, -2.47747802734375, 0.7961196899414062, -1.4825572967529297, 4.8773345947265625, 4.552898406982422, 6.050163269042969, 1.9842147827148438, 1.2600173950195312, 3.1619186401367188, 2.3523521423339844, 2.6911392211914062, -0.631683349609375, -1.1645622253417969, 4.437858581542969, 5.76909065246582, 8.187240600585938, 1.7684593200683594, 10.742691040039062, -0.5539836883544922, 1.2256240844726562, -1.7981643676757812, 3.7927169799804688, 3.83148193359375, 0.4052581787109375, 5.5869293212890625, 2.365478515625, 3.8794727325439453, 0.6292934417724609, 5.9206695556640625, -0.7042484283447266, -0.012224197387695312, 5.927886962890625, -0.4866485595703125, 9.806221008300781, -1.2823104858398438, -2.2945823669433594, 0.9061737060546875, -0.26733970642089844, -0.181060791015625, 1.4735107421875, 0.5563545227050781, 6.656959533691406, 2.93011474609375, 4.7278594970703125, 6.1962738037109375, 1.5044784545898438, 6.629158020019531, 3.193115234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000316.npy"}
{"epoch": 0.47770219198790626, "step": 317, "batch_size": 64, "mean": 2.8259806632995605, "std": 2.9171323776245117, "min": -2.0440292358398438, "p10": -0.33408050537109374, "median": 2.32757568359375, "p90": 7.256074333190919, "max": 10.709842681884766, "pos_frac": 0.8125, "sample": [3.9342880249023438, 4.054012298583984, 2.5463733673095703, 5.38165283203125, 1.637237548828125, 10.709842681884766, -0.8815364837646484, 1.1810760498046875, 0.6198215484619141, 4.163122177124023, -0.07635116577148438, 1.9907951354980469, -0.33954620361328125, -0.17146873474121094, 3.7174415588378906, 1.1919574737548828, 7.6410675048828125, 0.048095703125, 5.833831787109375, 6.692455291748047, -0.9928207397460938, 0.782073974609375, 3.156890869140625, -2.0440292358398438, 3.8889942169189453, 2.3527679443359375, 2.489551544189453, 4.8302459716796875, 4.45343017578125, -0.199066162109375, 8.837373733520508, 1.724365234375, 2.5652732849121094, 7.37321662902832, 7.591426849365234, -1.8965301513671875, 0.5281219482421875, -0.3155326843261719, 2.9062728881835938, 6.9827423095703125, 1.2937240600585938, 2.1529693603515625, 8.833251953125, -1.5485496520996094, 1.1142044067382812, 1.864187240600586, 2.9531784057617188, 4.4093780517578125, 2.3023834228515625, 0.9039115905761719, 5.632133483886719, 4.319629669189453, 1.7851982116699219, 4.5737762451171875, -0.32132720947265625, 0.8547286987304688, -0.4797210693359375, 4.377166748046875, 5.449853897094727, 8.990171432495117, 0.4257965087890625, 3.7431793212890625, 1.146392822265625, 1.198211669921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000317.npy"}
{"epoch": 0.47921390778533635, "step": 318, "batch_size": 64, "mean": 1.6832314729690552, "std": 3.6539485454559326, "min": -5.104560852050781, "p10": -3.3537147521972654, "median": 1.2402153015136719, "p90": 6.342525482177734, "max": 10.440765380859375, "pos_frac": 0.640625, "sample": [2.637279510498047, 3.9603500366210938, 0.1430511474609375, -2.382080078125, 6.376136779785156, 4.902584075927734, 2.8743438720703125, -3.9616851806640625, 2.217161178588867, 7.677555084228516, -0.008708953857421875, -2.5594558715820312, 2.5350914001464844, 4.252403259277344, 5.829265594482422, 1.3161964416503906, 1.1642341613769531, -1.9348773956298828, 2.8064193725585938, -0.7273349761962891, -0.39542198181152344, 2.4724369049072266, 7.5008697509765625, -3.6656723022460938, 0.15048980712890625, 3.3588600158691406, 6.26409912109375, -5.104560852050781, 2.3926544189453125, 3.3800125122070312, -1.6159133911132812, 3.1177330017089844, -2.4635772705078125, 0.5496482849121094, -1.0652008056640625, 10.215057373046875, 10.440765380859375, -0.9307403564453125, 5.800132751464844, -0.8425674438476562, -0.35589599609375, 0.6443634033203125, 5.135597229003906, -3.8075180053710938, -0.16126251220703125, 3.9914398193359375, 1.0801544189453125, 7.069160461425781, 0.7437629699707031, 3.261425018310547, 0.1992816925048828, 8.925582885742188, 1.9908485412597656, 4.43792724609375, -3.5132179260253906, 0.16702651977539062, 4.933525085449219, -2.9815406799316406, -0.4997711181640625, -0.114288330078125, 3.7959823608398438, -3.8529834747314453, -4.400577545166016, 4.360755920410156], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000318.npy"}
{"epoch": 0.48072562358276644, "step": 319, "batch_size": 64, "mean": 2.3112986087799072, "std": 3.341275691986084, "min": -4.893890380859375, "p10": -1.2356658935546874, "median": 1.8628807067871094, "p90": 6.236574745178223, "max": 12.50177001953125, "pos_frac": 0.78125, "sample": [2.042409896850586, 7.841361999511719, 0.6356582641601562, 3.8583526611328125, 0.5072097778320312, 5.660736083984375, 2.404216766357422, 1.6692123413085938, 1.4833526611328125, 3.3193130493164062, 5.426826477050781, 4.196624755859375, -0.10541152954101562, 0.8569259643554688, 3.3571319580078125, 5.559112548828125, -1.017852783203125, 12.50177001953125, 1.5230350494384766, 4.897087097167969, 1.7100715637207031, -1.196380615234375, 3.31292724609375, 4.725444793701172, 9.301101684570312, -1.1440811157226562, 6.044942855834961, 2.7810916900634766, 6.318702697753906, 3.607421875, -1.3263397216796875, -1.25250244140625, 2.120819091796875, 9.611724853515625, -3.3960399627685547, 2.3272933959960938, -0.187286376953125, 5.060188293457031, 2.2688446044921875, -3.36260986328125, 3.452831268310547, 1.6301326751708984, 0.9674224853515625, 0.40725135803222656, 1.250396728515625, 0.6521415710449219, -4.893890380859375, -0.02149200439453125, -4.486141204833984, 6.734319686889648, 0.29767417907714844, 0.61444091796875, 1.9007797241210938, -0.0722503662109375, 1.7804794311523438, 5.8539276123046875, 4.331150054931641, 1.824981689453125, 3.6851844787597656, 2.811006546020508, 0.8453216552734375, -3.9390716552734375, 7.529693603515625, 0.8244094848632812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000319.npy"}
{"epoch": 0.48223733938019653, "step": 320, "batch_size": 64, "mean": 1.8063321113586426, "std": 3.517354965209961, "min": -5.733123779296875, "p10": -2.159650421142578, "median": 0.7452230453491211, "p90": 6.433214950561525, "max": 12.07958984375, "pos_frac": 0.734375, "sample": [3.5142364501953125, -2.248973846435547, 0.7736301422119141, 0.7168159484863281, 1.81268310546875, 0.5497531890869141, 6.575855255126953, -0.39441871643066406, 4.101325988769531, -0.2652912139892578, 0.224365234375, 4.663644790649414, 0.3157997131347656, 0.13791465759277344, -0.8130340576171875, 2.14404296875, 0.6854019165039062, -2.0420989990234375, -0.44791412353515625, 5.292270660400391, -3.4807205200195312, 1.0204505920410156, -5.733123779296875, 1.0704536437988281, -2.0087738037109375, 12.07958984375, 11.114387512207031, 5.101432800292969, 2.2551040649414062, -2.7534027099609375, 0.5488777160644531, -2.7497692108154297, 5.216438293457031, 1.832977294921875, 0.21261215209960938, 8.128662109375, 2.0232772827148438, 0.412994384765625, -2.967151641845703, 0.3562335968017578, 2.511730194091797, 7.492778778076172, 0.2801628112792969, 1.2396163940429688, 0.49371337890625, 0.6693706512451172, 11.224845886230469, 2.2353286743164062, 0.5122604370117188, -0.38799476623535156, -0.13762283325195312, -1.5202522277832031, 5.190330505371094, -1.2494735717773438, 2.083301544189453, 3.2890090942382812, 2.9771347045898438, 6.739372253417969, 6.1003875732421875, 0.12871551513671875, 3.114675521850586, -2.2100296020507812, 4.9323883056640625, 2.9189491271972656], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000320.npy"}
{"epoch": 0.4837490551776266, "step": 321, "batch_size": 64, "mean": 2.0608036518096924, "std": 3.1576383113861084, "min": -4.947742462158203, "p10": -2.1407083511352534, "median": 2.0589981079101562, "p90": 6.244190216064454, "max": 9.27157211303711, "pos_frac": 0.765625, "sample": [2.0033340454101562, 2.7048873901367188, 0.5566349029541016, 2.8184661865234375, 2.71923828125, 0.37561798095703125, 7.504123687744141, 2.976419448852539, 7.521503448486328, 0.30388450622558594, 8.377849578857422, 1.2325782775878906, 2.325664520263672, 4.762722015380859, -2.401336669921875, -0.058063507080078125, -0.54583740234375, 2.077850341796875, 2.5011749267578125, 4.724052429199219, 4.155067443847656, 0.824951171875, -4.451446533203125, -0.6516876220703125, 1.7227973937988281, -2.987903594970703, -3.0505447387695312, -3.682168960571289, 3.659564971923828, 2.0401458740234375, 1.1577377319335938, 5.481903076171875, 5.92791748046875, 2.48858642578125, 0.3195762634277344, 4.1406402587890625, 3.490234375, -1.5325756072998047, 1.5732154846191406, 4.281303405761719, 1.1873607635498047, -0.20519256591796875, 0.5052070617675781, 6.795299530029297, 0.464385986328125, -4.802024841308594, 4.956962585449219, 6.002555847167969, 6.393650054931641, 3.983722686767578, 1.3480911254882812, 2.313051223754883, -0.4053192138671875, 4.184049606323242, 6.347747802734375, 4.51385498046875, 2.3200111389160156, 2.0306243896484375, 9.27157211303711, -4.947742462158203, 3.025787353515625, -0.8438873291015625, 1.165985107421875, -1.1024093627929688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000321.npy"}
{"epoch": 0.4852607709750567, "step": 322, "batch_size": 64, "mean": 2.695878267288208, "std": 2.6970016956329346, "min": -4.794712066650391, "p10": -0.17563438415527335, "median": 2.3202695846557617, "p90": 6.07524528503418, "max": 10.55082893371582, "pos_frac": 0.875, "sample": [-0.6365585327148438, 4.098945617675781, 6.781211853027344, 0.6782684326171875, 5.246318817138672, 2.1876697540283203, 3.3466415405273438, 3.5066089630126953, 4.268850326538086, 1.9295482635498047, -0.27471923828125, 3.484466552734375, 6.205192565917969, 7.797540664672852, 2.42376708984375, 2.766185760498047, 2.961517333984375, 1.6233673095703125, 0.8592147827148438, -0.214996337890625, 2.2790603637695312, 1.1697921752929688, -1.3572044372558594, 6.185449600219727, 6.111080169677734, 5.321390151977539, 1.6559429168701172, 3.741975784301758, 0.30828094482421875, 3.6810760498046875, 1.2590560913085938, 1.0499114990234375, 3.6628265380859375, 5.547660827636719, 5.018115997314453, 1.9165668487548828, 0.9574699401855469, 7.7365875244140625, 1.1814804077148438, -0.08378982543945312, 4.8581085205078125, 4.243768692016602, 0.0720367431640625, 5.439380645751953, 2.441427230834961, 5.761287689208984, 5.991630554199219, 2.361478805541992, -2.3853378295898438, -4.794712066650391, 1.4075241088867188, 0.9641265869140625, 2.1487388610839844, 2.8002986907958984, 5.383079528808594, 1.05615234375, 0.9409408569335938, 1.1641845703125, 1.78204345703125, -2.4758453369140625, 10.55082893371582, 0.029582977294921875, 4.20250129699707, 2.211214065551758], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000322.npy"}
{"epoch": 0.48677248677248675, "step": 323, "batch_size": 64, "mean": 2.078073263168335, "std": 4.070512294769287, "min": -6.617919921875, "p10": -1.7676048278808592, "median": 1.426370620727539, "p90": 7.558021545410157, "max": 13.501632690429688, "pos_frac": 0.65625, "sample": [2.1451797485351562, -0.8734283447265625, 2.3902587890625, -0.07099151611328125, 0.22374725341796875, 2.5337448120117188, 4.310882568359375, 2.5676040649414062, -0.02172088623046875, 9.096107482910156, 4.89276123046875, -1.4464855194091797, 6.225486755371094, 2.0085525512695312, 8.093162536621094, -2.153614044189453, -4.462249755859375, 5.16668701171875, 5.0806884765625, 1.801666259765625, -2.5851287841796875, 4.333091735839844, 0.6022796630859375, 1.1032943725585938, 12.459709167480469, -0.2117443084716797, -4.088897705078125, 2.7076187133789062, 0.36232757568359375, 2.316986083984375, 13.501632690429688, -1.4598236083984375, -1.2403793334960938, -1.8634490966796875, 5.135353088378906, 0.681854248046875, 0.29931640625, 3.49609375, 1.5803146362304688, 0.5034618377685547, 4.329189300537109, 0.8704490661621094, -0.40711212158203125, -0.35306549072265625, 7.3368682861328125, 13.40756607055664, 9.499290466308594, -5.30340576171875, 7.652801513671875, -0.18218994140625, -6.617919921875, -0.00966644287109375, 1.9817962646484375, 1.2724266052246094, 3.831634521484375, 4.963525772094727, 3.9530029296875, 3.1126174926757812, -1.541351318359375, -1.5439682006835938, 1.7970237731933594, -0.041385650634765625, 0.6871414184570312, -0.8405303955078125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000323.npy"}
{"epoch": 0.48828420256991684, "step": 324, "batch_size": 64, "mean": 2.3483948707580566, "std": 2.632127046585083, "min": -3.7439422607421875, "p10": -0.558437156677246, "median": 2.011587142944336, "p90": 5.583661460876465, "max": 9.246047973632812, "pos_frac": 0.859375, "sample": [4.277767181396484, 1.8797683715820312, 2.9013099670410156, 0.5062255859375, -0.8905677795410156, 7.230926513671875, 3.046977996826172, 0.3641319274902344, -1.45172119140625, 0.7288475036621094, 5.648979187011719, 5.431253433227539, 1.3813858032226562, 3.5230674743652344, 1.4060821533203125, 0.09252166748046875, 5.130241394042969, -3.7439422607421875, 2.5145034790039062, 0.4893341064453125, 2.8322410583496094, 1.5230712890625, 0.13270187377929688, 3.7423553466796875, 4.6772918701171875, 1.6473464965820312, -0.5831375122070312, 0.6684341430664062, 9.227813720703125, 1.6198501586914062, 0.8558845520019531, -1.1337661743164062, 4.7019805908203125, 3.6182098388671875, 3.3153343200683594, 0.8399658203125, 1.9839286804199219, -1.1617717742919922, 3.5211753845214844, 2.5856971740722656, 4.4740447998046875, -1.9120254516601562, 8.2239990234375, 0.9215316772460938, 7.0117645263671875, 0.32874488830566406, 0.6668167114257812, 2.03924560546875, 3.137073516845703, 6.8850250244140625, 9.246047973632812, 1.0743064880371094, 1.2334327697753906, 2.226299285888672, 4.242195129394531, 2.5413970947265625, 0.12966156005859375, -0.0982208251953125, 2.1411666870117188, 0.7802047729492188, 4.312400817871094, 3.978057861328125, 2.133209228515625, -0.5008029937744141], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000324.npy"}
{"epoch": 0.4897959183673469, "step": 325, "batch_size": 64, "mean": 2.1842403411865234, "std": 3.0084950923919678, "min": -6.6348419189453125, "p10": -1.373272132873535, "median": 1.9513282775878906, "p90": 6.418132972717287, "max": 10.706462860107422, "pos_frac": 0.796875, "sample": [1.8829212188720703, 6.747745513916016, 1.6274871826171875, 3.5490970611572266, 1.0588836669921875, 2.7069549560546875, 4.255901336669922, 2.9081497192382812, 1.4036178588867188, 2.3343276977539062, 3.0208206176757812, 3.297029495239258, 0.5742321014404297, -2.3429794311523438, 2.7274932861328125, -1.0726871490478516, 5.981910705566406, 0.9755630493164062, 1.9279899597167969, -1.450155258178711, 10.552574157714844, -1.7397689819335938, 3.0072154998779297, 0.4667987823486328, 0.46142005920410156, 7.754364013671875, -0.11555099487304688, 4.201702117919922, -1.043539047241211, 1.9627227783203125, 1.2902679443359375, 1.7899742126464844, 5.074256896972656, 2.497344970703125, -2.070098876953125, 6.9938812255859375, -1.6646461486816406, 3.484161376953125, 2.087512969970703, 4.12750244140625, 1.94268798828125, 0.28279685974121094, 0.2324981689453125, 6.824951171875, -6.6348419189453125, 1.5499954223632812, 2.4703292846679688, 1.6193161010742188, 6.605085372924805, 2.6028308868408203, 10.706462860107422, -1.5478897094726562, 2.8220348358154297, -0.9407730102539062, 0.8091354370117188, 0.049587249755859375, 3.4248199462890625, 3.3692665100097656, 4.7298736572265625, 1.6201667785644531, 5.644828796386719, -0.3902587890625, 1.9599685668945312, -1.193878173828125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000325.npy"}
{"epoch": 0.491307634164777, "step": 326, "batch_size": 64, "mean": 1.2490378618240356, "std": 3.0351760387420654, "min": -4.7061004638671875, "p10": -2.5385883331298826, "median": 1.2562580108642578, "p90": 4.9348091125488285, "max": 8.135459899902344, "pos_frac": 0.640625, "sample": [-1.4864883422851562, 0.10952949523925781, 2.047405242919922, 0.7665061950683594, 4.597930908203125, -4.7061004638671875, 6.788177490234375, 2.12841796875, 2.429840087890625, 6.5528411865234375, 1.1269607543945312, 3.3682518005371094, 0.9953842163085938, 0.60491943359375, 0.8304729461669922, 2.6226959228515625, 0.5490474700927734, 3.0488433837890625, -1.4613151550292969, 4.9737396240234375, 5.151309967041016, 1.4639701843261719, 2.4785690307617188, 4.843971252441406, -2.0530166625976562, -1.4902191162109375, -4.599151611328125, -1.619110107421875, -0.005767822265625, -0.5773239135742188, 4.611860275268555, -3.5523300170898438, 1.5417671203613281, -1.6917381286621094, 4.5726470947265625, 0.7015304565429688, 1.7217788696289062, -0.04412078857421875, -1.86895751953125, -2.80242919921875, 5.527070999145508, 7.109169006347656, -2.329662322998047, 0.19038772583007812, 4.427463531494141, 1.3855552673339844, 4.639072418212891, -2.788339614868164, 2.861124038696289, 8.135459899902344, -0.7229194641113281, -1.3242950439453125, -2.6281280517578125, 2.3204345703125, -1.590423583984375, 3.3129119873046875, -4.537755966186523, 2.0631179809570312, 3.8237648010253906, 1.979156494140625, 3.0909423828125, -2.179767608642578, -0.02414703369140625, 4.527927398681641], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000326.npy"}
{"epoch": 0.4928193499622071, "step": 327, "batch_size": 64, "mean": 2.0652735233306885, "std": 3.4300570487976074, "min": -7.01043701171875, "p10": -1.3833332061767578, "median": 2.0009193420410156, "p90": 6.95338287353516, "max": 10.796501159667969, "pos_frac": 0.71875, "sample": [4.328590393066406, 0.5269699096679688, 0.1849822998046875, 7.571929931640625, -0.8044891357421875, 3.55780029296875, -4.41230583190918, 2.7821044921875, -2.5134105682373047, 4.918460845947266, 7.7940826416015625, 0.07715988159179688, -1.3281478881835938, 4.307003021240234, 4.063148498535156, 5.797250747680664, 5.05352783203125, 0.14605712890625, -1.389862060546875, -1.3680992126464844, 1.1109981536865234, 1.8286705017089844, 3.498434066772461, 6.101165771484375, 7.3186187744140625, -1.342437744140625, 2.0210952758789062, 2.3612747192382812, 4.147762298583984, -1.0736923217773438, 1.833587646484375, 1.8384475708007812, -2.6709747314453125, 5.884819030761719, 2.9900360107421875, -0.45479583740234375, 4.892364501953125, 1.9134750366210938, -0.7104225158691406, -0.7269859313964844, 8.173934936523438, 0.9898910522460938, -7.01043701171875, 2.1168365478515625, 4.643085479736328, 10.796501159667969, 0.0067596435546875, 4.2828521728515625, 8.77899169921875, 2.4568710327148438, 7.338783264160156, -0.6768951416015625, 2.2399215698242188, 2.4992446899414062, -2.80194091796875, -0.5793991088867188, 5.49658203125, 2.9642868041992188, 2.1829757690429688, 0.12594985961914062, 0.41120147705078125, -3.254060745239258, 1.980743408203125, -1.03936767578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000327.npy"}
{"epoch": 0.4943310657596372, "step": 328, "batch_size": 64, "mean": 1.9774516820907593, "std": 2.910179376602173, "min": -3.914764404296875, "p10": -1.648173141479492, "median": 1.8294029235839844, "p90": 5.413181304931642, "max": 8.5623779296875, "pos_frac": 0.75, "sample": [8.406917572021484, 1.1626167297363281, 2.4068984985351562, 1.3298187255859375, 4.096977233886719, 0.5458412170410156, -0.8484249114990234, 4.525135040283203, 2.209056854248047, 2.8788681030273438, -1.259521484375, 3.96844482421875, 8.228187561035156, 2.675365447998047, 1.4418487548828125, -1.5396270751953125, 4.930624008178711, 3.106740951538086, 3.7702178955078125, -2.5501022338867188, 1.8505859375, -1.9365692138671875, 0.181671142578125, 2.658153533935547, -1.6760520935058594, -0.2265167236328125, 8.138092041015625, 1.1469497680664062, -3.914764404296875, 3.32720947265625, -2.617950439453125, 3.7316417694091797, 5.559608459472656, 2.679046630859375, 4.700187683105469, 0.1611480712890625, 0.02691650390625, -0.4173736572265625, 8.5623779296875, 0.40573883056640625, 3.6384811401367188, 2.3625946044921875, -0.9439659118652344, -1.8411235809326172, 3.686767578125, 2.8890914916992188, 0.5325107574462891, -0.6682891845703125, 0.7645034790039062, 2.2536888122558594, 0.9668617248535156, -1.5831222534179688, 3.1968307495117188, 5.0715179443359375, -1.968780517578125, 0.7537307739257812, 2.6243438720703125, 0.6980209350585938, 7.646646499633789, 1.8082199096679688, 0.46780967712402344, -0.032726287841796875, 7.353996276855469, 5.053318023681641], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000328.npy"}
{"epoch": 0.4958427815570673, "step": 329, "batch_size": 64, "mean": 2.1446990966796875, "std": 3.5886335372924805, "min": -4.8419189453125, "p10": -2.309412956237793, "median": 1.761514663696289, "p90": 6.4533294677734405, "max": 12.628257751464844, "pos_frac": 0.75, "sample": [2.393991470336914, 0.9830284118652344, 2.718109130859375, 4.0422515869140625, -2.4160385131835938, 1.5910797119140625, 6.757110595703125, 0.7693634033203125, 2.885498046875, 1.1656951904296875, 5.687164306640625, 2.715057373046875, -2.3764209747314453, 4.199501037597656, -1.315185546875, -1.02191162109375, 1.5904541015625, 3.417510986328125, -3.250823974609375, 3.7265625, 0.8103713989257812, 0.5274600982666016, 9.686162948608398, 3.8257904052734375, -3.842864990234375, 7.606712341308594, 12.237098693847656, 3.6799697875976562, -2.1530609130859375, -0.18614578247070312, 4.1485443115234375, 2.1954803466796875, -3.02337646484375, 2.6231689453125, 3.6257190704345703, -0.9885635375976562, 9.439788818359375, -0.5432491302490234, 5.00010871887207, 4.218559265136719, 5.7445068359375, 4.6849822998046875, 0.4155464172363281, -2.1082992553710938, 1.8439903259277344, 1.6790390014648438, 0.08758354187011719, 1.6782341003417969, 0.876434326171875, 3.1349353790283203, 0.3720207214355469, 3.6976318359375, 2.0218582153320312, -0.6939888000488281, 12.628257751464844, 5.5272064208984375, -1.2364635467529297, 7.654979705810547, 0.7076873779296875, -4.8419189453125, 2.4946746826171875, 0.2317962646484375, 0.17117881774902344, -2.660797119140625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000329.npy"}
{"epoch": 0.4973544973544973, "step": 330, "batch_size": 64, "mean": 2.421109914779663, "std": 3.514007091522217, "min": -7.765216827392578, "p10": -1.6193281173706051, "median": 1.8942337036132812, "p90": 7.022560119628906, "max": 10.628189086914062, "pos_frac": 0.796875, "sample": [-0.5608596801757812, -0.8817825317382812, -1.3032073974609375, -3.7657699584960938, -0.2002410888671875, -7.765216827392578, 6.9500885009765625, 1.4801406860351562, -2.3856582641601562, 7.778144836425781, 6.607147216796875, 1.8574562072753906, -4.107465744018555, 1.64080810546875, 4.908657073974609, 7.053619384765625, 7.2265777587890625, -0.64532470703125, 10.628189086914062, 5.301616668701172, 5.555652618408203, -0.8860187530517578, 2.6871490478515625, 0.5281810760498047, 5.305290222167969, 4.4215545654296875, 1.88116455078125, 0.6769790649414062, 0.07429122924804688, 0.657012939453125, 1.6644229888916016, 4.391838073730469, 1.69378662109375, 2.357929229736328, -3.492830276489258, 5.955635070800781, 4.8115997314453125, 4.185455322265625, 5.4664306640625, 2.325733184814453, 0.5803298950195312, 3.1139373779296875, 0.6819000244140625, 3.568744659423828, 6.824638366699219, 6.213535308837891, 1.4794960021972656, 2.158641815185547, 2.1412353515625, 0.0965728759765625, 1.0246047973632812, 1.9073028564453125, 7.9995880126953125, 4.011716842651367, 1.70123291015625, 2.428619384765625, 0.4804229736328125, -1.7548084259033203, 7.4731292724609375, -3.604888916015625, 4.495050430297852, 1.5680313110351562, 1.2238845825195312, 9.059947967529297], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000330.npy"}
{"epoch": 0.4988662131519274, "step": 331, "batch_size": 64, "mean": 2.901820182800293, "std": 3.674994945526123, "min": -6.848388671875, "p10": -1.3526960372924801, "median": 2.757282257080078, "p90": 7.147788429260255, "max": 14.733055114746094, "pos_frac": 0.796875, "sample": [6.168886184692383, 2.4347763061523438, 2.528362274169922, 3.053638458251953, 3.100147247314453, 4.4818878173828125, 3.589874267578125, 6.314544677734375, 3.0800018310546875, 1.2947959899902344, -0.8298377990722656, 7.3050994873046875, 6.250820159912109, 8.720413208007812, 1.696014404296875, -0.5074386596679688, 14.733055114746094, 3.6328506469726562, 2.9018325805664062, 10.796175003051758, 4.265613555908203, 0.8600234985351562, 0.844482421875, 1.6214447021484375, 0.26752281188964844, 5.682952880859375, 5.101764678955078, 10.176055908203125, -0.6776885986328125, 5.1153411865234375, 5.0727081298828125, 1.8518638610839844, 5.529438018798828, 6.1366424560546875, 1.2545394897460938, 1.2092208862304688, 5.448694229125977, -1.8271713256835938, -1.021768569946289, 4.30926513671875, -0.6825332641601562, 6.937248229980469, 2.274473190307617, -1.4945220947265625, 2.279024124145508, -6.848388671875, 1.5031967163085938, -1.4977798461914062, -4.17315673828125, 0.22700881958007812, -1.5560684204101562, 1.5107574462890625, 4.865264892578125, 0.09327316284179688, 3.7497215270996094, 0.5943260192871094, -0.6480026245117188, 5.088579177856445, 3.2450790405273438, -3.2241172790527344, 7.238019943237305, 3.4564876556396484, 2.61273193359375, 8.199010848999023], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000331.npy"}
{"epoch": 0.5003779289493575, "step": 332, "batch_size": 64, "mean": 2.4156653881073, "std": 3.216212749481201, "min": -4.107601165771484, "p10": -0.8138423919677733, "median": 1.763169288635254, "p90": 6.628987121582032, "max": 12.480100631713867, "pos_frac": 0.828125, "sample": [-0.8543281555175781, 1.850006103515625, 0.9145126342773438, 0.00766754150390625, 1.8481159210205078, 2.0761871337890625, 1.9303817749023438, 1.6662826538085938, 1.2046051025390625, 1.5785942077636719, 3.193695068359375, 9.656684875488281, 6.2046356201171875, 4.66015625, 6.550731658935547, 2.0891494750976562, 1.6189289093017578, 0.260498046875, 2.954120635986328, -0.3067169189453125, 6.662525177001953, 1.2859954833984375, -3.873554229736328, 9.00039291381836, 0.5860137939453125, 3.0096435546875, -1.0441303253173828, 0.16881561279296875, 1.4653167724609375, 4.100028991699219, -0.09438323974609375, 5.534351348876953, 0.695404052734375, -1.6337509155273438, 3.8372344970703125, 2.7347888946533203, 0.5160255432128906, 8.3974609375, -3.203807830810547, -0.023153305053710938, 1.9858245849609375, 5.55963134765625, 3.047252655029297, 2.7396602630615234, 1.1121368408203125, 0.6888542175292969, 3.6346359252929688, 4.408901214599609, 2.32293701171875, 2.5286426544189453, 8.171279907226562, 1.3942947387695312, 2.1299171447753906, 0.21042633056640625, 1.67822265625, -0.7193756103515625, 12.480100631713867, 0.6942119598388672, 1.4453887939453125, 6.3585205078125, 9.416580200195312, 1.22900390625, -4.107601165771484, -1.0319900512695312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000332.npy"}
{"epoch": 0.5018896447467877, "step": 333, "batch_size": 64, "mean": 3.1737163066864014, "std": 3.4090168476104736, "min": -2.3811569213867188, "p10": -0.7604101181030273, "median": 2.436532974243164, "p90": 7.693315124511721, "max": 14.456733703613281, "pos_frac": 0.796875, "sample": [1.6929931640625, -0.7828617095947266, 7.953849792480469, 5.705284118652344, 3.2890090942382812, 0.12889480590820312, 5.790264129638672, 2.243408203125, 1.5956916809082031, -0.01422119140625, 7.2151947021484375, 6.518716812133789, 3.7460174560546875, 3.0518531799316406, 6.421697616577148, 3.627166748046875, 3.5337600708007812, 10.237258911132812, 6.8336029052734375, 7.898223876953125, 0.7170867919921875, 2.5927467346191406, 1.6637115478515625, -0.6836395263671875, 1.528421401977539, -1.2410049438476562, -0.9102745056152344, 1.6816787719726562, 1.4207763671875, 2.2709426879882812, 4.395549774169922, 9.143014907836914, -0.255767822265625, 1.7100448608398438, 4.606698989868164, 4.545066833496094, 14.456733703613281, -1.1453495025634766, 0.12367439270019531, 1.119964599609375, -0.0445556640625, 3.9840240478515625, -2.3811569213867188, 1.1906852722167969, 1.0079727172851562, -0.7080230712890625, 6.780670166015625, 1.3308868408203125, -0.2683219909667969, 3.2499847412109375, 2.033660888671875, -1.3921051025390625, 3.5770111083984375, 8.529766082763672, 0.1597423553466797, -2.2158203125, 2.7891006469726562, 2.2803192138671875, 8.720647811889648, 6.717912673950195, 6.461700439453125, 5.945976257324219, 6.777565002441406, 4.1643218994140625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000333.npy"}
{"epoch": 0.5034013605442177, "step": 334, "batch_size": 64, "mean": 2.062617778778076, "std": 3.5949995517730713, "min": -6.577781677246094, "p10": -2.602540588378906, "median": 1.6969280242919922, "p90": 7.697481536865235, "max": 9.101287841796875, "pos_frac": 0.734375, "sample": [-1.7714691162109375, -6.144462585449219, 2.7818145751953125, 1.7089958190917969, 2.67266845703125, 1.5257453918457031, -3.3483734130859375, 7.726799011230469, 0.5871963500976562, -6.577781677246094, -0.32872772216796875, 9.101287841796875, -2.6973800659179688, 1.1774826049804688, 1.2015457153320312, 2.9919471740722656, 0.54058837890625, -0.465484619140625, 1.161865234375, 7.901618957519531, 5.882514953613281, 7.9752044677734375, 1.4692649841308594, 2.792644500732422, 5.361122131347656, -4.137119293212891, 1.6848602294921875, 0.41849517822265625, 7.806575775146484, 1.7635269165039062, 0.9306507110595703, -1.8089179992675781, 6.637382507324219, -1.2872314453125, 5.154699325561523, 8.202228546142578, 3.412811279296875, 3.33270263671875, 3.2849502563476562, 1.6231307983398438, 2.6322555541992188, -2.3812484741210938, -0.10714149475097656, 2.9198455810546875, 7.96612548828125, -0.1721038818359375, -0.21062088012695312, 0.5950431823730469, -3.930267333984375, 4.928031921386719, 3.9426422119140625, 0.8835906982421875, 3.273406982421875, 1.21319580078125, -2.202098846435547, 3.0464401245117188, 5.157173156738281, 7.6290740966796875, -3.0468368530273438, 0.6811790466308594, 4.3013458251953125, 3.11328125, 5.788337707519531, 5.741508483886719], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000334.npy"}
{"epoch": 0.5049130763416477, "step": 335, "batch_size": 64, "mean": 2.4321506023406982, "std": 3.5632457733154297, "min": -5.602420806884766, "p10": -1.73721923828125, "median": 1.8477096557617188, "p90": 7.117699813842775, "max": 10.02218246459961, "pos_frac": 0.703125, "sample": [3.677520751953125, 6.3221282958984375, -2.5945301055908203, 7.526458740234375, 1.6359176635742188, 5.359222412109375, 5.785362243652344, -0.23123931884765625, 2.791656494140625, 1.8415069580078125, 7.281955718994141, 5.941886901855469, 1.853912353515625, 3.21685791015625, 8.994277954101562, 6.5121612548828125, 1.8153457641601562, 0.23978042602539062, 0.3946647644042969, 9.451974868774414, -3.3138656616210938, 2.8464813232421875, 2.614614486694336, 8.2392578125, -0.258941650390625, 4.9974365234375, 5.802112579345703, -4.181179046630859, -0.8438224792480469, 4.825828552246094, 5.082695007324219, -1.3625259399414062, 1.0301933288574219, 1.6989707946777344, 4.724636077880859, -1.8044624328613281, 6.135343551635742, -0.6570625305175781, 0.8199539184570312, 10.02218246459961, 1.4657135009765625, 2.4792537689208984, -0.36952972412109375, 3.9180374145507812, 2.2717666625976562, 8.286258697509766, -0.49456024169921875, -0.6301422119140625, -1.5803184509277344, -0.16168212890625, 1.9677734375, -1.3521728515625, 0.933380126953125, -2.9371280670166016, -5.602420806884766, -1.9300804138183594, 6.1183624267578125, 4.924060821533203, 1.5314617156982422, 5.346408843994141, -0.3428955078125, 0.43508148193359375, 0.4119110107421875, 6.73443603515625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000335.npy"}
{"epoch": 0.5064247921390779, "step": 336, "batch_size": 64, "mean": 2.2667746543884277, "std": 3.8019673824310303, "min": -4.0816650390625, "p10": -1.7415573120117187, "median": 1.484018325805664, "p90": 7.528991699218751, "max": 13.780899047851562, "pos_frac": 0.671875, "sample": [-0.3714141845703125, 5.725067138671875, 5.806915283203125, -0.870574951171875, 11.429168701171875, 4.387275695800781, 3.6309471130371094, -1.4257049560546875, 11.073070526123047, 0.2770061492919922, -0.9516963958740234, -0.6497611999511719, 9.730499267578125, -1.7346343994140625, 4.23443603515625, 0.8399658203125, -0.2506256103515625, 1.7315406799316406, 8.8282470703125, -1.9293022155761719, -0.52239990234375, 2.4520797729492188, 7.084434509277344, 1.94195556640625, -1.8353042602539062, 5.845539093017578, 0.9176673889160156, -3.5695037841796875, -0.828643798828125, -4.0816650390625, 8.446754455566406, -2.7407760620117188, 0.8057823181152344, 4.7973175048828125, -0.18727493286132812, -0.514434814453125, 1.5527992248535156, -1.7711639404296875, 6.131633758544922, 2.7378807067871094, 0.13323211669921875, -0.35790252685546875, 2.1014785766601562, 0.5574874877929688, 4.369304656982422, 1.9190826416015625, 2.2646636962890625, -1.7388763427734375, 1.8341121673583984, 7.589622497558594, 0.8456649780273438, 1.124114990234375, 3.608043670654297, 7.387519836425781, 13.780899047851562, 3.6404647827148438, 0.03497314453125, 2.1535720825195312, 4.6634521484375, -1.742706298828125, 1.4152374267578125, 3.437419891357422, 0.801422119140625, -0.9217987060546875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000336.npy"}
{"epoch": 0.5079365079365079, "step": 337, "batch_size": 64, "mean": 1.9988073110580444, "std": 3.2809324264526367, "min": -4.541316986083984, "p10": -2.331806945800781, "median": 2.0047378540039062, "p90": 5.894456481933595, "max": 12.117324829101562, "pos_frac": 0.71875, "sample": [1.4773216247558594, -0.2328338623046875, -2.9002113342285156, -2.3885574340820312, 3.0977935791015625, 4.068733215332031, -0.11445808410644531, 6.340627670288086, -1.5118408203125, -0.0659637451171875, 8.722808837890625, 2.1396102905273438, 2.2675399780273438, -4.525629043579102, 1.802703857421875, 2.0391845703125, 3.888988494873047, 3.977630615234375, 1.3455924987792969, -1.3255386352539062, 1.8808822631835938, -0.052051544189453125, -2.9868812561035156, -4.541316986083984, 4.963018417358398, 3.591489791870117, -2.342193603515625, 2.7760848999023438, 1.4678802490234375, 9.47406005859375, 2.0772552490234375, -1.7384719848632812, 6.284185409545898, -0.47423553466796875, 4.5823822021484375, 4.23480224609375, 4.677005767822266, 1.247802734375, 0.9829673767089844, 3.758697509765625, 5.5496368408203125, 2.0882186889648438, 3.9580001831054688, 2.5244293212890625, 0.0392608642578125, -1.7530288696289062, 2.3380393981933594, 0.5509490966796875, 3.5746917724609375, 0.7844696044921875, 5.1046142578125, 12.117324829101562, -0.03350830078125, 1.7574005126953125, 1.099212646484375, 7.0832061767578125, 0.29009437561035156, -2.3075714111328125, 4.570913314819336, 4.089200973510742, -3.8924713134765625, 1.9702911376953125, 6.042236328125, 2.4111900329589844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000337.npy"}
{"epoch": 0.509448223733938, "step": 338, "batch_size": 64, "mean": 2.142843723297119, "std": 3.173322916030884, "min": -4.493995666503906, "p10": -1.6895807266235348, "median": 2.0988540649414062, "p90": 5.85153160095215, "max": 9.235021591186523, "pos_frac": 0.71875, "sample": [-2.149627685546875, 0.8695869445800781, 4.862091064453125, 4.377191543579102, 4.516315460205078, 3.8255043029785156, -4.207494735717773, 3.6043853759765625, 0.40142822265625, 8.8807373046875, 9.028987884521484, 1.13885498046875, -1.1133308410644531, 2.863983154296875, 1.5194282531738281, -1.8758544921875, 0.699493408203125, -3.4487247467041016, -0.2779693603515625, 0.4687347412109375, 3.2988967895507812, 4.180328369140625, 2.530862808227539, -0.16107177734375, 3.3643417358398438, 2.4432411193847656, -4.493995666503906, 5.471611022949219, 4.3053741455078125, 5.345855712890625, 2.4148635864257812, 6.5411376953125, -0.365692138671875, 2.531665802001953, 0.68548583984375, 2.361083984375, 3.489961624145508, 2.8224868774414062, 5.036262512207031, -0.556915283203125, -0.4149017333984375, 3.9662094116210938, -2.1288604736328125, 3.241558074951172, 1.033111572265625, 2.788755416870117, -1.1574249267578125, -1.2549419403076172, 1.4514694213867188, -0.17685699462890625, 0.8604507446289062, 8.560384750366211, 5.204853057861328, 0.5175514221191406, 1.6616325378417969, 6.014354705810547, 1.8366241455078125, 9.141525268554688, 3.5088424682617188, -2.352832794189453, -0.5938930511474609, -0.5757980346679688, 9.235021591186523, 1.5456485748291016], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000338.npy"}
{"epoch": 0.5109599395313681, "step": 339, "batch_size": 64, "mean": 2.4100594520568848, "std": 3.270848274230957, "min": -3.2616119384765625, "p10": -1.6771053314208983, "median": 1.8669567108154297, "p90": 6.5779861450195325, "max": 11.539039611816406, "pos_frac": 0.78125, "sample": [3.2416954040527344, 2.7014236450195312, -0.01134490966796875, 0.5237083435058594, -1.7097129821777344, 5.758821487426758, 2.4114913940429688, 2.3239288330078125, 0.9167690277099609, -1.6010208129882812, 5.072120666503906, -0.2402191162109375, 1.1990089416503906, 0.7030925750732422, 0.8245086669921875, 0.072723388671875, -2.8571701049804688, 3.2490921020507812, 7.5305938720703125, 0.15401458740234375, 5.207489013671875, 1.405853271484375, 4.326877593994141, 3.1741714477539062, -0.15500450134277344, 5.697660446166992, 1.5864791870117188, 1.8190536499023438, 0.8517913818359375, 2.6776180267333984, 6.352165222167969, 6.894462585449219, 10.628265380859375, 10.839376449584961, 3.9386749267578125, 1.7723579406738281, 3.528839111328125, -2.1597976684570312, 4.96343994140625, 3.8893280029296875, 7.1610260009765625, 0.5908660888671875, -2.6040782928466797, -1.7349166870117188, 2.98602294921875, -1.4738616943359375, -0.6209869384765625, 3.064239501953125, -2.6270217895507812, -3.2616119384765625, 0.6945018768310547, 0.7709331512451172, 4.063686370849609, 5.117681503295898, -1.3575668334960938, 11.539039611816406, 0.1782684326171875, 1.9148597717285156, 1.769287109375, 4.89361572265625, 3.4923858642578125, 4.258693695068359, 1.2513561248779297, 6.674766540527344], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000339.npy"}
{"epoch": 0.5124716553287982, "step": 340, "batch_size": 64, "mean": 2.6568050384521484, "std": 3.9729902744293213, "min": -6.797843933105469, "p10": -2.1694284439086915, "median": 1.8730354309082031, "p90": 8.34341621398926, "max": 10.083099365234375, "pos_frac": 0.734375, "sample": [3.9263916015625, 7.091552734375, 4.408819198608398, 2.2086410522460938, -1.1156768798828125, 9.516363143920898, 0.2536792755126953, 0.0070362091064453125, -2.196226119995117, 3.43548583984375, 1.8044967651367188, 4.288700103759766, 4.102294921875, 3.730527877807617, 10.000129699707031, 5.926578521728516, -0.2744770050048828, -4.861991882324219, 7.83331298828125, 6.6211090087890625, -1.0639209747314453, -0.43219947814941406, -3.473602294921875, 3.4835357666015625, 3.7088775634765625, 10.083099365234375, 1.5064697265625, 5.19732666015625, 4.665794372558594, 7.994707107543945, -2.7763900756835938, 3.6237335205078125, 1.157958984375, 2.487335205078125, 5.248504638671875, 1.835968017578125, 8.44620132446289, -0.15391159057617188, 0.150177001953125, 8.013744354248047, 10.003602981567383, -1.2295093536376953, 0.01407623291015625, 8.103584289550781, 5.809375762939453, -2.1621532440185547, -1.50384521484375, 1.5275611877441406, 1.7491188049316406, 5.9596405029296875, 2.1361541748046875, 1.9101028442382812, 0.945648193359375, -0.2705879211425781, -6.797843933105469, -2.17254638671875, 9.255043029785156, -0.08924102783203125, 1.776458740234375, 8.947181701660156, 1.47003173828125, 0.33568572998046875, -3.6844921112060547, 1.5923309326171875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000340.npy"}
{"epoch": 0.5139833711262283, "step": 341, "batch_size": 64, "mean": 2.6133944988250732, "std": 3.8772542476654053, "min": -6.008056640625, "p10": -1.8448570251464842, "median": 2.1980743408203125, "p90": 7.680303955078125, "max": 10.329498291015625, "pos_frac": 0.71875, "sample": [-3.2639427185058594, 4.919456481933594, 4.378303527832031, 5.766397476196289, 7.310098648071289, 5.585031509399414, 6.287532806396484, 10.329498291015625, 5.011070251464844, -2.7936859130859375, 8.185523986816406, 1.840250015258789, -1.747283935546875, -0.7208805084228516, -0.2666740417480469, 0.9376983642578125, 5.114402770996094, 2.0021743774414062, 9.032081604003906, 9.736442565917969, 4.9586334228515625, 7.353202819824219, 6.391834259033203, 2.2438716888427734, -6.008056640625, 8.786561965942383, 4.6297454833984375, -1.2146568298339844, -0.5706329345703125, -1.7544784545898438, 2.2370147705078125, 2.3852767944335938, -1.8835906982421875, -0.97454833984375, 3.7724609375, 7.725860595703125, 1.7708473205566406, 1.4665584564208984, 0.398040771484375, -4.227210998535156, -2.9780044555664062, -0.3891754150390625, 1.5579757690429688, 4.8184967041015625, 0.8403835296630859, 8.806144714355469, 5.5513458251953125, 5.82940673828125, 2.5576324462890625, 7.1871337890625, 2.9022369384765625, 2.8889312744140625, 6.69255256652832, -1.62432861328125, 0.38767242431640625, 7.574005126953125, 2.1591339111328125, -0.27634429931640625, 0.6899127960205078, -1.4848861694335938, 0.9446563720703125, 0.719757080078125, 1.4246406555175781, -4.6522674560546875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000341.npy"}
{"epoch": 0.5154950869236583, "step": 342, "batch_size": 64, "mean": 3.076798915863037, "std": 3.8778271675109863, "min": -5.61322021484375, "p10": -1.016001319885254, "median": 2.0494251251220703, "p90": 8.811271286010744, "max": 12.025186538696289, "pos_frac": 0.78125, "sample": [3.6721954345703125, 0.0571441650390625, -1.7180557250976562, -1.0182151794433594, 0.10337066650390625, 8.597843170166016, 1.1322574615478516, 7.315582275390625, -0.826873779296875, 7.818855285644531, 0.34090614318847656, 2.8686752319335938, 0.6690998077392578, 8.566543579101562, 1.8013229370117188, -2.009490966796875, 0.45880126953125, -5.61322021484375, -0.5193004608154297, -0.9432621002197266, 1.8098182678222656, 6.527896881103516, 4.38592529296875, 7.110677719116211, -1.11529541015625, 4.131687164306641, 2.4960403442382812, 3.742757797241211, 4.225433349609375, 6.307880401611328, 2.147624969482422, 2.4310741424560547, 1.9512252807617188, 1.378265380859375, 5.8693389892578125, -1.6027069091796875, 7.562839508056641, -0.4658203125, 6.97900390625, 7.483194351196289, -0.3871002197265625, 9.694478988647461, 9.002105712890625, 2.2759552001953125, -1.0108356475830078, 10.713279724121094, 0.7203121185302734, -0.9286251068115234, 9.063568115234375, 10.963882446289062, 2.1523590087890625, 1.269948959350586, 0.13687896728515625, 12.025186538696289, 0.1683025360107422, 8.902740478515625, 3.708402633666992, 0.5519180297851562, 0.734375, 1.8531303405761719, 4.811574935913086, 1.4061546325683594, -1.0527725219726562, 6.02886962890625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000342.npy"}
{"epoch": 0.5170068027210885, "step": 343, "batch_size": 64, "mean": 1.5729038715362549, "std": 3.4696226119995117, "min": -8.074958801269531, "p10": -2.64473419189453, "median": 1.4712867736816406, "p90": 5.474051666259766, "max": 9.213455200195312, "pos_frac": 0.734375, "sample": [2.9930419921875, 0.27759552001953125, -0.356781005859375, 0.077880859375, -1.1337127685546875, 6.308807373046875, 2.7462921142578125, -1.0350303649902344, 0.5332183837890625, -1.057220458984375, -4.613605499267578, -1.480072021484375, 1.3277130126953125, 5.159980773925781, 7.221435546875, 0.11504364013671875, 2.8815994262695312, 8.687667846679688, 8.745586395263672, 4.709997177124023, 0.705718994140625, 3.8875598907470703, 3.1577224731445312, 2.4111862182617188, 0.28376007080078125, 2.8652725219726562, 1.6829586029052734, -3.990753173828125, 4.447044372558594, 0.09434127807617188, -3.8331985473632812, 0.6056785583496094, 3.21826171875, 0.8324546813964844, -0.3459014892578125, 0.4252166748046875, 5.22821044921875, 3.342742919921875, -8.074958801269531, 9.213455200195312, -1.06268310546875, 0.8874893188476562, 3.8304214477539062, 5.532867431640625, 2.14923095703125, 1.6148605346679688, 8.581893920898438, 2.6687889099121094, -0.8021602630615234, 4.102378845214844, 5.336814880371094, -0.7777938842773438, 1.8682022094726562, 2.563385009765625, 2.1039886474609375, 4.427703857421875, -3.1058349609375, 0.037322998046875, 2.9610595703125, 0.1440887451171875, -5.32281494140625, -4.767143249511719, -1.5688323974609375, 0.9964046478271484], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000343.npy"}
{"epoch": 0.5185185185185185, "step": 344, "batch_size": 64, "mean": 2.447740316390991, "std": 3.4457671642303467, "min": -5.966608047485352, "p10": -1.9626874923706052, "median": 2.2251739501953125, "p90": 6.692799377441407, "max": 11.54351806640625, "pos_frac": 0.796875, "sample": [-2.7175827026367188, 4.969598770141602, 2.6177215576171875, 2.2896881103515625, 6.7377471923828125, 0.1579723358154297, 0.5874843597412109, 3.001800537109375, 2.4941177368164062, 6.587921142578125, 1.406097412109375, 0.41353607177734375, 2.49420166015625, 5.812736511230469, 1.2223052978515625, 5.498012542724609, 2.28314208984375, 11.54351806640625, 2.167205810546875, 5.621337890625, -2.0277557373046875, 0.7056732177734375, -2.6716079711914062, 6.7719268798828125, 0.25919342041015625, -1.809112548828125, -2.4737091064453125, 0.2313823699951172, 7.0107269287109375, 5.199939727783203, -1.2906112670898438, 8.553031921386719, 1.6881332397460938, 1.1406021118164062, 1.6724357604980469, 6.301338195800781, 4.899196624755859, 2.044208526611328, 1.2087078094482422, -4.660026550292969, 0.8203582763671875, 1.2936172485351562, -0.9263458251953125, -0.6946277618408203, 1.0358200073242188, -1.810861587524414, 9.193710327148438, 5.5715179443359375, 4.12286376953125, 3.3478355407714844, 4.970989227294922, 2.945831298828125, 6.367712020874023, 2.16021728515625, 4.432640075683594, -0.43597984313964844, -2.899993896484375, 6.574459075927734, 0.175140380859375, 3.1206588745117188, -5.966608047485352, 6.798042297363281, 4.6138763427734375, 3.9022674560546875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000344.npy"}
{"epoch": 0.5200302343159486, "step": 345, "batch_size": 64, "mean": 2.3924896717071533, "std": 3.5468997955322266, "min": -5.527307510375977, "p10": -1.6949434280395503, "median": 2.0606002807617188, "p90": 6.822538757324219, "max": 12.25439453125, "pos_frac": 0.75, "sample": [1.26300048828125, 0.03299903869628906, 11.169700622558594, -4.611188888549805, 1.6273689270019531, 0.1350555419921875, 0.9446868896484375, -1.9355621337890625, 1.2305908203125, 0.7289390563964844, 2.558979034423828, 3.8433837890625, 6.241584777832031, 3.424692153930664, -1.8907833099365234, 2.3532867431640625, -2.6481781005859375, 1.767913818359375, 0.7340164184570312, 7.218070983886719, 12.25439453125, 0.38481712341308594, 3.2139739990234375, 3.3992843627929688, 3.98980712890625, 0.65618896484375, -0.5597610473632812, 2.7599315643310547, 0.900726318359375, 3.0356521606445312, 4.3336029052734375, 7.570709228515625, 3.31842041015625, 3.193784713745117, 5.903472900390625, -3.0266895294189453, 8.484529495239258, 6.3226165771484375, 6.831119537353516, 0.09394454956054688, 6.802516937255859, -1.2379837036132812, -2.4354782104492188, 9.87139892578125, 3.5951156616210938, -1.1044025421142578, -0.7511672973632812, -0.650604248046875, 2.6744384765625, -0.04557037353515625, 4.570854187011719, 1.6062774658203125, -0.9838790893554688, 4.2799072265625, 1.1829833984375, 4.9775543212890625, -0.4219341278076172, -5.527307510375977, 1.4181556701660156, -0.18595123291015625, 5.06382942199707, 2.901824951171875, 4.090877532958984, 6.178794860839844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000345.npy"}
{"epoch": 0.5215419501133787, "step": 346, "batch_size": 64, "mean": 2.994063138961792, "std": 3.762364387512207, "min": -5.341499328613281, "p10": -1.5204071044921874, "median": 2.0078182220458984, "p90": 8.705175781250002, "max": 10.0831298828125, "pos_frac": 0.796875, "sample": [8.000499725341797, 9.651069641113281, 5.7158203125, 6.485374450683594, 9.237197875976562, 2.7224273681640625, -0.1917877197265625, 4.704347610473633, 9.539209365844727, 7.949329376220703, 8.851394653320312, 1.8501510620117188, 2.189117431640625, 7.242816925048828, 0.018373489379882812, 9.314434051513672, -1.5347442626953125, 1.6658134460449219, -1.8296546936035156, 0.999755859375, 4.089771270751953, 1.8701324462890625, 1.1843719482421875, 1.5007743835449219, 2.917928695678711, 10.0831298828125, -5.341499328613281, 3.1791114807128906, 1.536468505859375, -5.2048797607421875, 0.445526123046875, -2.4685516357421875, 4.914569854736328, -1.6233863830566406, 1.8637733459472656, 1.5636062622070312, 2.1455039978027344, -2.3073577880859375, 8.363998413085938, 7.441261291503906, 6.4998931884765625, 8.009706497192383, 1.0994033813476562, 5.159271240234375, 1.4388656616210938, 1.0906982421875, 9.168146133422852, 3.5396270751953125, -0.5900287628173828, 3.6825408935546875, 2.9401321411132812, 0.2099456787109375, 1.6760063171386719, 0.15139007568359375, -0.11639785766601562, 0.7669906616210938, 1.7314071655273438, 2.5484542846679688, -1.4869537353515625, 7.771263122558594, -0.0145721435546875, -0.3448009490966797, 2.7371673583984375, 5.2166900634765625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000346.npy"}
{"epoch": 0.5230536659108088, "step": 347, "batch_size": 64, "mean": 2.382420539855957, "std": 3.8673338890075684, "min": -5.082481384277344, "p10": -2.223117637634277, "median": 1.750284194946289, "p90": 7.2133930206298835, "max": 14.937469482421875, "pos_frac": 0.734375, "sample": [1.3562469482421875, 2.0851211547851562, 3.4584732055664062, 2.8109588623046875, -5.082481384277344, 0.4867820739746094, 3.89788818359375, 3.831846237182617, 2.120054244995117, 7.314777374267578, 0.5985031127929688, 8.28607177734375, 0.6372146606445312, -2.2461471557617188, -4.287208557128906, 12.160263061523438, 2.3333206176757812, 3.256673812866211, -3.609039306640625, 1.7011642456054688, 0.6882553100585938, 0.8487701416015625, 6.118499755859375, 1.5565032958984375, -2.9214916229248047, -1.5637054443359375, 4.239959716796875, 6.675832748413086, 7.6565093994140625, -3.0752487182617188, -0.3324165344238281, -0.7883815765380859, -0.2291412353515625, 2.1757354736328125, -1.634450912475586, 1.5293235778808594, -2.169382095336914, 3.8901329040527344, 1.4136962890625, 1.5328216552734375, 4.233320236206055, -0.35260772705078125, 0.21234130859375, 4.471916198730469, 3.061307907104492, 9.813526153564453, 0.2039813995361328, -2.9467697143554688, 14.937469482421875, 9.883056640625, 6.976829528808594, 4.346813201904297, 5.4100799560546875, 3.4996337890625, -0.17154502868652344, 5.1541748046875, 2.120187759399414, 1.7994041442871094, 1.2636871337890625, -0.4647674560546875, -0.6371192932128906, 1.0793838500976562, 5.9477081298828125, 5.910593032836914], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000347.npy"}
{"epoch": 0.5245653817082389, "step": 348, "batch_size": 64, "mean": 3.3596537113189697, "std": 4.979955196380615, "min": -3.142274856567383, "p10": -1.6112369537353515, "median": 1.8645992279052734, "p90": 10.030551528930664, "max": 19.375009536743164, "pos_frac": 0.734375, "sample": [9.581436157226562, 8.072364807128906, 0.146514892578125, 0.09551239013671875, 8.533638000488281, 1.657501220703125, 3.2244720458984375, -1.4857978820800781, -2.5778427124023438, -0.19014739990234375, 12.892463684082031, 0.30704498291015625, 2.6490478515625, 5.528844833374023, 10.082504272460938, 7.886329650878906, 1.9481277465820312, 13.022598266601562, 1.3301353454589844, 9.90932846069336, 0.5825843811035156, 6.30133056640625, 7.209041595458984, 3.1648406982421875, 2.3878097534179688, 5.89448356628418, 0.1140289306640625, 7.622552871704102, -1.6250762939453125, -1.8885936737060547, 4.418708801269531, -1.3874893188476562, 1.7810707092285156, 14.445266723632812, -0.49762725830078125, -2.251638412475586, 3.295970916748047, -1.5789451599121094, 0.3137531280517578, 1.9587764739990234, 10.809547424316406, 19.375009536743164, 1.451690673828125, -1.3341655731201172, 7.354709625244141, 0.08596420288085938, -2.0938854217529297, -1.8288002014160156, -1.4773635864257812, 3.3434104919433594, 0.16806793212890625, 15.115041732788086, -3.142274856567383, 4.625030517578125, 6.843467712402344, 1.10888671875, 5.497213363647461, 1.4238967895507812, -1.5564384460449219, 0.1478252410888672, -0.9956703186035156, 3.19085693359375, 5.241264343261719, -1.2103767395019531], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000348.npy"}
{"epoch": 0.5260770975056689, "step": 349, "batch_size": 64, "mean": 2.9376420974731445, "std": 3.785829544067383, "min": -8.23773193359375, "p10": -1.3017553329467773, "median": 2.2800111770629883, "p90": 7.7855468750000005, "max": 11.738052368164062, "pos_frac": 0.78125, "sample": [1.0047035217285156, 5.12646484375, 4.8731689453125, 2.0035476684570312, 2.248628616333008, 2.3113937377929688, 4.3643951416015625, 4.504886627197266, 1.3899173736572266, 3.6982574462890625, -1.334360122680664, 0.8349742889404297, 5.841102600097656, 1.8714275360107422, 3.0237960815429688, 3.544466018676758, 6.574567794799805, -3.6580581665039062, -8.23773193359375, 6.61939811706543, 4.598518371582031, 9.685012817382812, 2.2232589721679688, 2.1958160400390625, 5.750543594360352, 1.5183029174804688, -1.225677490234375, 5.10328483581543, 1.4081802368164062, 0.761077880859375, 6.029041290283203, 5.07513427734375, -1.815338134765625, 9.322942733764648, -0.6378555297851562, 6.389034271240234, 1.6075172424316406, 11.656234741210938, -2.287616729736328, 2.0153865814208984, 7.8740692138671875, 0.14945602416992188, 0.979736328125, 2.0890350341796875, 4.436077117919922, 8.897819519042969, 2.3729248046875, 0.3605308532714844, -1.9802932739257812, -1.4368171691894531, 6.7073822021484375, 3.5269622802734375, -0.7022190093994141, 1.5437850952148438, 2.5093631744384766, 11.738052368164062, -1.1503715515136719, -1.2123565673828125, -1.1741943359375, 7.5789947509765625, -0.11046600341796875, 4.19781494140625, 4.591331481933594, 10.244720458984375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000349.npy"}
{"epoch": 0.527588813303099, "step": 350, "batch_size": 64, "mean": 3.5284602642059326, "std": 4.10563850402832, "min": -5.047477722167969, "p10": -1.1046585083007812, "median": 2.622401237487793, "p90": 9.13849334716797, "max": 13.103080749511719, "pos_frac": 0.828125, "sample": [11.649375915527344, 1.945556640625, 3.8577919006347656, -0.7899436950683594, 4.17156982421875, 1.9381961822509766, 8.361259460449219, 11.182418823242188, 2.5088253021240234, 4.077756881713867, 0.193572998046875, -1.357757568359375, 0.182037353515625, 4.635181427001953, 1.2943496704101562, 5.243953704833984, 6.407585144042969, 0.8381900787353516, -1.1223030090332031, -1.3044319152832031, 5.08929443359375, 7.162635803222656, -3.547840118408203, 2.19287109375, 1.9132022857666016, -1.0634880065917969, -5.047477722167969, 6.8091278076171875, 0.3425769805908203, -0.9162712097167969, 6.2613067626953125, -2.922119140625, 6.460418701171875, 1.5946578979492188, 6.956989288330078, 8.780899047851562, 3.4217185974121094, 6.548797607421875, 9.49359130859375, 9.291748046875, 3.3945255279541016, 13.103080749511719, 12.556098937988281, 0.8611221313476562, 1.8659782409667969, 2.0963668823242188, 6.897472381591797, 0.6463642120361328, -0.7440528869628906, 7.841278076171875, 0.4868011474609375, 4.0191192626953125, 0.5194644927978516, 2.7359771728515625, 0.4677848815917969, -1.4165115356445312, 7.5556640625, 1.4226837158203125, 1.9986724853515625, 0.2529754638671875, 11.724115371704102, 4.454967498779297, 3.8282833099365234, 6.517406463623047], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000350.npy"}
{"epoch": 0.5291005291005291, "step": 351, "batch_size": 64, "mean": 2.696340322494507, "std": 3.8944180011749268, "min": -8.78506088256836, "p10": -1.8681007385253907, "median": 3.438669204711914, "p90": 7.114178085327149, "max": 11.266952514648438, "pos_frac": 0.75, "sample": [0.7322483062744141, 3.5663833618164062, 8.00918960571289, 3.6793441772460938, -0.3484001159667969, 6.241573333740234, -1.4577903747558594, 4.888740539550781, 7.106773376464844, 2.111053466796875, 4.607147216796875, 7.822502136230469, 1.6830272674560547, 1.81591796875, 2.899005889892578, -1.8622207641601562, 0.7484130859375, -0.606414794921875, 7.117351531982422, 1.910247802734375, 3.3654098510742188, -2.1353836059570312, 6.2847900390625, 4.9960174560546875, 6.230186462402344, 5.1774444580078125, 5.2251434326171875, 3.5697784423828125, -7.748138427734375, 3.5964202880859375, 7.276592254638672, -2.6441612243652344, 6.644706726074219, -0.23077392578125, 4.418310165405273, -3.636962890625, 1.3450546264648438, 7.400543212890625, 3.3825950622558594, 3.7678871154785156, 4.85516357421875, 6.519813537597656, 3.5922622680664062, -1.3006439208984375, 5.393363952636719, 2.143798828125, 5.276966094970703, 3.0695266723632812, -1.2810821533203125, 3.4947433471679688, -1.6069602966308594, -0.3428077697753906, 4.7906646728515625, 4.6136016845703125, 8.157983779907227, 2.2485694885253906, 2.253183364868164, 11.266952514648438, -8.78506088256836, 3.116189956665039, -7.49078369140625, 3.2415695190429688, 4.259838104248047, -1.8706207275390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000351.npy"}
{"epoch": 0.5306122448979592, "step": 352, "batch_size": 64, "mean": 2.631096839904785, "std": 3.6260111331939697, "min": -4.308868408203125, "p10": -1.0399906158447263, "median": 1.7698078155517578, "p90": 7.344488525390625, "max": 13.486839294433594, "pos_frac": 0.78125, "sample": [5.583885192871094, 12.56814193725586, 13.486839294433594, 0.8461990356445312, 5.265380859375, -4.308868408203125, -1.456369400024414, 1.4388618469238281, 0.6808013916015625, 3.1758651733398438, 3.8389453887939453, 4.812713623046875, -1.374298095703125, -0.6814403533935547, 3.7563323974609375, 0.810150146484375, -2.5335693359375, -0.17919921875, -1.1039962768554688, 1.6432418823242188, 7.2028350830078125, 3.0194625854492188, 4.1176605224609375, 8.173053741455078, 7.469505310058594, 3.744873046875, 3.25823974609375, -2.615297317504883, 0.6723613739013672, 0.5164031982421875, -0.1383037567138672, 1.1758975982666016, -0.17908477783203125, 1.8517684936523438, 0.1550445556640625, 2.009521484375, 0.7066650390625, 6.088409423828125, 6.280738830566406, 4.091636657714844, 5.148651123046875, 2.2857742309570312, 11.30584716796875, 0.6889266967773438, 1.6878471374511719, 6.859102249145508, 0.15240478515625, 0.33034515380859375, -0.8906440734863281, 4.625892639160156, 0.72216796875, 0.993988037109375, 2.81292724609375, 9.327041625976562, 4.666877746582031, 1.2544097900390625, -0.3849601745605469, 7.4051971435546875, -3.833576202392578, -0.171875, 0.18178749084472656, 3.0890445709228516, 3.206174850463867, 3.055849075317383], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000352.npy"}
{"epoch": 0.5321239606953893, "step": 353, "batch_size": 64, "mean": 2.724489688873291, "std": 3.996722936630249, "min": -10.844169616699219, "p10": -1.247732925415039, "median": 2.0629539489746094, "p90": 8.226208114624024, "max": 13.337516784667969, "pos_frac": 0.78125, "sample": [1.707132339477539, 7.6678314208984375, -4.264095306396484, 2.978931427001953, -2.3334789276123047, 1.3372573852539062, 0.6894454956054688, -3.4001312255859375, 9.274246215820312, 2.87823486328125, -0.3096427917480469, 3.5374603271484375, 5.588033676147461, -0.7989501953125, 7.584014892578125, -0.4426097869873047, 5.021087646484375, 3.7044143676757812, 6.892463684082031, 7.942939758300781, 8.278030395507812, 9.156890869140625, 0.22837257385253906, 5.368133544921875, 0.15789031982421875, -1.2711830139160156, 10.44265365600586, 3.144582748413086, 8.327041625976562, 1.6539936065673828, 3.150787353515625, 2.067577362060547, 2.767559051513672, -0.07283401489257812, -10.844169616699219, 5.823282241821289, -1.5548286437988281, -0.9160041809082031, 5.603404998779297, 7.288909912109375, 1.5320320129394531, 8.572280883789062, 13.337516784667969, 2.058330535888672, 1.9286956787109375, 2.4488792419433594, 1.1898880004882812, 1.8069438934326172, 0.4666290283203125, 2.4587249755859375, 2.6368961334228516, 1.0103607177734375, 0.74566650390625, 1.0793533325195312, 0.2997150421142578, 1.88287353515625, 2.1487884521484375, 1.7604751586914062, -1.8598556518554688, -0.6930313110351562, 4.87396240234375, -1.1930160522460938, 8.105289459228516, 3.715251922607422], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000353.npy"}
{"epoch": 0.5336356764928194, "step": 354, "batch_size": 64, "mean": 3.057818651199341, "std": 3.965773344039917, "min": -6.872480392456055, "p10": -2.253314590454101, "median": 2.9009666442871094, "p90": 8.348191452026368, "max": 11.617176055908203, "pos_frac": 0.734375, "sample": [11.320823669433594, 6.519926071166992, -2.657806396484375, 5.582309722900391, -3.1162948608398438, 2.291217803955078, 5.5519561767578125, 2.84796142578125, 6.219207763671875, -0.18074417114257812, 4.16554069519043, 8.791488647460938, 3.785580635070801, -0.20203399658203125, -0.03460693359375, 0.4339485168457031, 7.023429870605469, -1.46917724609375, 9.11849594116211, 11.617176055908203, 7.882610321044922, 4.662254333496094, 0.9821929931640625, 3.6905059814453125, -6.872480392456055, -0.03824424743652344, -2.603240966796875, 7.537302017211914, 3.465972900390625, 3.8177871704101562, 3.1126174926757812, 8.085704803466797, 6.037464141845703, 5.488494873046875, 0.553375244140625, 0.29071807861328125, 2.0379066467285156, -3.293069839477539, 7.498954772949219, -0.27902984619140625, 2.953643798828125, 8.460685729980469, -2.7676467895507812, -1.8996238708496094, 9.00970458984375, -0.625091552734375, 4.8187408447265625, 9.395973205566406, 1.8216686248779297, 0.6339874267578125, 1.887847900390625, 2.8482894897460938, 6.136760711669922, 7.638031005859375, 4.120204925537109, 2.5646495819091797, 1.6005020141601562, 1.7156295776367188, -1.9191627502441406, 3.0883216857910156, 1.5930900573730469, 5.371879577636719, -2.3965225219726562, -0.017368316650390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000354.npy"}
{"epoch": 0.5351473922902494, "step": 355, "batch_size": 64, "mean": 2.908555269241333, "std": 4.054718017578125, "min": -6.101169586181641, "p10": -1.9079750061035157, "median": 3.1978540420532227, "p90": 8.198938751220703, "max": 11.334518432617188, "pos_frac": 0.734375, "sample": [3.814138412475586, 11.334518432617188, 0.0926971435546875, 4.000213623046875, 6.258270263671875, 3.8409652709960938, 2.099945068359375, -2.6659297943115234, -1.9264984130859375, 0.05060577392578125, 5.381565093994141, -0.0642852783203125, 4.489654541015625, -3.7914886474609375, 3.69049072265625, 3.150239944458008, -1.6715774536132812, 3.786773681640625, -3.996368408203125, 1.6957015991210938, -4.3674468994140625, 2.616199493408203, 6.568634033203125, 8.204360961914062, 5.567832946777344, 2.213258743286133, 3.535684585571289, 5.221742630004883, 8.186286926269531, 9.330673217773438, -1.2334136962890625, 1.3418750762939453, -0.40239524841308594, 8.79290771484375, 1.1506423950195312, -3.1491546630859375, 3.398529052734375, -1.8647537231445312, 1.3342399597167969, 6.00178337097168, 3.4761962890625, -0.5214385986328125, 10.464500427246094, -0.5642852783203125, 2.806520462036133, 8.074653625488281, 5.975677490234375, 1.1718406677246094, 10.054378509521484, -6.101169586181641, 8.138969421386719, 4.7760772705078125, 6.5546417236328125, -0.9701728820800781, 10.277191162109375, -1.6788196563720703, -1.1554508209228516, 0.4806194305419922, 7.917568206787109, 0.432769775390625, 4.039958953857422, 2.637714385986328, 3.2454681396484375, 4.597015380859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000355.npy"}
{"epoch": 0.5366591080876795, "step": 356, "batch_size": 64, "mean": 2.592515230178833, "std": 4.679877758026123, "min": -9.806537628173828, "p10": -3.525818252563476, "median": 2.5140247344970703, "p90": 8.423579978942872, "max": 13.863540649414062, "pos_frac": 0.75, "sample": [9.929311752319336, 6.952390670776367, 2.6563186645507812, -5.765022277832031, 8.498559951782227, 2.1682357788085938, 1.2810707092285156, 6.5477142333984375, 2.994100570678711, 0.48241615295410156, -5.477855682373047, -7.930076599121094, 0.7421875, -2.8700942993164062, 6.490360260009766, 9.763618469238281, 2.5616455078125, 0.9761543273925781, 8.746343612670898, -0.02391815185546875, 6.3869781494140625, -9.806537628173828, 4.394775390625, 2.7010955810546875, 2.4681243896484375, 8.248626708984375, -3.806842803955078, -2.7446441650390625, 3.445384979248047, 2.8979644775390625, 1.1489105224609375, 1.4685840606689453, -1.5401191711425781, -0.19052505493164062, -4.900596618652344, 4.1957855224609375, 5.552696228027344, -0.3711700439453125, 4.450828552246094, 7.262992858886719, 1.935791015625, 1.1135940551757812, 8.059329986572266, -0.215484619140625, 2.0773086547851562, 2.2085342407226562, 8.136188507080078, 0.9958572387695312, -1.2031898498535156, 4.2517547607421875, 0.5515403747558594, 4.408430099487305, 10.827203750610352, 6.76983642578125, 0.5663375854492188, 4.098283767700195, 2.559925079345703, 0.30944061279296875, 7.3427276611328125, -1.3026580810546875, -5.747989654541016, 13.863540649414062, 5.780738830566406, 8.54815673828125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000356.npy"}
{"epoch": 0.5381708238851096, "step": 357, "batch_size": 64, "mean": 2.3360087871551514, "std": 3.9558708667755127, "min": -7.212760925292969, "p10": -2.170325088500976, "median": 1.6416893005371094, "p90": 7.433204078674318, "max": 11.587974548339844, "pos_frac": 0.78125, "sample": [5.533988952636719, -0.21709442138671875, 6.855049133300781, 1.35064697265625, -7.212760925292969, 10.007034301757812, -0.221160888671875, 1.6224288940429688, 1.512237548828125, 0.5542716979980469, -1.70166015625, 8.184921264648438, -0.8709754943847656, 2.550945281982422, 11.587974548339844, 3.4533843994140625, 7.568822860717773, 1.2938232421875, 5.693296432495117, 1.1521797180175781, 3.96917724609375, 1.9839248657226562, 2.037067413330078, 2.1928367614746094, 0.9410743713378906, 4.447547912597656, 0.3395862579345703, -0.6049385070800781, -3.54571533203125, 3.5217819213867188, -3.5653076171875, 0.7480411529541016, -2.3711814880371094, 1.66094970703125, 4.243827819824219, 10.184074401855469, 5.9955291748046875, 6.19659423828125, 7.11676025390625, 9.223716735839844, 0.23165130615234375, 3.7911529541015625, 0.6909561157226562, 1.2203598022460938, 6.1171875, 2.050884246826172, 0.5128059387207031, -0.31526947021484375, -5.802188873291016, 1.2751541137695312, 1.1378154754638672, 3.2025909423828125, 6.3533935546875, 2.26885986328125, 6.8770599365234375, 0.12548828125, 1.5957660675048828, 2.6993865966796875, 8.157257080078125, -4.003820419311523, -1.5085906982421875, -6.8874969482421875, 5.335334777832031, 0.9661235809326172], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000357.npy"}
{"epoch": 0.5396825396825397, "step": 358, "batch_size": 64, "mean": 3.41914439201355, "std": 3.877908229827881, "min": -3.9598426818847656, "p10": -1.5025117874145506, "median": 3.067995071411133, "p90": 7.681694030761719, "max": 12.978515625, "pos_frac": 0.796875, "sample": [6.149562835693359, 4.239585876464844, 4.938655853271484, 6.907676696777344, -1.299093246459961, 7.4061737060546875, 4.900062561035156, -1.589691162109375, 0.5986747741699219, 2.5034942626953125, -1.9093666076660156, 4.10546875, 1.8007431030273438, 4.9648895263671875, 7.2769317626953125, 0.554779052734375, -2.7277297973632812, 5.1639862060546875, 6.852783203125, 4.460605621337891, 7.33624267578125, 1.3298759460449219, 0.5988502502441406, 7.116081237792969, 7.229471206665039, -0.1271820068359375, 2.0179595947265625, -0.5154876708984375, -0.9559783935546875, 1.26611328125, -3.6430740356445312, 3.8347320556640625, -0.49200439453125, 1.0797653198242188, 0.1282501220703125, 7.088043212890625, 5.427085876464844, 6.225803375244141, 1.9563980102539062, -0.4726600646972656, 9.604476928710938, 7.725433349609375, 12.978515625, 0.8898963928222656, 10.418556213378906, 11.426795959472656, 3.897735595703125, -2.1342506408691406, 1.6690902709960938, 5.24151611328125, 0.8773365020751953, -2.677133560180664, -3.9598426818847656, 8.729854583740234, 1.6722564697265625, 4.265056610107422, 2.7483673095703125, 2.0868301391601562, 1.753173828125, 4.959255218505859, 3.356250762939453, 7.5796356201171875, 2.7797393798828125, 11.210220336914062], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000358.npy"}
{"epoch": 0.5411942554799698, "step": 359, "batch_size": 64, "mean": 2.186225414276123, "std": 3.9166958332061768, "min": -6.4868316650390625, "p10": -2.4256362915039062, "median": 2.3556346893310547, "p90": 7.495030975341799, "max": 11.033187866210938, "pos_frac": 0.6875, "sample": [-1.2714767456054688, 7.759574890136719, -1.75714111328125, 1.7509765625, 3.0045127868652344, -0.8585357666015625, -2.8568363189697266, 1.908966064453125, -2.01123046875, 9.098114013671875, 10.2493896484375, 1.6480045318603516, 0.5511474609375, 4.7193450927734375, -1.4773635864257812, 4.2369232177734375, 5.751720428466797, -3.0973854064941406, 3.3340606689453125, -5.727977752685547, 0.3928337097167969, 4.671039581298828, 2.544830322265625, -2.2959136962890625, 2.0717620849609375, 6.426429748535156, 2.7777233123779297, 2.792238235473633, 2.522125244140625, -6.4868316650390625, 8.49462890625, 11.033187866210938, -2.0503387451171875, 6.508033752441406, 0.119659423828125, 3.4509506225585938, 4.293979644775391, 4.116306304931641, 0.2593345642089844, 2.423828125, -3.089385986328125, 4.57362174987793, 6.8777618408203125, 5.211849212646484, 1.2477874755859375, -2.481231689453125, -0.6890029907226562, -3.4019927978515625, -1.1820144653320312, 3.3257369995117188, 2.2874412536621094, -2.230754852294922, 0.7623748779296875, -1.0516986846923828, 4.755043029785156, -0.9063949584960938, 5.486732482910156, 2.8108596801757812, 3.828338623046875, 9.019454956054688, 3.9286231994628906, 10.608955383300781, -0.4160881042480469, 1.621826171875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000359.npy"}
{"epoch": 0.5427059712773998, "step": 360, "batch_size": 64, "mean": 2.899702548980713, "std": 4.15297794342041, "min": -4.630218505859375, "p10": -1.943766975402832, "median": 2.125631332397461, "p90": 8.262332344055176, "max": 14.278549194335938, "pos_frac": 0.75, "sample": [0.8793182373046875, 14.278549194335938, -3.35516357421875, 1.6637191772460938, 3.3392791748046875, 7.085777282714844, -2.3693504333496094, 1.7754535675048828, 0.2824211120605469, 4.18218994140625, -1.1046104431152344, -0.5647354125976562, 1.2355499267578125, -2.843372344970703, 6.724788665771484, -3.11083984375, 8.71380615234375, -1.9851837158203125, 3.5366077423095703, 4.5028076171875, 2.3337669372558594, 2.0347747802734375, -4.630218505859375, -1.3459930419921875, 1.8291473388671875, 1.6849517822265625, 5.4613037109375, 2.745502471923828, 5.002010345458984, 3.750823974609375, 3.1052017211914062, 1.7061195373535156, 3.1224098205566406, -0.41015625, 1.1397247314453125, 2.8461761474609375, -3.4248809814453125, 4.662883758544922, 6.766700744628906, 1.70257568359375, 14.068328857421875, 1.69732666015625, 5.293449401855469, -1.847127914428711, 10.074630737304688, 4.188865661621094, 8.014715194702148, 3.2205467224121094, 2.2164878845214844, 14.07861328125, 8.368453979492188, 5.7007293701171875, 9.701778411865234, -0.04074859619140625, -0.3733978271484375, 3.5810317993164062, 0.7691364288330078, -1.59185791015625, 0.5918121337890625, 7.996665954589844, 1.533203125, -0.46282196044921875, 3.8320999145507812, 2.0192108154296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000360.npy"}
{"epoch": 0.54421768707483, "step": 361, "batch_size": 64, "mean": 3.1286067962646484, "std": 4.1300048828125, "min": -6.9487457275390625, "p10": -1.4548191070556638, "median": 2.688518524169922, "p90": 7.043275451660158, "max": 16.056961059570312, "pos_frac": 0.765625, "sample": [4.1618804931640625, 5.682891845703125, 0.3779468536376953, -4.566883087158203, 10.208961486816406, 1.878641128540039, -1.8708839416503906, -2.90679931640625, 2.172832489013672, 1.965860366821289, 0.531341552734375, -4.129390716552734, -0.6090164184570312, 6.3653717041015625, 6.7231597900390625, 4.5021209716796875, -1.6259346008300781, -0.6480178833007812, 0.47437286376953125, 2.07135009765625, 2.365203857421875, 5.669181823730469, -0.13355255126953125, 8.006523132324219, 6.7284698486328125, 3.6958160400390625, 5.153362274169922, -6.9487457275390625, 8.98980712890625, 1.830413818359375, 5.230449676513672, 0.5428390502929688, 5.378314971923828, 16.056961059570312, 6.6357421875, 4.889251708984375, 5.176445007324219, 5.347051620483398, -0.37579345703125, -1.0555496215820312, -0.5049266815185547, 0.4166412353515625, 7.468742370605469, 6.120105743408203, 5.739765167236328, 6.4921875, 2.0849609375, 1.1511764526367188, -2.3317642211914062, 6.409996032714844, 5.937034606933594, 2.4603652954101562, 3.476043701171875, 1.130828857421875, 15.098167419433594, 3.3625831604003906, 7.178192138671875, 2.9166717529296875, -1.0491485595703125, 5.673095703125, 1.4605236053466797, 3.995412826538086, -0.07947921752929688, 1.681671142578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000361.npy"}
{"epoch": 0.54572940287226, "step": 362, "batch_size": 64, "mean": 3.0863213539123535, "std": 3.8392724990844727, "min": -4.896263122558594, "p10": -1.188375473022461, "median": 2.467609405517578, "p90": 8.253136444091798, "max": 13.545021057128906, "pos_frac": 0.8125, "sample": [-1.0974655151367188, 3.0341663360595703, 0.48138427734375, 0.1289844512939453, 1.9128971099853516, -4.896263122558594, 4.853721618652344, -0.11767578125, 11.587890625, 5.9813995361328125, 0.9636306762695312, -2.0935726165771484, 2.3809051513671875, 1.686431884765625, 0.1295452117919922, 0.017650604248046875, 3.2938079833984375, 6.023490905761719, 1.8630256652832031, 6.5719757080078125, 0.5755748748779297, 2.48468017578125, 0.7595577239990234, 0.0601806640625, 6.9142303466796875, 2.6944122314453125, 8.425262451171875, 1.6378173828125, -0.6304779052734375, 5.697498321533203, 9.46710205078125, 0.8003101348876953, 0.718048095703125, 5.565494537353516, 0.34329986572265625, 8.384246826171875, 3.2825965881347656, 2.91644287109375, 2.947803497314453, -0.2728843688964844, 0.3623390197753906, 10.551414489746094, 7.690193176269531, 7.947212219238281, 4.784027099609375, -1.05621337890625, 7.0899505615234375, 7.8708343505859375, 5.381383895874023, 2.0495758056640625, -3.716825485229492, 3.435272216796875, 4.54656982421875, -2.718852996826172, 2.4505386352539062, 1.3795166015625, 9.169708251953125, -1.3720703125, -1.80767822265625, 5.1125640869140625, 13.545021057128906, -1.2273368835449219, 6.611183166503906, 3.9691085815429688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000362.npy"}
{"epoch": 0.54724111866969, "step": 363, "batch_size": 64, "mean": 3.205629587173462, "std": 4.075931549072266, "min": -6.700052261352539, "p10": -1.739201927185058, "median": 2.844789505004883, "p90": 9.494665718078615, "max": 12.213394165039062, "pos_frac": 0.828125, "sample": [4.354137420654297, -4.385490417480469, 9.75311279296875, 3.8796043395996094, 3.3450164794921875, 2.4115447998046875, 2.4698562622070312, 12.213394165039062, 2.878814697265625, -0.9527473449707031, -0.52020263671875, 5.872833251953125, 12.15106201171875, 4.775360107421875, 1.5894088745117188, 12.160629272460938, 1.5673351287841797, 1.27069091796875, 2.9480514526367188, 3.206371307373047, 0.5338516235351562, 4.479494094848633, 4.728883743286133, 10.453994750976562, 8.173904418945312, 3.236204147338867, 1.21588134765625, 1.7610015869140625, -2.0867271423339844, 1.0208187103271484, -3.0253429412841797, -6.700052261352539, 3.2631301879882812, -2.6011314392089844, 8.019588470458984, 4.698549270629883, 1.8350963592529297, 5.065696716308594, 2.3715591430664062, -0.0429534912109375, 2.931201934814453, 2.6837005615234375, 6.1221160888671875, 9.041946411132812, 2.8107643127441406, 7.223091125488281, 3.5875473022460938, 2.0252151489257812, 4.759101867675781, 0.176849365234375, 9.688688278198242, 2.0230941772460938, 2.600391387939453, 6.520505905151367, -1.992013931274414, -6.49755859375, 5.4512176513671875, -1.1493072509765625, 9.696823120117188, 2.085590362548828, 0.026353836059570312, 1.70526123046875, 3.6800994873046875, 2.5693817138671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000363.npy"}
{"epoch": 0.5487528344671202, "step": 364, "batch_size": 64, "mean": 3.3547396659851074, "std": 4.903685569763184, "min": -6.9298248291015625, "p10": -1.9037956237792968, "median": 2.706012725830078, "p90": 9.908059310913087, "max": 13.7847900390625, "pos_frac": 0.734375, "sample": [10.955032348632812, -0.5671291351318359, -0.33472442626953125, 13.7847900390625, 9.989906311035156, 1.536773681640625, 3.913471221923828, 4.249031066894531, -1.4311904907226562, 8.613296508789062, 13.392044067382812, 4.338050842285156, -1.5266075134277344, -2.398700714111328, 9.625961303710938, 12.20254135131836, 1.4623451232910156, 0.7657871246337891, 12.887771606445312, -1.7575759887695312, 9.009986877441406, 1.5796966552734375, 4.749565124511719, 6.389472961425781, -6.498992919921875, -3.757770538330078, 10.83349609375, -1.2076473236083984, 0.5977439880371094, 5.031669616699219, 6.100181579589844, 5.833839416503906, 0.043399810791015625, 2.759521484375, 2.580718994140625, 0.8186492919921875, 7.483654022216797, 5.010345458984375, -0.3746185302734375, 0.2845134735107422, 3.3785324096679688, 2.8635635375976562, -0.78125, 1.6897125244140625, 6.013427734375, -0.315093994140625, 0.4039897918701172, 6.015998840332031, 0.9843597412109375, 9.717082977294922, -1.966461181640625, 7.810197830200195, -6.0833892822265625, 6.370269775390625, -6.9298248291015625, -0.5463714599609375, 5.548057556152344, 5.750480651855469, 0.5362262725830078, 2.4944076538085938, 2.6525039672851562, -3.728607177734375, 9.143306732177734, 6.713911056518555], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000364.npy"}
{"epoch": 0.5502645502645502, "step": 365, "batch_size": 64, "mean": 3.1297316551208496, "std": 3.517530918121338, "min": -5.475189208984375, "p10": -1.0653169631958006, "median": 3.0561485290527344, "p90": 6.917510986328125, "max": 12.722663879394531, "pos_frac": 0.828125, "sample": [0.4897880554199219, 4.320798873901367, 1.7680435180664062, 3.6424198150634766, 3.5583267211914062, 2.5203628540039062, 6.505950927734375, 4.011029243469238, 7.8087615966796875, 2.464752197265625, 4.161323547363281, 2.129669189453125, -5.4103546142578125, 3.503307342529297, 2.5856857299804688, 5.158378601074219, -0.8504695892333984, 3.539447784423828, 1.1061935424804688, 0.14338302612304688, 4.683197021484375, -0.21897125244140625, 5.869052886962891, 1.5133476257324219, 1.3458938598632812, -5.475189208984375, 2.080413818359375, 12.722663879394531, 3.0212173461914062, 6.892463684082031, 1.2746429443359375, 2.8181724548339844, 5.380298614501953, 3.070018768310547, 7.2924041748046875, 3.042278289794922, 6.928245544433594, -0.17724990844726562, 0.09401702880859375, 3.650726318359375, 5.187744140625, 6.583038330078125, 5.726409912109375, -1.478302001953125, 4.6765899658203125, 11.237384796142578, 2.689350128173828, -2.372385025024414, 9.1337890625, 2.2207489013671875, 6.344905853271484, -2.271106719970703, 5.261711120605469, 3.08905029296875, 1.9029350280761719, 3.5873870849609375, -1.1573944091796875, -2.9176483154296875, 1.8330535888671875, 0.8897132873535156, 10.603534698486328, 5.93035888671875, -0.5649032592773438, 5.202415466308594], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000365.npy"}
{"epoch": 0.5517762660619804, "step": 366, "batch_size": 64, "mean": 2.5670762062072754, "std": 4.451139450073242, "min": -8.151206970214844, "p10": -2.8975013732910155, "median": 2.2951221466064453, "p90": 7.301786804199219, "max": 13.8563232421875, "pos_frac": 0.71875, "sample": [-0.6303367614746094, 7.363800048828125, 5.661609649658203, 1.5326881408691406, -1.3311386108398438, 7.71435546875, -4.5562744140625, -2.416196823120117, 1.134429931640625, -2.7388381958007812, 1.1237258911132812, 5.154594421386719, 4.263843536376953, 4.951633453369141, 4.849884033203125, 1.09613037109375, -8.151206970214844, 3.776611328125, 2.0149097442626953, 5.676959991455078, 1.110260009765625, 2.907939910888672, 2.3639144897460938, -1.2459068298339844, -0.7104701995849609, 3.8137855529785156, 4.130138397216797, 6.999092102050781, -4.864570617675781, 0.15275192260742188, 13.8563232421875, -2.0337791442871094, -3.174976348876953, 7.06817626953125, -6.6117706298828125, 1.7607784271240234, 0.17417526245117188, -2.9654998779296875, 3.734128952026367, 2.897369384765625, 0.3027229309082031, -0.871429443359375, 0.7938861846923828, 5.2726287841796875, -1.1387557983398438, -1.1900596618652344, 0.38041114807128906, 6.6510467529296875, 11.005050659179688, 1.2138290405273438, 3.7872695922851562, 7.057518005371094, 6.044109344482422, -4.103736877441406, 5.765283584594727, -0.18215560913085938, 13.33115005493164, 7.1570892333984375, 8.126018524169922, 7.071418762207031, 8.645370483398438, 5.737026214599609, 5.357810974121094, 2.226329803466797], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000366.npy"}
{"epoch": 0.5532879818594104, "step": 367, "batch_size": 64, "mean": 2.3382668495178223, "std": 3.3817694187164307, "min": -8.163982391357422, "p10": -1.35021915435791, "median": 2.710145950317383, "p90": 6.523939514160157, "max": 10.497596740722656, "pos_frac": 0.75, "sample": [1.123220443725586, 7.8992919921875, 3.7094383239746094, -0.3298149108886719, 3.9335193634033203, 1.3116378784179688, 8.486000061035156, 6.601585388183594, 2.7745819091796875, 0.7437171936035156, -0.3396148681640625, 1.216339111328125, 1.456207275390625, 1.8321075439453125, 8.851585388183594, 3.820169448852539, 3.5993499755859375, -0.313507080078125, -1.9838237762451172, 3.770009994506836, 5.3290252685546875, 4.08905029296875, -0.5324935913085938, -3.34942626953125, 2.6548004150390625, -8.163982391357422, -0.801025390625, 2.8126602172851562, 2.765491485595703, 4.82391357421875, 5.28302001953125, 3.0689945220947266, 1.6469306945800781, 3.36456298828125, 2.4606285095214844, 1.2759895324707031, 4.72998046875, -1.1370773315429688, 2.9425525665283203, 6.2596282958984375, 0.18424606323242188, -6.29681396484375, 1.3240680694580078, 10.497596740722656, 3.3983917236328125, -1.5631256103515625, 2.948413848876953, 2.2145423889160156, 4.166511535644531, -0.7334861755371094, 2.1223373413085938, 1.695322036743164, -2.7063446044921875, 3.1617050170898438, 6.4353179931640625, 6.561920166015625, -0.7567253112792969, -1.3360118865966797, 4.936737060546875, 6.980926513671875, -1.3563079833984375, 2.7735824584960938, 1.0378875732421875, 6.273155212402344], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000367.npy"}
{"epoch": 0.5547996976568406, "step": 368, "batch_size": 64, "mean": 2.4575319290161133, "std": 4.021417617797852, "min": -6.019554138183594, "p10": -2.8317726135253904, "median": 1.940582275390625, "p90": 8.10711784362793, "max": 10.272125244140625, "pos_frac": 0.6875, "sample": [-3.7379150390625, 4.371070861816406, -2.6688156127929688, 1.2550601959228516, -0.416778564453125, 1.4122238159179688, 8.27400016784668, 4.710674285888672, 0.06148529052734375, -0.8675918579101562, 5.45294189453125, 1.3018989562988281, 0.9319076538085938, 4.8282012939453125, 10.081558227539062, 1.5870361328125, -1.3828887939453125, -3.2803878784179688, -2.0644149780273438, 6.336517333984375, 1.4983482360839844, 4.583349227905273, 5.0205078125, -2.6639251708984375, 6.647247314453125, 2.0381622314453125, -2.901611328125, 3.4215011596679688, 9.222030639648438, 7.621219635009766, -0.03367424011230469, 0.6529998779296875, 7.654888153076172, -0.7309684753417969, 1.8430023193359375, 4.402751922607422, 1.0525360107421875, 2.144317626953125, 4.0296630859375, 4.649192810058594, 1.7369422912597656, -0.4545154571533203, 10.272125244140625, 9.43682861328125, -3.67626953125, -3.9521560668945312, 3.1956825256347656, -1.738494873046875, 9.039787292480469, -3.2012939453125, 7.746498107910156, 8.261669158935547, 6.6129150390625, 6.2794952392578125, 0.6704902648925781, 3.9099884033203125, 4.408599853515625, -2.4520339965820312, -0.6611747741699219, 3.2102813720703125, 6.266693115234375, 2.285764694213867, -6.019554138183594, -0.233551025390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000368.npy"}
{"epoch": 0.5563114134542706, "step": 369, "batch_size": 64, "mean": 2.4341330528259277, "std": 3.814312696456909, "min": -6.526159286499023, "p10": -2.858379173278807, "median": 2.433929443359375, "p90": 7.585757446289063, "max": 10.03697395324707, "pos_frac": 0.75, "sample": [5.575374603271484, -4.7432098388671875, 2.8765830993652344, 3.75897216796875, -6.526159286499023, 7.7919464111328125, 7.3061981201171875, 1.7905197143554688, 2.3579788208007812, 3.6091651916503906, 1.5342330932617188, 7.6496429443359375, 5.132102966308594, 9.438858032226562, 6.187736511230469, 1.22027587890625, 2.7885208129882812, 2.6737213134765625, 3.1194000244140625, 1.5746116638183594, 1.3271636962890625, -5.465068817138672, 1.2915573120117188, -5.228113174438477, 2.4945144653320312, 5.6851654052734375, -0.9300785064697266, 4.318077087402344, 4.703289031982422, -1.4591922760009766, 8.833908081054688, 1.0375900268554688, 2.5839576721191406, 6.784149169921875, 5.301124572753906, 8.274879455566406, 10.03697395324707, -0.27890777587890625, 1.9398040771484375, -4.15509033203125, 1.858734130859375, 1.8028793334960938, -0.5398273468017578, 2.44732666015625, -0.09117507934570312, 0.107147216796875, 6.744384765625, 3.4645462036132812, 4.882598876953125, 0.49576568603515625, 1.1470870971679688, 2.4205322265625, -0.7255973815917969, 7.4366912841796875, -0.27188873291015625, -1.2071113586425781, 0.39240074157714844, 8.209793090820312, -0.02822113037109375, 3.869537353515625, -3.4580307006835938, 5.029205322265625, -4.70726203918457, 4.292825698852539], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000369.npy"}
{"epoch": 0.5578231292517006, "step": 370, "batch_size": 64, "mean": 3.222743034362793, "std": 4.925309658050537, "min": -9.765764236450195, "p10": -2.3300312042236326, "median": 2.726205825805664, "p90": 8.992859649658204, "max": 14.702781677246094, "pos_frac": 0.75, "sample": [1.8430156707763672, 10.489898681640625, 4.7584075927734375, 2.2894210815429688, 8.717117309570312, 1.3855552673339844, 12.359283447265625, 2.8147811889648438, 14.702781677246094, 1.6453208923339844, 0.6802215576171875, -1.9853363037109375, 0.10967636108398438, 8.51898193359375, 13.323684692382812, 2.049114227294922, 2.0789947509765625, -3.2118453979492188, -5.0882568359375, 5.813323974609375, 5.49676513671875, 5.4690704345703125, 3.4005889892578125, 6.32489013671875, 7.089015960693359, 2.565032958984375, 4.218997955322266, -0.3917198181152344, 2.0223541259765625, -1.33636474609375, -1.4619770050048828, -0.1041259765625, 0.05810546875, -5.3754119873046875, -0.7946853637695312, 7.387931823730469, 11.472076416015625, 3.7730636596679688, 10.053241729736328, 4.9197540283203125, 8.230209350585938, -8.750442504882812, 4.979955673217773, 4.615386962890625, 1.3733692169189453, 8.991134643554688, 5.5778656005859375, 6.098653793334961, 6.605415344238281, 5.9266357421875, -9.765764236450195, 2.607440948486328, 8.993598937988281, -2.4413833618164062, -0.7921218872070312, 1.7289657592773438, 2.8951873779296875, -0.093505859375, -2.070209503173828, 0.03539848327636719, 5.01324462890625, 8.431182861328125, -2.653034210205078, 2.6376304626464844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000370.npy"}
{"epoch": 0.5593348450491308, "step": 371, "batch_size": 64, "mean": 2.909559488296509, "std": 4.642667770385742, "min": -8.259098052978516, "p10": -2.874131393432617, "median": 2.92995548248291, "p90": 8.219552612304689, "max": 13.045904159545898, "pos_frac": 0.703125, "sample": [7.743888854980469, -8.259098052978516, 11.407562255859375, -0.1976470947265625, 7.685863494873047, -2.30120849609375, 7.718132019042969, -1.0961475372314453, 0.33905029296875, 1.718832015991211, -0.8240966796875, 5.174835205078125, -3.9960784912109375, 13.045904159545898, 0.2953624725341797, 6.706718444824219, 0.12372779846191406, 7.158905029296875, 4.579164505004883, 12.536491394042969, 2.0363616943359375, -1.932647705078125, 0.00701141357421875, 0.7967720031738281, 4.841522216796875, 8.932947158813477, 5.21990966796875, 12.515487670898438, 6.744903564453125, 6.341119766235352, 6.698577880859375, 4.757972717285156, -4.8079833984375, -0.2701568603515625, 2.8581981658935547, 1.9479141235351562, -0.37744903564453125, -4.071653366088867, -1.094696044921875, 8.423408508300781, -2.9566078186035156, 6.499847412109375, -0.07662010192871094, 2.685749053955078, 4.356563568115234, -2.6816864013671875, 3.304574966430664, 2.01458740234375, 0.7424087524414062, 3.583484649658203, 3.0017127990722656, -3.555154800415039, -0.22099685668945312, 5.0084381103515625, 11.477676391601562, 0.18773460388183594, 5.7304229736328125, 3.1498489379882812, 6.3466796875, 3.2492141723632812, 5.37115478515625, 6.205078125, -4.0048370361328125, -2.3351478576660156], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000371.npy"}
{"epoch": 0.5608465608465608, "step": 372, "batch_size": 64, "mean": 2.102135181427002, "std": 4.8596367835998535, "min": -8.674156188964844, "p10": -4.627227210998535, "median": 2.468341827392578, "p90": 8.207725524902346, "max": 13.952041625976562, "pos_frac": 0.703125, "sample": [-2.0855636596679688, 3.3733673095703125, 4.809120178222656, 0.579498291015625, 3.5824737548828125, 3.227935791015625, 8.329421997070312, -1.514129638671875, 8.443717956542969, 4.468635559082031, 9.376983642578125, 0.5006217956542969, 2.9558181762695312, 12.664009094238281, 3.8476181030273438, 1.7739028930664062, 5.960548400878906, -7.695808410644531, 2.0005874633789062, 6.8883209228515625, 0.6256580352783203, 0.6731090545654297, 2.762542724609375, 9.24655532836914, -0.19910430908203125, -2.261077880859375, -5.967567443847656, 4.5207672119140625, 13.952041625976562, -1.8160152435302734, 5.604820251464844, 6.6785125732421875, 0.2667961120605469, -1.5308609008789062, -4.408208847045898, 4.178308486938477, -6.265323638916016, 2.3734817504882812, 5.8485870361328125, 2.6597728729248047, 7.92376708984375, 3.6907386779785156, -1.240692138671875, 6.255989074707031, -1.5435867309570312, 4.2288818359375, -8.674156188964844, 0.6890792846679688, 0.5191669464111328, 3.6460952758789062, 0.9368705749511719, -6.8939666748046875, 4.771461486816406, 0.9306697845458984, -1.4569244384765625, -4.721092224121094, -5.940013885498047, 2.6431427001953125, 4.893886566162109, 13.739315032958984, -0.6590366363525391, -2.0596923828125, 1.8636856079101562, 2.563201904296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000372.npy"}
{"epoch": 0.562358276643991, "step": 373, "batch_size": 64, "mean": 2.5857386589050293, "std": 4.668900966644287, "min": -6.075927734375, "p10": -4.178062438964843, "median": 2.454318046569824, "p90": 8.076934814453125, "max": 14.104133605957031, "pos_frac": 0.75, "sample": [-4.591644287109375, 7.513206481933594, 9.027114868164062, 2.562591552734375, 6.377342224121094, -4.520744323730469, -0.3742218017578125, 7.460384368896484, 1.3035316467285156, 2.3831100463867188, 6.734457015991211, -5.653358459472656, 3.9439544677734375, -2.5096588134765625, -5.69139289855957, -1.3434333801269531, 2.2190017700195312, 2.6150665283203125, -6.0353546142578125, -0.5982322692871094, 3.7293624877929688, 0.27374839782714844, 6.701423645019531, -2.3687057495117188, 3.2028045654296875, 8.059738159179688, 0.1347503662109375, 0.009521484375, 5.970924377441406, 3.3433895111083984, 11.012990951538086, 2.5791854858398438, 0.6801910400390625, 8.084304809570312, -3.3660812377929688, 0.1746959686279297, -2.0904388427734375, 2.02874755859375, -0.066314697265625, 7.009014129638672, 4.620796203613281, -6.075927734375, -3.3784713745117188, 1.8545341491699219, 0.486236572265625, 4.296548843383789, -4.708467483520508, 8.315677642822266, 2.5255260467529297, 1.4041748046875, 1.3088455200195312, 10.795928955078125, 1.8954200744628906, 14.104133605957031, 2.6109580993652344, 7.505306243896484, 5.1939697265625, 5.9825592041015625, 1.6680335998535156, 13.219528198242188, 2.852415084838867, 1.7688980102539062, 3.3208885192871094, 7.9947967529296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000373.npy"}
{"epoch": 0.563869992441421, "step": 374, "batch_size": 64, "mean": 3.7042269706726074, "std": 3.697435140609741, "min": -3.78582763671875, "p10": -0.6835647583007812, "median": 3.9014663696289062, "p90": 8.878865432739259, "max": 10.737388610839844, "pos_frac": 0.796875, "sample": [9.73370361328125, 3.8373870849609375, 5.2883758544921875, -1.055755615234375, 1.1554069519042969, 6.300397872924805, 8.55258560180664, 6.679328918457031, 10.737388610839844, 1.9655513763427734, 9.045713424682617, -0.6898040771484375, 3.3648529052734375, 9.018699645996094, 5.552223205566406, -0.264007568359375, 6.629798889160156, 0.9743881225585938, -0.1389923095703125, 0.9798736572265625, 3.0203475952148438, 5.05955696105957, 1.4933090209960938, -0.66900634765625, 2.3714828491210938, 5.413330078125, 1.0585365295410156, 7.462089538574219, 0.4828910827636719, 4.550254821777344, 2.273040771484375, 6.642356872558594, 1.709096908569336, -0.4447956085205078, 3.6583709716796875, 6.950775146484375, 4.170753479003906, -1.8147735595703125, 6.0321807861328125, 5.753227233886719, 4.240879058837891, -1.914113998413086, -0.13051795959472656, 10.606460571289062, 7.178192138671875, 1.0924301147460938, -2.065673828125, 3.965545654296875, 1.765340805053711, 6.428749084472656, 10.233917236328125, 7.32037353515625, 10.421524047851562, 1.9534683227539062, 4.517810821533203, 0.8284015655517578, 8.007011413574219, 5.1753082275390625, 4.138252258300781, 7.396800994873047, -3.207632064819336, 0.079010009765625, -0.01532745361328125, -3.78582763671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000374.npy"}
{"epoch": 0.5653817082388511, "step": 375, "batch_size": 64, "mean": 2.133056640625, "std": 4.124659538269043, "min": -7.446405410766602, "p10": -3.09610595703125, "median": 2.1539764404296875, "p90": 6.780254364013673, "max": 12.912696838378906, "pos_frac": 0.671875, "sample": [-0.9080734252929688, -4.211212158203125, -0.17479705810546875, 3.029520034790039, 4.945272445678711, 1.7357025146484375, 7.641746520996094, 2.2173233032226562, -0.821014404296875, 3.7047061920166016, -0.8320655822753906, 6.175537109375, -0.68951416015625, 8.962394714355469, 3.0668163299560547, -2.354534149169922, 3.051860809326172, -1.0533294677734375, 3.180511474609375, -1.5382003784179688, 2.5542526245117188, 6.0482940673828125, 6.429084777832031, -3.4834136962890625, 3.532947540283203, 7.565093994140625, 0.2782478332519531, -2.155853271484375, 1.8042678833007812, 4.674003601074219, 1.1714935302734375, 0.16973304748535156, -1.2621879577636719, -4.895729064941406, 0.374969482421875, 2.262937545776367, 2.0906295776367188, 1.9470367431640625, 12.912696838378906, -0.7208633422851562, 6.47216796875, 4.949268341064453, 8.292049407958984, -7.446405410766602, 1.4552230834960938, -0.0369415283203125, 11.49365234375, 2.4165191650390625, 5.614410400390625, 3.027141571044922, 5.342681884765625, 6.8504180908203125, -3.606231689453125, 4.797740936279297, 2.0892105102539062, 1.7803421020507812, 3.7714767456054688, -6.67656135559082, -2.8584823608398438, 5.702423095703125, -2.6642189025878906, 6.616539001464844, 5.904838562011719, -3.1979446411132812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000375.npy"}
{"epoch": 0.5668934240362812, "step": 376, "batch_size": 64, "mean": 3.8644466400146484, "std": 4.819431304931641, "min": -5.828067779541016, "p10": -2.2236999511718745, "median": 3.2902183532714844, "p90": 10.86663227081299, "max": 16.804107666015625, "pos_frac": 0.796875, "sample": [2.2511978149414062, 1.8957672119140625, 2.0484561920166016, 4.4415740966796875, 3.71875, -2.6494197845458984, -1.6694793701171875, 8.902435302734375, 3.3905029296875, 4.06805419921875, 7.211395263671875, 5.563764572143555, -5.828067779541016, -0.8980503082275391, -2.3983917236328125, 0.3542613983154297, 11.431900024414062, 6.516410827636719, 4.5259552001953125, 16.804107666015625, 3.8913116455078125, 8.453529357910156, 0.5832138061523438, 7.401092529296875, 10.377105712890625, -1.8160858154296875, 11.331306457519531, 8.673555374145508, 4.9093170166015625, 6.565422058105469, 5.012506484985352, 2.974466323852539, 12.868911743164062, 7.4917449951171875, 3.081186294555664, 7.6864471435546875, 0.87786865234375, -1.3105354309082031, 11.990486145019531, -4.820793151855469, 2.5970687866210938, -2.526662826538086, 7.168304443359375, 1.7188873291015625, 2.6656856536865234, 10.107696533203125, -1.5604839324951172, 3.9871673583984375, -0.861785888671875, 6.415369033813477, -5.292388916015625, 4.801691055297852, 3.1899337768554688, 3.0199317932128906, 12.462644577026367, 0.7870578765869141, 1.9929656982421875, 1.9802284240722656, 11.07642936706543, 6.686065673828125, 0.7846298217773438, 2.7328624725341797, 1.1973285675048828, -3.709228515625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000376.npy"}
{"epoch": 0.5684051398337112, "step": 377, "batch_size": 64, "mean": 2.6792593002319336, "std": 3.8830175399780273, "min": -5.651084899902344, "p10": -1.708763313293457, "median": 2.9099502563476562, "p90": 7.970930290222169, "max": 12.16717529296875, "pos_frac": 0.71875, "sample": [4.987743377685547, -3.0013809204101562, 5.818458557128906, 5.268856048583984, 1.8918304443359375, 0.09683990478515625, -5.651084899902344, -0.8591995239257812, 2.6892852783203125, 5.792112350463867, 8.625205993652344, -0.5180282592773438, 8.868949890136719, 0.08010101318359375, 5.154541015625, 3.5994644165039062, -0.817047119140625, 4.3081207275390625, -1.8190364837646484, 0.8928318023681641, 8.081127166748047, 0.22076416015625, 5.080585479736328, 6.498863220214844, 3.647024154663086, -4.949211120605469, 3.8592529296875, 9.186073303222656, 1.8570747375488281, 1.2005558013916016, 5.405265808105469, -1.416900634765625, 4.335565567016602, 0.3432159423828125, 2.003753662109375, -1.5453681945800781, 6.5523681640625, 7.699708938598633, 3.465606689453125, 4.299888610839844, -1.7213706970214844, 8.989381790161133, 2.2532272338867188, 2.9197120666503906, 3.8519287109375, 10.261009216308594, 3.4885501861572266, -0.855255126953125, -2.1008148193359375, 3.344057083129883, 4.53729248046875, -0.5703201293945312, -0.05963134765625, 12.16717529296875, -1.0862655639648438, -1.6793460845947266, 7.713804244995117, 2.1650829315185547, -5.570852279663086, 1.2466812133789062, 4.94236946105957, -1.4207000732421875, 4.5229034423828125, 2.900188446044922], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000377.npy"}
{"epoch": 0.5699168556311414, "step": 378, "batch_size": 64, "mean": 2.506265163421631, "std": 4.324029922485352, "min": -7.410343170166016, "p10": -1.5727760314941406, "median": 1.6984367370605469, "p90": 9.087373352050783, "max": 14.3897705078125, "pos_frac": 0.71875, "sample": [3.80535888671875, 0.82232666015625, 2.556436538696289, 1.5594558715820312, 6.363792419433594, 5.991584777832031, -3.5290374755859375, -0.6101226806640625, 1.8374176025390625, 10.293304443359375, -7.410343170166016, 2.1218910217285156, 11.570404052734375, 2.006105422973633, 6.911933898925781, 5.3406524658203125, 1.1388931274414062, 3.096128463745117, 10.686004638671875, 0.027652740478515625, 5.589225769042969, 3.21875, -0.2522087097167969, 2.2596664428710938, 10.044307708740234, 0.7587738037109375, 1.1369476318359375, 6.08941650390625, -3.67254638671875, 0.3155517578125, 5.3411865234375, 1.4320526123046875, -3.1332931518554688, 1.9847087860107422, 7.006866455078125, 0.2907142639160156, -0.20386505126953125, -0.03078460693359375, 5.49658203125, -1.6315231323242188, 14.3897705078125, 1.8506622314453125, 1.2702407836914062, 9.246391296386719, 2.003509521484375, 4.1168060302734375, -1.1383781433105469, 10.094121932983398, 3.5337257385253906, 1.379720687866211, 0.8344764709472656, -1.1190299987792969, 1.3852806091308594, -6.025398254394531, -0.7804908752441406, 7.1422271728515625, -1.2955589294433594, -1.435699462890625, 1.4230175018310547, 4.281837463378906, -0.6422767639160156, -4.6445465087890625, -0.8061466217041016, 8.716331481933594], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000378.npy"}
{"epoch": 0.5714285714285714, "step": 379, "batch_size": 64, "mean": 3.2557549476623535, "std": 4.202541351318359, "min": -5.337772369384766, "p10": -1.3688507080078123, "median": 2.933384895324707, "p90": 8.177461242675783, "max": 13.85968017578125, "pos_frac": 0.796875, "sample": [3.322877883911133, -1.155548095703125, 8.305717468261719, -2.701446533203125, 4.555816650390625, 3.9557037353515625, 4.354103088378906, -4.664999008178711, 0.8331947326660156, 3.204296112060547, -0.25940704345703125, 0.7095947265625, -2.6305084228515625, 5.381187438964844, -0.372222900390625, 13.85968017578125, 5.219940185546875, -0.9191436767578125, 7.2896575927734375, 7.216468811035156, 11.98455810546875, 7.0384063720703125, 8.407344818115234, 7.785247802734375, 0.077880859375, -1.46026611328125, 6.3274993896484375, -1.7053680419921875, 7.690620422363281, -4.411834716796875, 11.075767517089844, 4.747980117797852, 2.693796157836914, 1.372314453125, 1.20361328125, 5.1602020263671875, 6.342317581176758, 1.221038818359375, 0.83245849609375, -5.337772369384766, -0.381378173828125, 4.6269378662109375, 1.3874435424804688, 10.711732864379883, 5.973354339599609, 0.002201080322265625, 13.178325653076172, 2.1879119873046875, 1.3455123901367188, 1.9155120849609375, 7.878196716308594, -0.755584716796875, 0.82379150390625, 3.251302719116211, 5.284685134887695, 0.4143524169921875, 2.554004669189453, 5.494724273681641, 0.72705078125, 0.36865997314453125, 3.1729736328125, 6.293815612792969, 4.58441162109375, 0.777618408203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000379.npy"}
{"epoch": 0.5729402872260015, "step": 380, "batch_size": 64, "mean": 3.680061101913452, "std": 4.5199503898620605, "min": -8.326065063476562, "p10": -2.0174823760986325, "median": 3.3715972900390625, "p90": 8.148111343383789, "max": 14.052726745605469, "pos_frac": 0.78125, "sample": [3.2323360443115234, 7.9648590087890625, 3.0043869018554688, 3.0842666625976562, -3.028430938720703, -3.906005859375, 7.497539520263672, 8.055717468261719, -2.0786705017089844, 6.675178527832031, 3.863269805908203, -0.6313972473144531, -0.7951221466064453, 8.099178314208984, 2.4380950927734375, -1.8747100830078125, 0.4511985778808594, 3.1593093872070312, 7.230079650878906, 7.912443161010742, 2.6447906494140625, 5.661003112792969, -1.58831787109375, -8.326065063476562, 11.1136474609375, 3.1099700927734375, 6.630241394042969, 8.169082641601562, -0.2581501007080078, 2.0175628662109375, 5.334144592285156, 6.708282470703125, 3.358367919921875, 2.7462806701660156, 11.843505859375, 1.5655174255371094, 9.66046142578125, 5.5628204345703125, -1.036468505859375, 1.1057281494140625, 5.850948333740234, -7.529308319091797, 11.449384689331055, 11.827774047851562, 4.5797119140625, 5.384372711181641, 7.595607757568359, 2.6378326416015625, -0.6467437744140625, 6.3665924072265625, 2.2125015258789062, 1.6683979034423828, -3.177154541015625, 14.052726745605469, 7.279998779296875, 5.190944671630859, 1.6751346588134766, 3.9779510498046875, 6.618997573852539, 5.1190032958984375, 3.38482666015625, 5.697353363037109, 0.28850555419921875, -2.3573760986328125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000380.npy"}
{"epoch": 0.5744520030234316, "step": 381, "batch_size": 64, "mean": 1.5775420665740967, "std": 4.016316890716553, "min": -11.508102416992188, "p10": -2.1425586700439454, "median": 1.0767860412597656, "p90": 6.280631256103516, "max": 12.08990478515625, "pos_frac": 0.640625, "sample": [-1.501312255859375, 1.030242919921875, 2.6819190979003906, -4.462900161743164, -11.508102416992188, 1.1758003234863281, 0.5083808898925781, 2.255931854248047, 11.732891082763672, 3.8438949584960938, 4.6674652099609375, -2.082365036010742, 0.6489372253417969, 3.7764625549316406, -2.038145065307617, 12.08990478515625, 5.3753509521484375, 0.7890739440917969, 0.9112548828125, -0.7411155700683594, -0.1882648468017578, -1.9631271362304688, 2.8491344451904297, 2.960418701171875, -3.3660240173339844, 3.5751419067382812, -2.446563720703125, -3.4598121643066406, 4.201602935791016, 0.19892120361328125, 3.798248291015625, 2.7234039306640625, 2.5455055236816406, 0.5243072509765625, -1.50421142578125, 6.101268768310547, 2.903423309326172, -0.4324455261230469, -2.168355941772461, 2.8607177734375, 6.594482421875, -0.750335693359375, 4.222164154052734, 1.1233291625976562, 2.6281814575195312, 10.080436706542969, 6.290483474731445, 4.447456359863281, -0.40024566650390625, -1.3533287048339844, 1.7848358154296875, 6.25764274597168, 4.056610107421875, -1.81524658203125, 0.5171985626220703, -0.6445541381835938, 7.5155029296875, -6.17816162109375, -1.0992774963378906, 1.5821666717529297, 0.3201179504394531, -1.4558792114257812, -0.6128463745117188, 8.985103607177734], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000381.npy"}
{"epoch": 0.5759637188208617, "step": 382, "batch_size": 64, "mean": 2.53261137008667, "std": 3.7170181274414062, "min": -5.014291763305664, "p10": -2.011108016967773, "median": 2.6921615600585938, "p90": 6.35545883178711, "max": 13.777278900146484, "pos_frac": 0.703125, "sample": [-0.9911956787109375, -0.11561775207519531, 7.9620208740234375, 5.878963470458984, 5.915031433105469, -2.10809326171875, 1.000762939453125, 0.8311386108398438, 4.778511047363281, 4.78959846496582, 2.2703857421875, 1.1458663940429688, -1.2322502136230469, 5.260353088378906, 4.182914733886719, -1.1622390747070312, -2.438343048095703, 3.2781219482421875, -5.014291763305664, 3.3770217895507812, -0.7397174835205078, 3.1143417358398438, 4.8682708740234375, -1.121490478515625, -0.606109619140625, 3.0260162353515625, 4.14668083190918, 1.104461669921875, -0.5270767211914062, 13.777278900146484, 3.8475341796875, 2.909698486328125, -3.3263092041015625, -3.0417404174804688, 5.525779724121094, 0.23279953002929688, -2.590076446533203, 0.24964141845703125, 2.4097137451171875, 5.24732780456543, 0.75634765625, 12.79425048828125, 1.5119476318359375, 5.5891571044921875, 1.0346450805664062, 6.277854919433594, -1.7848091125488281, -1.0758590698242188, -0.4211883544921875, 6.378303527832031, -0.4368896484375, 6.302154541015625, -2.2567176818847656, 2.660503387451172, 3.699848175048828, 0.8531951904296875, 6.1965789794921875, 3.441631317138672, 7.459430694580078, 9.722373962402344, 2.7238197326660156, 6.4905853271484375, 3.232318878173828, 4.821952819824219], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000382.npy"}
{"epoch": 0.5774754346182918, "step": 383, "batch_size": 64, "mean": 3.2536959648132324, "std": 4.672168731689453, "min": -7.733917236328125, "p10": -1.9671905517578123, "median": 2.796661376953125, "p90": 9.847383117675783, "max": 15.52423095703125, "pos_frac": 0.734375, "sample": [0.06211090087890625, 6.0983734130859375, 2.596771240234375, 1.7508392333984375, 2.6554183959960938, -1.8106689453125, 6.083183288574219, 3.1730117797851562, -1.0411300659179688, -4.113367080688477, 1.7882423400878906, -0.07219696044921875, 2.6542739868164062, 4.067987442016602, 7.116434097290039, 0.176971435546875, 6.105175018310547, -1.6557388305664062, 10.465179443359375, 4.260669708251953, 3.1781158447265625, -2.034271240234375, 8.661514282226562, 6.075653076171875, 9.667840957641602, -1.4151611328125, 4.3657989501953125, 0.9465789794921875, 4.0284576416015625, 10.733840942382812, 9.92432975769043, -2.0697555541992188, 0.079986572265625, 15.52423095703125, -0.400299072265625, 3.5406494140625, 8.796127319335938, -7.733917236328125, 2.9379043579101562, 3.8113327026367188, -2.1194534301757812, 1.8331375122070312, 3.1567535400390625, 2.2122726440429688, 5.010704040527344, 0.2902717590332031, 0.8144798278808594, 3.1087570190429688, -2.45867919921875, 10.190086364746094, -1.003072738647461, 13.641510009765625, 8.006614685058594, 0.5399398803710938, -0.9126129150390625, 9.539047241210938, 10.0394287109375, 2.14715576171875, -1.0966339111328125, -4.906536102294922, 7.849031448364258, -0.8419647216796875, 9.526145935058594, 4.689657211303711], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000383.npy"}
{"epoch": 0.5789871504157218, "step": 384, "batch_size": 64, "mean": 2.899064064025879, "std": 3.86751127243042, "min": -7.194976806640625, "p10": -1.4568908691406248, "median": 3.097949981689453, "p90": 7.426341629028321, "max": 13.118728637695312, "pos_frac": 0.796875, "sample": [-1.50634765625, 6.66912841796875, 2.586101531982422, 1.1803436279296875, -0.3599281311035156, 7.295078277587891, 3.1111679077148438, 5.196693420410156, 5.123874664306641, -0.37494659423828125, 4.2135467529296875, 3.6796722412109375, 0.7179718017578125, 2.580047607421875, -2.003509521484375, 1.7347259521484375, 0.6413497924804688, 11.597984313964844, -1.2166748046875, 7.880306243896484, -1.34149169921875, 4.34881591796875, 2.809040069580078, -5.124908447265625, -2.068014144897461, 5.023994445800781, 12.664405822753906, 3.9117050170898438, -2.2881717681884766, 3.0847320556640625, 9.906660079956055, 4.105522155761719, 0.06321907043457031, 2.1496353149414062, 4.431571960449219, 0.8088912963867188, 13.118728637695312, 1.7318572998046875, -7.194976806640625, 7.482597351074219, 1.738739013671875, 4.6075439453125, 7.843467712402344, -4.189933776855469, 6.901557922363281, 6.628303527832031, 3.963916778564453, 0.7511272430419922, 1.4780616760253906, 3.546205520629883, 3.5772857666015625, 1.0546836853027344, 3.2813549041748047, 3.125, 1.1748046875, -0.2818717956542969, 0.006366729736328125, 5.061958312988281, -1.012664794921875, 1.3715248107910156, 6.701629638671875, 3.572906494140625, 4.376995086669922, 3.8907470703125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000384.npy"}
{"epoch": 0.5804988662131519, "step": 385, "batch_size": 64, "mean": 3.1745595932006836, "std": 3.6281042098999023, "min": -4.796993255615234, "p10": -0.9703273773193359, "median": 3.073838233947754, "p90": 7.899433898925784, "max": 13.370532989501953, "pos_frac": 0.78125, "sample": [-0.8633956909179688, 0.09442520141601562, 5.691509246826172, 4.622802734375, 3.7026748657226562, 3.8493804931640625, 1.8911590576171875, 4.159343719482422, 1.5035171508789062, 2.0439682006835938, -2.8562889099121094, 3.006317138671875, 3.1691818237304688, 5.193855285644531, 3.7394866943359375, 6.247592926025391, 8.972888946533203, 9.610603332519531, 0.08393669128417969, 2.38934326171875, -0.08318328857421875, -4.0239715576171875, 1.6877899169921875, 6.424034118652344, 8.658946990966797, 6.0929412841796875, 5.042154312133789, 2.777496337890625, 1.3751068115234375, 4.518314361572266, 2.8957595825195312, -4.796993255615234, 6.0313262939453125, 0.16794586181640625, 3.457714080810547, 6.891380310058594, 3.0736618041992188, 3.5808486938476562, 3.334627151489258, 4.05769157409668, -1.5755863189697266, -0.5663909912109375, 5.2142486572265625, 7.133277893066406, -0.9910011291503906, 10.991470336914062, 5.721893310546875, -1.854156494140625, -1.39788818359375, -0.922088623046875, 2.1926116943359375, 3.074014663696289, -0.392242431640625, 2.5772247314453125, 2.2196044921875, 7.181724548339844, 0.3173179626464844, 5.903831481933594, -0.7798995971679688, 13.370532989501953, -0.5191841125488281, 1.5567398071289062, 9.092845916748047, 8.207023620605469], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000385.npy"}
{"epoch": 0.582010582010582, "step": 386, "batch_size": 64, "mean": 2.5815157890319824, "std": 3.8417160511016846, "min": -6.7993011474609375, "p10": -1.6614566802978517, "median": 2.190847396850586, "p90": 7.5701736450195325, "max": 11.4862060546875, "pos_frac": 0.734375, "sample": [11.4862060546875, -1.1455421447753906, -0.9927444458007812, 4.130596160888672, 6.8232269287109375, 2.156463623046875, -0.1496429443359375, 6.649444580078125, 2.2126846313476562, 3.8676071166992188, 0.5137786865234375, -2.1658477783203125, -4.952470779418945, 1.073333740234375, -0.9737091064453125, 8.90264892578125, 4.5617218017578125, 9.564800262451172, 5.798606872558594, 2.1747589111328125, 4.1929931640625, 5.236663818359375, 3.3584976196289062, -1.6693038940429688, -1.2244873046875, 4.124046325683594, 9.297380447387695, 0.0186309814453125, 4.514930725097656, 1.5310745239257812, 3.2238922119140625, 5.940399169921875, -1.0081863403320312, 1.9344482421875, -4.460552215576172, -0.13217926025390625, 1.6469078063964844, 0.8059158325195312, 2.8219547271728516, 7.2863006591796875, 2.696136474609375, -2.0764236450195312, 3.5562877655029297, 6.6823883056640625, 8.231391906738281, -1.6431465148925781, -0.1000823974609375, 2.709686279296875, -0.36775970458984375, 10.445953369140625, 0.79705810546875, 0.4644775390625, -4.103668212890625, 2.7975006103515625, 7.69183349609375, 1.9755325317382812, 1.7992706298828125, 2.2069358825683594, 6.0231170654296875, 7.180564880371094, 5.6282501220703125, 1.4508781433105469, 0.9948806762695312, -6.7993011474609375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000386.npy"}
{"epoch": 0.5835222978080121, "step": 387, "batch_size": 64, "mean": 3.56048583984375, "std": 3.5284054279327393, "min": -5.597038269042969, "p10": -0.49422416687011717, "median": 3.021709442138672, "p90": 8.02972640991211, "max": 14.528823852539062, "pos_frac": 0.84375, "sample": [6.2218475341796875, 1.1233367919921875, 7.755119323730469, -0.5082511901855469, 1.75555419921875, 8.147415161132812, 4.44873046875, 1.0300331115722656, 3.734447479248047, 14.528823852539062, 2.750120162963867, -0.5259494781494141, -0.009792327880859375, 2.6042861938476562, 7.716732025146484, 1.8102836608886719, 4.16802978515625, 8.244049072265625, 4.631122589111328, 2.486846923828125, 6.09912109375, 6.065845489501953, 6.069488525390625, -0.46149444580078125, -1.7260322570800781, 4.819427490234375, 1.7438812255859375, -0.18609237670898438, 7.029964447021484, 2.3930931091308594, 1.9476890563964844, 1.8285064697265625, 0.20574188232421875, 9.41868782043457, 0.6353302001953125, 5.285259246826172, 2.6897315979003906, 0.5730171203613281, 2.649332046508789, -1.393890380859375, 1.2294921875, 5.427337646484375, 6.236354827880859, -5.597038269042969, -1.0839576721191406, 4.846532821655273, 5.160133361816406, 10.989906311035156, 1.045583724975586, 7.407016754150391, -1.9609031677246094, 6.070228576660156, 3.538726806640625, 4.3433990478515625, 0.28926849365234375, 3.3198890686035156, 2.91009521484375, 3.0019702911376953, 3.0795745849609375, 8.5084228515625, 10.208282470703125, 2.024007797241211, 6.0359344482421875, 3.0414485931396484], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000387.npy"}
{"epoch": 0.5850340136054422, "step": 388, "batch_size": 64, "mean": 2.6280932426452637, "std": 4.461057186126709, "min": -5.7178192138671875, "p10": -2.2584367752075196, "median": 1.8002815246582031, "p90": 8.573785400390626, "max": 14.038970947265625, "pos_frac": 0.65625, "sample": [6.401084899902344, 11.362846374511719, 6.8912353515625, 12.874507904052734, 1.2677841186523438, 4.087860107421875, 6.198493957519531, 1.7959442138671875, -2.2946853637695312, -1.3187179565429688, 4.448150634765625, 4.752754211425781, -1.3473129272460938, 2.96319580078125, -4.0970001220703125, 14.038970947265625, -1.8609352111816406, 10.180206298828125, 0.5635471343994141, 4.7188262939453125, 7.23486328125, 1.418060302734375, 6.20136833190918, -0.02768707275390625, 7.836612701416016, -5.7178192138671875, 0.7765045166015625, 4.054252624511719, 0.24834632873535156, 1.4699649810791016, -1.6135730743408203, -2.173856735229492, -0.6606769561767578, 10.048593521118164, -0.6000118255615234, 3.7956390380859375, 1.4464263916015625, -3.403594970703125, 4.467121124267578, -2.594196319580078, 6.451698303222656, 8.105224609375, -1.1316452026367188, 7.62347412109375, 6.800359725952148, 2.7062225341796875, -1.180999755859375, 2.0810394287109375, 3.733367919921875, 9.663511276245117, 1.6214370727539062, 8.77459716796875, -0.6545562744140625, 0.0772705078125, -5.3341827392578125, -3.1439361572265625, 5.1620941162109375, 2.6482391357421875, -0.9925994873046875, -0.37364959716796875, 1.8046188354492188, -0.8041610717773438, -1.6541900634765625, 2.3816471099853516], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000388.npy"}
{"epoch": 0.5865457294028723, "step": 389, "batch_size": 64, "mean": 3.223116874694824, "std": 4.283161163330078, "min": -8.859542846679688, "p10": -1.7364627838134763, "median": 3.3747501373291016, "p90": 8.34141082763672, "max": 12.22137451171875, "pos_frac": 0.765625, "sample": [-2.1964492797851562, -2.4181957244873047, 4.559453964233398, 3.687408447265625, 1.9814834594726562, 3.2203750610351562, 6.572120666503906, 8.279769897460938, 10.571212768554688, 8.367828369140625, 0.040771484375, -3.2595367431640625, 5.760307312011719, 5.072954177856445, 4.369270324707031, 5.794338226318359, 4.236976623535156, 6.2501220703125, 4.351837158203125, 2.4165992736816406, 6.705482482910156, 5.23431396484375, 1.0672607421875, 4.436820983886719, 7.5713958740234375, -0.7890510559082031, 4.790214538574219, -0.9140243530273438, 3.64532470703125, 4.48399543762207, -1.392852783203125, 3.516803741455078, -1.0533981323242188, 10.805511474609375, 9.634033203125, 3.232696533203125, 11.652084350585938, 2.8295974731445312, -1.8837242126464844, -8.029764175415039, 7.278045654296875, -1.0577507019042969, 1.4209442138671875, -0.9601287841796875, 5.219270706176758, 0.029449462890625, 5.987236022949219, -0.11502265930175781, -8.859542846679688, 3.152008056640625, 1.56201171875, 2.2504844665527344, 7.880828857421875, -0.3018531799316406, -2.1731224060058594, 12.22137451171875, 10.127708435058594, 3.788055419921875, 2.9835891723632812, 7.67694091796875, 0.034587860107421875, 1.9665298461914062, 2.248502731323242, 0.71795654296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000389.npy"}
{"epoch": 0.5880574452003023, "step": 390, "batch_size": 64, "mean": 3.0623459815979004, "std": 3.9995064735412598, "min": -5.4109039306640625, "p10": -1.455955696105957, "median": 2.356048583984375, "p90": 8.550683975219728, "max": 14.088081359863281, "pos_frac": 0.78125, "sample": [-4.290962219238281, 1.5687198638916016, 3.7840728759765625, -0.0272064208984375, 9.09768295288086, 0.5151100158691406, 4.342014312744141, 6.134540557861328, -1.4301834106445312, 4.178184509277344, 0.009273529052734375, -2.2085037231445312, 4.701652526855469, 0.13891029357910156, -1.8106765747070312, 14.088081359863281, 4.7852783203125, -2.370462417602539, 2.8442764282226562, -0.765869140625, 5.13543701171875, 0.348846435546875, 4.117570877075195, 8.781414031982422, 6.120147705078125, 7.24658203125, 1.984649658203125, -1.9252853393554688, -0.7148399353027344, 11.515060424804688, -1.0322341918945312, 2.2527694702148438, 4.69175910949707, 6.630943298339844, -1.467000961303711, 0.1957244873046875, -5.4109039306640625, 5.960906982421875, 10.800765991210938, 0.7293167114257812, 9.734291076660156, 1.8242931365966797, 5.961997985839844, 3.0145530700683594, 1.2641830444335938, 5.280475616455078, 1.6219139099121094, 2.4593276977539062, 8.012313842773438, 2.6717300415039062, 6.382514953613281, 3.8516693115234375, 1.6811752319335938, 6.409616470336914, 0.9950141906738281, 0.7827987670898438, 1.5630626678466797, 0.590972900390625, 0.6855220794677734, -0.7412033081054688, 6.649269104003906, 5.006011962890625, -0.34445953369140625, 11.38754653930664], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000390.npy"}
{"epoch": 0.5895691609977324, "step": 391, "batch_size": 64, "mean": 3.586272954940796, "std": 5.175695896148682, "min": -8.885738372802734, "p10": -3.6483539581298823, "median": 3.3006105422973633, "p90": 9.927363586425782, "max": 16.941268920898438, "pos_frac": 0.75, "sample": [3.5769729614257812, -8.885738372802734, 10.432060241699219, 3.81243896484375, 3.13299560546875, 6.746284484863281, -0.5574531555175781, 7.7008819580078125, 10.236259460449219, 13.561248779296875, 4.4972381591796875, -7.341865539550781, 6.7690277099609375, -1.2908287048339844, 9.696159362792969, 10.373199462890625, -2.0032577514648438, 2.7400665283203125, -0.7695083618164062, 0.219085693359375, 1.46435546875, 5.066734313964844, 4.0085296630859375, 6.415443420410156, 2.6395263671875, 0.3435497283935547, -4.059734344482422, -2.958303451538086, 6.6505126953125, 2.4722747802734375, 16.941268920898438, 6.39654541015625, 3.4191513061523438, -3.0006675720214844, 1.809173583984375, -1.3832721710205078, 1.2849769592285156, 6.426704406738281, 10.396270751953125, 6.554374694824219, 2.612335205078125, 9.243797302246094, -5.126394271850586, -3.925933837890625, 3.9893035888671875, 3.7060089111328125, 9.464736938476562, 2.9141921997070312, 9.369216918945312, 9.517807006835938, -4.6312255859375, -4.020164489746094, 9.210708618164062, 9.338581085205078, 10.026451110839844, 7.673580169677734, 5.2390899658203125, 3.182069778442383, -0.0214996337890625, 1.4113540649414062, -0.5204010009765625, 3.137096405029297, 1.718740463256836, 2.4793319702148438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000391.npy"}
{"epoch": 0.5910808767951625, "step": 392, "batch_size": 64, "mean": 3.9834842681884766, "std": 5.120331287384033, "min": -7.115720748901367, "p10": -2.106660461425781, "median": 3.570394515991211, "p90": 10.881058120727541, "max": 17.52227783203125, "pos_frac": 0.78125, "sample": [-5.226842880249023, -0.0004940032958984375, 4.706085205078125, 4.7704925537109375, 1.7405853271484375, 0.9741840362548828, 11.446945190429688, -0.7059516906738281, 9.210250854492188, 6.991384506225586, 3.07244873046875, -2.189208984375, 5.772712707519531, 7.996189117431641, 10.313186645507812, 4.5477294921875, 0.2679634094238281, 5.966392517089844, 8.960830688476562, 5.346916198730469, 3.6230697631835938, 3.5533447265625, 3.587444305419922, -7.115720748901367, 11.194992065429688, 4.8228607177734375, 11.124431610107422, 3.4421920776367188, 4.701690673828125, -4.938323974609375, 1.84881591796875, 9.950668334960938, 6.559051513671875, 1.304931640625, -2.7648162841796875, 6.303119659423828, 2.5354061126708984, 5.838722229003906, 3.4858551025390625, 9.310943603515625, 15.489143371582031, 13.110877990722656, -1.9140472412109375, 15.2366943359375, 0.29837799072265625, 1.9267520904541016, -0.5547657012939453, 0.0504608154296875, 10.294044494628906, 3.010295867919922, -1.1161575317382812, 17.52227783203125, -0.48645782470703125, 0.05072021484375, -3.2782745361328125, 0.4449291229248047, 6.127288818359375, -2.3834228515625, 1.359039306640625, 6.3684844970703125, 2.8003063201904297, 4.16944694519043, -0.15828704833984375, 4.2447967529296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000392.npy"}
{"epoch": 0.5925925925925926, "step": 393, "batch_size": 64, "mean": 3.5361287593841553, "std": 3.929734706878662, "min": -3.309356689453125, "p10": -1.1018999099731444, "median": 2.7525291442871094, "p90": 9.157720947265627, "max": 11.890625, "pos_frac": 0.8125, "sample": [3.9395599365234375, 0.2555370330810547, 7.622200012207031, 6.837482452392578, 10.505193710327148, 6.263587951660156, 2.6002578735351562, 2.407745361328125, 0.30745697021484375, 1.4408683776855469, -0.6794147491455078, 1.5477676391601562, 11.521392822265625, 5.4199371337890625, 4.43695068359375, -0.5302047729492188, 2.2848739624023438, 6.555915832519531, -0.9336090087890625, 6.866764068603516, 0.5194931030273438, 1.8173274993896484, -1.226409912109375, 11.45745849609375, 1.4984970092773438, 7.7377166748046875, 0.3948097229003906, 9.891830444335938, 5.1988677978515625, 6.252422332763672, 5.739952087402344, -1.1740245819091797, 7.66009521484375, 10.472808837890625, 0.4870338439941406, 4.493171691894531, -1.6835994720458984, 8.746795654296875, 1.4037399291992188, -0.4411430358886719, 3.3568267822265625, 0.844329833984375, 1.7577590942382812, -2.482471466064453, 2.6649093627929688, 3.9267940521240234, 5.1005096435546875, 1.2499008178710938, 0.509796142578125, 9.333831787109375, 8.624744415283203, 11.890625, 4.92201042175293, -3.309356689453125, 2.869173049926758, 6.382972717285156, 1.0693817138671875, -0.9130859375, -1.3486480712890625, -2.9163055419921875, 3.9651947021484375, 7.7897186279296875, 2.84014892578125, 0.2663726806640625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000393.npy"}
{"epoch": 0.5941043083900227, "step": 394, "batch_size": 64, "mean": 3.1560916900634766, "std": 3.9600489139556885, "min": -4.455188751220703, "p10": -2.540764427185058, "median": 3.0130958557128906, "p90": 8.582078552246095, "max": 12.287174224853516, "pos_frac": 0.796875, "sample": [-1.9246368408203125, 10.491020202636719, 8.020858764648438, 1.451446533203125, 1.4263877868652344, 4.684547424316406, -0.5185699462890625, 3.0621185302734375, 9.594856262207031, 3.7745819091796875, 11.230926513671875, 4.136234283447266, -0.13557815551757812, -1.0138664245605469, -4.455188751220703, 2.645862579345703, 1.5197219848632812, -3.608675003051758, 1.39385986328125, 2.920856475830078, 3.120166778564453, -2.804819107055664, 3.641490936279297, 6.254463195800781, 5.051309585571289, 12.051368713378906, 5.359407424926758, 6.438941955566406, 2.670034408569336, 0.457305908203125, -3.3652000427246094, 1.1800079345703125, 12.287174224853516, 5.41241455078125, 1.241445541381836, 4.256805419921875, 0.129425048828125, 0.6469268798828125, 8.822601318359375, 1.7914199829101562, 5.7325592041015625, -3.8745384216308594, -0.67718505859375, 3.50909423828125, 4.796567916870117, 11.831336975097656, -3.343130111694336, 6.392955780029297, -0.2592887878417969, 2.6997222900390625, -2.9287757873535156, 2.3821868896484375, 2.9640731811523438, 3.9925308227539062, 3.5509109497070312, 0.6764659881591797, 4.9632568359375, 5.616203308105469, 2.670045852661133, 3.9976348876953125, 1.3319320678710938, 7.028472900390625, 4.5969085693359375, 5.000480651855469], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000394.npy"}
{"epoch": 0.5956160241874527, "step": 395, "batch_size": 64, "mean": 2.7706217765808105, "std": 4.100688934326172, "min": -5.518608093261719, "p10": -2.296779441833496, "median": 2.166139602661133, "p90": 8.442936706542971, "max": 11.16567611694336, "pos_frac": 0.75, "sample": [4.971035003662109, 7.19708251953125, 6.7012786865234375, 0.0138702392578125, 6.90118408203125, 6.8653411865234375, -0.30537986755371094, 8.625099182128906, 9.805915832519531, -2.775106430053711, 1.9630584716796875, 5.5768585205078125, 7.5392303466796875, 10.363128662109375, 1.1287574768066406, 11.16567611694336, 2.1057281494140625, 4.396121978759766, -5.23101806640625, 1.9019851684570312, 0.3834190368652344, 1.2855758666992188, 2.2127761840820312, 2.797534942626953, 4.73951530456543, -2.700193405151367, 5.195026397705078, 6.227287292480469, 3.3638248443603516, -1.70684814453125, 3.685333251953125, 9.115142822265625, 1.8184356689453125, -0.4361724853515625, -3.139373779296875, 3.1896209716796875, -5.518608093261719, 0.4500160217285156, 6.593086242675781, -2.364217758178711, -0.921142578125, 0.6009883880615234, 9.779861450195312, 0.6413688659667969, 8.017890930175781, 7.5231170654296875, 1.3779621124267578, 4.363578796386719, -1.14605712890625, -0.8236255645751953, 1.1347579956054688, 10.958709716796875, -0.75250244140625, 2.1195030212402344, 3.1383056640625, 5.1560516357421875, -2.139423370361328, 0.4126625061035156, 3.4011707305908203, -5.260444641113281, 2.731504440307617, -1.6370697021484375, 2.777231216430664, 1.764373779296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000395.npy"}
{"epoch": 0.5971277399848829, "step": 396, "batch_size": 64, "mean": 3.540876865386963, "std": 4.010435581207275, "min": -5.142219543457031, "p10": -1.1237945556640625, "median": 2.958937644958496, "p90": 9.124949836730957, "max": 13.26416015625, "pos_frac": 0.8125, "sample": [10.908203125, 13.26416015625, 5.813240051269531, 12.265960693359375, 8.886331558227539, 10.878702163696289, 3.3589706420898438, 5.153652191162109, 1.8569793701171875, 1.5111885070800781, 4.756690979003906, 0.5932998657226562, 5.7161865234375, -1.419015884399414, 1.988037109375, 0.14788818359375, 6.676177978515625, 6.464092254638672, -0.5190963745117188, 5.162614822387695, 1.0553741455078125, -1.1370925903320312, 12.042549133300781, 3.4761199951171875, 1.4018669128417969, 6.97344970703125, 2.7351036071777344, -5.142219543457031, 6.114898681640625, 1.88555908203125, 2.2133922576904297, -2.5778045654296875, 1.216653823852539, 9.475425720214844, 2.444854736328125, 4.603340148925781, 4.291736602783203, 3.0484161376953125, 8.856475830078125, 8.43438720703125, -1.0927658081054688, 5.041040420532227, 0.09068679809570312, 3.658782958984375, 3.8165969848632812, 1.6652412414550781, 1.8847732543945312, -0.31246185302734375, 5.6626739501953125, -1.7907867431640625, 1.3155136108398438, -3.101055145263672, 3.2025146484375, 0.16017913818359375, -0.30373191833496094, 6.815113067626953, 2.8694591522216797, 7.160436630249023, 2.4898014068603516, 2.0974369049072266, -2.7215423583984375, 4.17054557800293, -0.2663002014160156, 9.227214813232422], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000396.npy"}
{"epoch": 0.5986394557823129, "step": 397, "batch_size": 64, "mean": 2.0799670219421387, "std": 4.857539653778076, "min": -14.334400177001953, "p10": -4.031031990051268, "median": 2.4786376953125, "p90": 7.525711822509766, "max": 12.734273910522461, "pos_frac": 0.71875, "sample": [0.5929775238037109, 2.5420761108398438, -5.12548828125, 3.0573043823242188, -2.7932186126708984, 2.8870697021484375, 5.801677703857422, 1.751434326171875, 2.8745193481445312, 5.500518798828125, 1.7557353973388672, 4.416023254394531, 2.5491256713867188, 9.8656005859375, 6.9795989990234375, 7.609123229980469, -8.711761474609375, -4.5615234375, 1.5603065490722656, -0.7576751708984375, 6.769979476928711, -4.7957305908203125, 3.0128307342529297, 4.260528564453125, 2.1334762573242188, -7.6414794921875, -1.943634033203125, 1.8698196411132812, 6.682975769042969, 6.1862335205078125, 2.4151992797851562, 1.5399150848388672, 8.836708068847656, 4.55255126953125, 10.91689682006836, 3.24334716796875, 3.3007125854492188, 8.683990478515625, 6.886928558349609, 0.8658924102783203, 1.377410888671875, 7.331085205078125, 5.232414245605469, 2.2816162109375, -14.334400177001953, 2.6840667724609375, 10.76507568359375, -0.779510498046875, -2.0866775512695312, -2.1444778442382812, 3.432209014892578, 2.9895858764648438, 1.5829620361328125, 0.9989070892333984, -0.5084686279296875, -0.897369384765625, 2.68109130859375, 2.1038742065429688, -1.6562385559082031, 3.2747802734375, -7.687919616699219, -1.3539810180664062, -0.47300148010253906, 12.734273910522461], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000397.npy"}
{"epoch": 0.600151171579743, "step": 398, "batch_size": 64, "mean": 3.4996397495269775, "std": 4.0850019454956055, "min": -3.585979461669922, "p10": -1.3199325561523434, "median": 2.9555091857910156, "p90": 9.323911285400394, "max": 13.18475341796875, "pos_frac": 0.796875, "sample": [1.932229995727539, 3.7421340942382812, 5.160488128662109, 1.3143157958984375, 5.830589294433594, 1.0718803405761719, 11.579788208007812, 6.38153076171875, -1.6134033203125, 12.249507904052734, 7.193183898925781, 2.1187591552734375, -1.5212478637695312, -0.8424911499023438, 6.8992462158203125, -0.2492828369140625, 6.499073028564453, -2.2577171325683594, 4.014190673828125, -2.848682403564453, 4.79937744140625, 0.29837989807128906, 12.772270202636719, 9.931228637695312, -0.14534759521484375, 8.686080932617188, 10.646860122680664, 4.061399459838867, 4.112531661987305, 2.5327606201171875, 0.27646827697753906, 3.1549224853515625, 1.5307388305664062, 7.962547302246094, 2.8738479614257812, -2.774190902709961, 0.03227996826171875, 8.675945281982422, 1.000274658203125, 2.8659820556640625, -1.7526321411132812, 0.48436737060546875, 5.056526184082031, 0.8622283935546875, 4.462638854980469, 6.6570892333984375, 1.1908245086669922, -0.8501968383789062, 0.46826934814453125, 6.893383026123047, -0.47344207763671875, 13.18475341796875, 3.43902587890625, 3.03717041015625, 2.3764419555664062, -0.15834808349609375, 7.56842041015625, -3.585979461669922, 3.05560302734375, 0.8184795379638672, 3.516021728515625, 6.569332122802734, 9.597267150878906, 1.6112518310546875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000398.npy"}
{"epoch": 0.6016628873771731, "step": 399, "batch_size": 64, "mean": 3.865616798400879, "std": 4.958830833435059, "min": -4.764369964599609, "p10": -1.5015579223632811, "median": 2.9390640258789062, "p90": 11.029093170166016, "max": 19.7962646484375, "pos_frac": 0.8125, "sample": [3.382171630859375, 1.8048019409179688, 4.5301055908203125, 6.0775146484375, -1.342376708984375, 6.955963134765625, 7.111354827880859, 3.2695369720458984, -2.9098472595214844, -4.482940673828125, -3.6236953735351562, 14.422515869140625, 2.3862552642822266, -4.764369964599609, 3.508941650390625, 1.6106719970703125, 3.2847900390625, 3.267364501953125, 2.9518051147460938, 10.84344482421875, -4.49639892578125, 1.2439117431640625, -3.3443450927734375, 5.520994186401367, 12.5782470703125, 0.667327880859375, 3.314289093017578, 19.7962646484375, 0.41729736328125, 0.65655517578125, 2.0842971801757812, -0.006649017333984375, 4.5948028564453125, 0.8990516662597656, 1.1689834594726562, -1.5697784423828125, 1.5448150634765625, 7.1389617919921875, 2.9263229370117188, 6.332427978515625, 11.075828552246094, -0.3143749237060547, 10.8736572265625, 1.2905197143554688, 9.275634765625, 2.6471118927001953, 4.823524475097656, 2.389850616455078, -0.247955322265625, 0.23387908935546875, 9.048641204833984, 11.516117095947266, 11.527412414550781, 2.5806121826171875, 6.85228157043457, 8.205352783203125, 3.0908966064453125, 12.268196105957031, -0.17437744140625, 1.5564689636230469, 10.9200439453125, 0.501617431640625, 2.5854415893554688, 5.121696472167969], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000399.npy"}
{"epoch": 0.6031746031746031, "step": 400, "batch_size": 64, "mean": 2.7275257110595703, "std": 4.263143062591553, "min": -5.24127197265625, "p10": -1.676628494262695, "median": 1.878570556640625, "p90": 8.482844161987305, "max": 14.625434875488281, "pos_frac": 0.71875, "sample": [-0.7941322326660156, 2.0009384155273438, 0.2881889343261719, 4.6160888671875, 7.3543853759765625, 9.39306640625, 0.4272918701171875, 5.320043563842773, 4.480438232421875, 10.959753036499023, 2.3241806030273438, -1.4379901885986328, 2.1878318786621094, 1.3915958404541016, 0.023235321044921875, 1.4951171875, 3.42578125, 8.616191864013672, 3.9427947998046875, 7.221052169799805, 0.788665771484375, 2.0561866760253906, 8.0924072265625, -2.2727489471435547, 1.817352294921875, 5.88862419128418, -0.04751777648925781, 3.7018814086914062, 1.4225959777832031, -0.1259288787841797, -5.24127197265625, -0.7667922973632812, 3.9578399658203125, 2.233123779296875, 14.625434875488281, -0.050495147705078125, -1.8433609008789062, -0.5619621276855469, 7.53515625, 2.6246566772460938, 8.171699523925781, 6.303810119628906, -4.908546447753906, 1.854949951171875, 2.7989425659179688, 0.6022529602050781, 1.902191162109375, -0.45751190185546875, 3.6854782104492188, 13.408317565917969, -0.6379451751708984, 2.8669662475585938, -1.7789020538330078, 1.4974899291992188, -2.8591442108154297, 13.285919189453125, 9.730091094970703, -0.3172149658203125, 0.5033073425292969, 1.335906982421875, -4.189205169677734, 0.4385719299316406, -0.2368621826171875, 4.491371154785156], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000400.npy"}
{"epoch": 0.6046863189720333, "step": 401, "batch_size": 64, "mean": 4.466838836669922, "std": 4.7320404052734375, "min": -4.352104187011719, "p10": -0.9299076080322265, "median": 4.042743682861328, "p90": 10.997653961181644, "max": 17.58795166015625, "pos_frac": 0.828125, "sample": [-2.144683837890625, 6.417976379394531, 7.432437896728516, 3.060150146484375, -1.2728996276855469, 0.1599445343017578, 2.376220703125, 4.292957305908203, 0.7121009826660156, -0.9470977783203125, 10.185859680175781, 0.759307861328125, -3.3273963928222656, -0.8683547973632812, 4.3676605224609375, 0.41608428955078125, 11.345565795898438, 9.61871337890625, 12.652503967285156, 9.670936584472656, 1.5687026977539062, 3.0098304748535156, 0.5379142761230469, 5.148746490478516, -2.8582992553710938, -4.352104187011719, 4.5380859375, 5.807676315307617, 2.860820770263672, 3.353759765625, 8.634918212890625, 5.203971862792969, 3.6519126892089844, 9.7340087890625, -0.08028411865234375, 9.556819915771484, 0.1655406951904297, 5.642372131347656, 1.0142078399658203, 7.299652099609375, 1.0200996398925781, 11.733846664428711, 6.4212493896484375, -1.6795940399169922, 4.9075164794921875, 3.792530059814453, 2.374847412109375, 13.847381591796875, 7.208740234375, 5.818693161010742, 4.549571990966797, 1.1147346496582031, 2.9744186401367188, 2.42230224609375, -0.8897972106933594, 3.6661033630371094, 9.6241455078125, 4.769096374511719, 12.094108581542969, 15.1986083984375, 5.383211135864258, -0.4351463317871094, 7.026821136474609, 17.58795166015625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000401.npy"}
{"epoch": 0.6061980347694633, "step": 402, "batch_size": 64, "mean": 3.4514427185058594, "std": 4.6494340896606445, "min": -6.33740234375, "p10": -2.07775764465332, "median": 3.2568702697753906, "p90": 9.32461662292481, "max": 13.606269836425781, "pos_frac": 0.796875, "sample": [7.907251358032227, 9.780754089355469, 5.661556243896484, 5.763948440551758, 5.086097717285156, 5.490358352661133, 13.488555908203125, -0.171844482421875, 5.137786865234375, 12.1685791015625, 1.3581600189208984, 12.493522644042969, 3.606649398803711, -4.6552581787109375, 6.996734619140625, 2.979755401611328, 0.8340301513671875, 4.287378311157227, 0.8381271362304688, 3.0976409912109375, 12.854976654052734, 0.9586143493652344, 1.3131561279296875, -4.84881591796875, 7.618255615234375, -0.2122211456298828, 4.015460968017578, 7.74383544921875, 0.6508064270019531, 0.942169189453125, 12.037322998046875, 13.606269836425781, 1.4329147338867188, 0.4632568359375, 4.002052307128906, -0.4811115264892578, 6.704833984375, 8.260295867919922, 4.397918701171875, 1.2722644805908203, -2.1733970642089844, -3.430500030517578, -6.33740234375, 7.02668571472168, 6.3518524169921875, 0.9586753845214844, 0.8224430084228516, -0.26910400390625, 1.1331863403320312, 0.16982650756835938, 6.078130722045898, 6.019309997558594, 5.804573059082031, 6.534400939941406, 0.60577392578125, -0.36530303955078125, 1.3229522705078125, -3.659881591796875, -5.0114898681640625, 4.4203948974609375, 3.4160995483398438, 6.522361755371094, -1.8545989990234375, 1.9253005981445312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000402.npy"}
{"epoch": 0.6077097505668935, "step": 403, "batch_size": 64, "mean": 4.581813812255859, "std": 5.186957836151123, "min": -7.547290802001953, "p10": -1.8985530853271484, "median": 4.255580902099609, "p90": 11.750730705261233, "max": 18.70977783203125, "pos_frac": 0.84375, "sample": [-1.8497161865234375, 2.501800537109375, 8.923736572265625, 4.390838623046875, 10.376279830932617, 1.1025543212890625, 14.789482116699219, 5.810955047607422, 2.020008087158203, 6.379230499267578, 8.730249404907227, -2.8653564453125, 11.118375778198242, 1.2738838195800781, 13.17487907409668, 3.4352340698242188, 3.760862350463867, 1.8226680755615234, 3.2668609619140625, 1.0782909393310547, -1.1645946502685547, -2.2015113830566406, 18.70977783203125, 13.141464233398438, 0.89739990234375, 8.51114273071289, 8.645797729492188, 1.0949020385742188, 7.322010040283203, 4.152366638183594, 0.018810272216796875, 3.8545761108398438, 4.896327972412109, 4.810150146484375, 5.066152572631836, -2.9260177612304688, 1.7569255828857422, 14.384544372558594, 1.3673324584960938, 4.358795166015625, 2.1579360961914062, 1.0009593963623047, 12.021739959716797, 2.755359649658203, 7.009027481079102, -0.567108154296875, 5.191867828369141, 9.024223327636719, 7.048297882080078, 1.9481658935546875, 8.561782836914062, 8.771533966064453, -7.547290802001953, -3.7700157165527344, 7.24493408203125, -5.037221908569336, 13.104774475097656, 6.432273864746094, 0.55047607421875, 9.696678161621094, -1.9194831848144531, 2.168060302734375, 6.233375549316406, 5.218294143676758], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000403.npy"}
{"epoch": 0.6092214663643235, "step": 404, "batch_size": 64, "mean": 3.6440062522888184, "std": 4.592062950134277, "min": -6.4692840576171875, "p10": -1.7269548416137694, "median": 3.57476806640625, "p90": 9.728547668457036, "max": 14.45443344116211, "pos_frac": 0.78125, "sample": [2.796539306640625, 2.0774307250976562, 5.950202941894531, 0.3658447265625, 1.132680892944336, -0.6630706787109375, 8.382926940917969, 0.7387046813964844, -1.5777416229248047, 0.2772979736328125, 8.410888671875, 1.787750244140625, -5.954532623291016, -5.221738815307617, -0.3557319641113281, 7.16241455078125, -1.6685543060302734, -3.4428768157958984, 10.467323303222656, 8.075164794921875, 6.968929290771484, 10.293258666992188, 5.3515167236328125, 11.585445404052734, -1.976165771484375, 4.354373931884766, -0.8878860473632812, 4.7288818359375, 4.0921783447265625, 8.199565887451172, 0.93780517578125, 7.119785308837891, -0.21852493286132812, 8.01466178894043, -1.3874893188476562, 0.26358795166015625, 1.8383712768554688, 6.9033050537109375, 3.3295516967773438, 1.6545867919921875, 2.2977218627929688, 3.4705352783203125, 4.638927459716797, 3.9124298095703125, 11.317525863647461, 5.686855316162109, 6.3947296142578125, 8.155670166015625, 14.45443344116211, 2.77740478515625, 0.04187774658203125, 12.44781494140625, 11.931427001953125, 4.680908203125, 4.764469146728516, 5.1974029541015625, 6.1654510498046875, -2.7530555725097656, -1.751983642578125, -6.4692840576171875, 3.6790008544921875, 6.288745880126953, 3.0454368591308594, 2.9352874755859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000404.npy"}
{"epoch": 0.6107331821617535, "step": 405, "batch_size": 64, "mean": 3.459103584289551, "std": 4.692801475524902, "min": -6.7837371826171875, "p10": -1.657554244995117, "median": 2.958986282348633, "p90": 10.10863265991211, "max": 17.909242630004883, "pos_frac": 0.71875, "sample": [7.626182556152344, -0.4707527160644531, -1.5944633483886719, 15.164604187011719, 3.0514984130859375, 2.4099044799804688, 11.018157958984375, 3.6083984375, 0.4286003112792969, -6.7837371826171875, -0.23249053955078125, 1.9018688201904297, -0.5280799865722656, -2.553741455078125, 4.5994110107421875, 1.3565826416015625, 12.790679931640625, -0.5424861907958984, -2.632640838623047, 3.3807830810546875, 7.104034423828125, 7.556243896484375, 3.269115447998047, 10.165718078613281, -1.7710685729980469, 2.4482765197753906, 5.436126708984375, 9.764402389526367, -1.6845932006835938, 5.292165756225586, 1.9620132446289062, -0.8283729553222656, 3.622650146484375, 4.602203369140625, -0.5598907470703125, 7.025875091552734, 2.866474151611328, -1.2647552490234375, 4.949371337890625, 9.121284484863281, 2.0149784088134766, -0.9773139953613281, 3.997283935546875, 2.790407180786133, 1.9509658813476562, 1.3298778533935547, 1.1573944091796875, 4.061759948730469, 9.975433349609375, 5.561000823974609, -0.9804515838623047, 3.3065643310546875, 8.056159973144531, 1.9761276245117188, 10.45526123046875, 17.909242630004883, -3.2988510131835938, 3.9258689880371094, 4.375476837158203, 0.15586090087890625, -0.17945098876953125, 10.619224548339844, 4.7003631591796875, -2.5761032104492188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000405.npy"}
{"epoch": 0.6122448979591837, "step": 406, "batch_size": 64, "mean": 4.082479476928711, "std": 4.794675827026367, "min": -7.522113800048828, "p10": -1.2770919799804688, "median": 4.07562255859375, "p90": 9.9915340423584, "max": 15.499702453613281, "pos_frac": 0.75, "sample": [8.106338500976562, 1.8573760986328125, -1.4311199188232422, -2.2346725463867188, 4.944526672363281, 9.251571655273438, 10.020992279052734, 9.2562255859375, -3.967893600463867, 11.19122314453125, 6.891653060913086, 5.7167205810546875, 1.8837814331054688, -0.7664947509765625, 9.922798156738281, 4.6699371337890625, 10.30047607421875, 2.198972702026367, 3.2651290893554688, 11.345821380615234, 8.364013671875, 1.5486335754394531, 5.613365173339844, 7.991424560546875, 2.68865966796875, 7.848175048828125, -1.2415771484375, 4.6746978759765625, -0.2649421691894531, 1.2169437408447266, -0.03709983825683594, -0.2777671813964844, -7.522113800048828, -1.2261829376220703, 7.192344665527344, 8.479087829589844, 1.2212238311767578, 10.11334228515625, -1.2249794006347656, 13.741737365722656, 5.2771759033203125, -0.9915924072265625, 0.47698211669921875, 15.499702453613281, 0.4810333251953125, 9.187505722045898, -2.04449462890625, 1.7527904510498047, 9.286991119384766, 9.479461669921875, 5.511480331420898, -1.2923126220703125, 7.5633392333984375, 2.2677001953125, 0.7969169616699219, -4.457468032836914, 3.4813079833984375, 6.42236328125, -0.514556884765625, 1.51055908203125, 6.0443267822265625, 1.29541015625, 7.582855224609375, 5.3388671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000406.npy"}
{"epoch": 0.6137566137566137, "step": 407, "batch_size": 64, "mean": 2.883037567138672, "std": 4.597412109375, "min": -7.63702392578125, "p10": -2.06054630279541, "median": 2.2620582580566406, "p90": 8.765903282165528, "max": 18.74993896484375, "pos_frac": 0.75, "sample": [1.3575668334960938, 1.7208881378173828, 1.5182952880859375, 7.607391357421875, -0.8980064392089844, 7.467948913574219, 9.10552978515625, 0.12700843811035156, -4.1663970947265625, 6.337080001831055, 5.5929718017578125, 1.2025947570800781, -0.8636398315429688, -4.578895568847656, -2.62030029296875, 1.6396942138671875, 3.5999755859375, 4.629058837890625, 12.307758331298828, 7.232240676879883, -1.7314300537109375, 0.5682296752929688, 5.034564971923828, 4.257789611816406, 0.3662261962890625, 2.4746627807617188, 2.4328231811523438, 1.6493396759033203, 3.2865142822265625, 18.74993896484375, 3.5418014526367188, 0.207000732421875, 8.9375, 5.737060546875, 7.125396728515625, 0.3589134216308594, 0.29376220703125, 7.3935546875, -3.7889652252197266, 0.31705284118652344, -2.0957489013671875, 10.5263671875, 5.261962890625, 2.5978927612304688, 8.788154602050781, 7.0631103515625, 8.713983535766602, -0.19562530517578125, 5.279144287109375, -0.4872150421142578, 1.6776275634765625, -7.63702392578125, -4.406341552734375, 2.0912933349609375, -1.9784069061279297, 3.7734451293945312, 0.8644027709960938, 9.803276062011719, -1.740325927734375, -1.341552734375, -1.1114959716796875, 3.3243408203125, 5.790615081787109, 4.422027587890625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000407.npy"}
{"epoch": 0.6152683295540439, "step": 408, "batch_size": 64, "mean": 4.296040058135986, "std": 4.661093711853027, "min": -4.9592742919921875, "p10": -1.626905059814453, "median": 3.8928165435791016, "p90": 10.345906829833988, "max": 14.884407043457031, "pos_frac": 0.796875, "sample": [2.175596237182617, 9.765918731689453, 7.1544647216796875, 4.306056976318359, -1.4525032043457031, -2.5382919311523438, -4.9592742919921875, 2.035045623779297, 6.93182373046875, 2.9669189453125, 4.281158447265625, 9.357192993164062, -0.971710205078125, 2.0064239501953125, 11.48846435546875, 8.49847412109375, 0.7047233581542969, -0.787750244140625, 5.941730499267578, 1.5318527221679688, 4.9506683349609375, -0.18335723876953125, -1.9874076843261719, 5.29484748840332, 14.802230834960938, -3.3943634033203125, 3.5461959838867188, 14.884407043457031, 9.666183471679688, 6.910528182983398, 9.412395477294922, -0.106170654296875, 6.661966323852539, 0.04170036315917969, 3.782733917236328, 10.775981903076172, 4.047760009765625, -2.715822219848633, 12.575714111328125, 2.7706756591796875, 2.5538673400878906, 6.62261962890625, 8.627830505371094, 11.64697265625, 10.593215942382812, 2.724630355834961, 7.981025695800781, -1.7016487121582031, 0.6630496978759766, 5.801666259765625, 7.111595153808594, 3.3732032775878906, 9.768852233886719, -3.411092758178711, 4.002899169921875, 8.510986328125, 0.9772300720214844, -0.6490192413330078, 5.651264190673828, 0.6371269226074219, 3.0439071655273438, 1.7730827331542969, 1.2292556762695312, 7.240854263305664], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000408.npy"}
{"epoch": 0.6167800453514739, "step": 409, "batch_size": 64, "mean": 2.3577980995178223, "std": 4.958812713623047, "min": -8.855178833007812, "p10": -2.5126831054687497, "median": 1.1714363098144531, "p90": 9.926129531860353, "max": 14.996414184570312, "pos_frac": 0.71875, "sample": [-0.6444625854492188, 0.5587444305419922, -1.0797119140625, 0.726104736328125, 0.5663490295410156, 0.5144004821777344, 4.240865707397461, -1.8371696472167969, 0.9518318176269531, -4.220024108886719, 0.427734375, 10.083564758300781, 0.34711647033691406, 14.996414184570312, 5.160938262939453, 2.20159912109375, 5.447898864746094, -1.7680740356445312, -1.8694419860839844, 12.121179580688477, 11.852439880371094, -2.1243209838867188, 5.060585021972656, 5.515777587890625, 2.4726104736328125, -2.5451202392578125, 2.0852813720703125, 6.1315460205078125, 4.503942489624023, -5.04931640625, -2.4369964599609375, 1.5569686889648438, 5.251960754394531, -2.354541778564453, 5.825187683105469, 0.26177024841308594, 9.558780670166016, 10.909896850585938, 0.23226165771484375, 1.801309585571289, -0.4413337707519531, -0.884613037109375, 0.29871368408203125, -4.274511337280273, 1.2552261352539062, 1.087646484375, 0.8376808166503906, 1.5590858459472656, -8.855178833007812, 11.8516845703125, 0.9185066223144531, 4.889167785644531, -0.5822563171386719, 7.461555480957031, -8.388824462890625, 1.427459716796875, 4.691314697265625, 0.00250244140625, 4.5884246826171875, 8.435232162475586, 13.421096801757812, -2.9430370330810547, 7.063201904296875, 2.0444679260253906], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000409.npy"}
{"epoch": 0.618291761148904, "step": 410, "batch_size": 64, "mean": 3.2761478424072266, "std": 3.8150415420532227, "min": -4.376649856567383, "p10": -1.3541870117187496, "median": 2.5047569274902344, "p90": 8.543246841430664, "max": 12.922088623046875, "pos_frac": 0.8125, "sample": [1.3525009155273438, 2.166849136352539, 3.9454002380371094, 0.6907215118408203, 11.190567016601562, -2.0716400146484375, 6.938667297363281, 0.3646812438964844, -0.9949798583984375, -4.376649856567383, 5.9660186767578125, 2.0764122009277344, 0.6387004852294922, -2.9479293823242188, 6.969512939453125, 5.748954772949219, 8.495136260986328, 0.7842254638671875, 6.197151184082031, 9.367034912109375, 9.874908447265625, 2.6267013549804688, -1.5081329345703125, 4.708286285400391, 0.875091552734375, 3.9717025756835938, -3.02276611328125, 1.4359893798828125, 4.253692626953125, 0.39658355712890625, 5.162620544433594, -0.8825569152832031, 2.044940948486328, 8.716712951660156, 1.8021430969238281, 12.922088623046875, 5.489715576171875, -0.20470809936523438, 1.9863605499267578, 8.012130737304688, -0.5357513427734375, -2.7448959350585938, 2.243864059448242, 2.3828125, 3.4855575561523438, 4.460491180419922, 7.316829681396484, 1.4768714904785156, 2.741626739501953, 3.471923828125, 3.2667236328125, 3.5207366943359375, 11.702484130859375, 6.7066802978515625, -0.7453384399414062, 1.4618911743164062, 2.3308639526367188, 1.3203105926513672, 1.3359031677246094, 7.3128509521484375, 8.563865661621094, 4.380851745605469, -1.9496231079101562, 5.003089904785156], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000410.npy"}
{"epoch": 0.6198034769463341, "step": 411, "batch_size": 64, "mean": 3.5785140991210938, "std": 5.54014778137207, "min": -8.884246826171875, "p10": -3.130270004272461, "median": 3.613981246948242, "p90": 10.133203887939453, "max": 16.253005981445312, "pos_frac": 0.734375, "sample": [-3.8462371826171875, 10.007949829101562, 10.131610870361328, 10.989021301269531, 3.0985260009765625, 1.3197402954101562, -4.4846343994140625, -6.912731170654297, 4.115901947021484, 0.20036697387695312, 6.4952850341796875, 10.132514953613281, 6.167200088500977, 4.733146667480469, 11.323379516601562, 1.7656745910644531, 5.65632438659668, -1.9626235961914062, 6.615501403808594, 3.335479736328125, 7.061882019042969, 10.133499145507812, 9.75128173828125, 1.5388965606689453, -2.9629898071289062, 1.56829833984375, 0.4591484069824219, 9.83846664428711, -1.2336177825927734, -8.884246826171875, -2.8824462890625, -4.071649551391602, 0.13629150390625, 5.290748596191406, 3.521575927734375, 5.380413055419922, -1.1760520935058594, 15.788589477539062, -2.2119407653808594, 16.253005981445312, 7.238067626953125, 4.23481559753418, -1.35736083984375, 9.098745346069336, 6.3759307861328125, 11.979511260986328, -3.2019615173339844, 5.714508056640625, -0.6978225708007812, -1.5172576904296875, 0.7103195190429688, 0.7407989501953125, 6.275575637817383, 7.497241973876953, 1.1074085235595703, 6.311237335205078, 4.580236434936523, 2.091388702392578, 16.124340057373047, 5.1388702392578125, -1.6784133911132812, 0.6260414123535156, -4.2542572021484375, 3.7063865661621094], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000411.npy"}
{"epoch": 0.6213151927437641, "step": 412, "batch_size": 64, "mean": 3.56341814994812, "std": 4.914975166320801, "min": -3.555004119873047, "p10": -2.412400817871094, "median": 2.7913665771484375, "p90": 10.072862243652343, "max": 15.97756576538086, "pos_frac": 0.734375, "sample": [-2.2775650024414062, 10.626033782958984, 4.248695373535156, 15.97756576538086, 2.7540283203125, 4.537111282348633, 3.856842041015625, 5.739458084106445, -3.555004119873047, 1.738912582397461, 4.839849472045898, 1.7774848937988281, 2.6787338256835938, 15.160894393920898, -1.119680404663086, -2.4653549194335938, 6.416961669921875, -0.7828788757324219, -2.2888412475585938, -0.6640739440917969, 8.201507568359375, 5.6895599365234375, 4.255531311035156, 2.5005874633789062, -0.176910400390625, 0.7901077270507812, 3.5576324462890625, 2.9499359130859375, 6.301548004150391, 0.244476318359375, 2.0881786346435547, 10.101852416992188, 3.4479923248291016, 15.055183410644531, -1.9901351928710938, 0.010040283203125, 2.828704833984375, -1.7222213745117188, 6.892599105834961, -0.7120361328125, -2.6638431549072266, -2.8967132568359375, 10.005218505859375, -3.20587158203125, 0.6137733459472656, 9.902542114257812, 10.196273803710938, 8.256904602050781, 13.862808227539062, -1.5053291320800781, 3.8597412109375, 6.736835479736328, 8.90340805053711, -2.49951171875, 4.967998504638672, 8.749526977539062, 1.9120159149169922, 1.5872726440429688, -3.0756492614746094, 1.6140365600585938, 5.541425704956055, 0.5768928527832031, 8.728713989257812, 0.376983642578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000412.npy"}
{"epoch": 0.6228269085411943, "step": 413, "batch_size": 64, "mean": 3.330319404602051, "std": 5.494553089141846, "min": -8.607799530029297, "p10": -3.6179740905761717, "median": 3.3422679901123047, "p90": 9.986621284484867, "max": 18.227127075195312, "pos_frac": 0.765625, "sample": [0.18685531616210938, -4.0365142822265625, 8.046852111816406, 2.1255626678466797, 0.25067901611328125, 0.7130279541015625, 2.7196273803710938, 13.658695220947266, -4.430364608764648, 4.1479339599609375, -8.607799530029297, 7.382530212402344, 4.120658874511719, -1.2811660766601562, -2.2716293334960938, 1.9334220886230469, -0.592681884765625, 14.442913055419922, 0.16819190979003906, 3.330608367919922, 16.121627807617188, 4.352333068847656, 4.238273620605469, 1.5051536560058594, 5.085441589355469, 4.062204360961914, 18.227127075195312, 10.721420288085938, 1.5622406005859375, 3.8765411376953125, 2.779693603515625, 5.07197380065918, 3.94915771484375, 4.065185546875, 12.828475952148438, -4.3237457275390625, 3.6351470947265625, 0.30254364013671875, 7.2860565185546875, 1.1715660095214844, -6.072013854980469, 8.241806030273438, 7.361381530761719, -3.4711990356445312, 5.7638092041015625, -2.036224365234375, -8.06060791015625, 0.29297637939453125, 2.0108489990234375, 3.0919456481933594, -0.63470458984375, -3.680877685546875, 9.057586669921875, -1.1394615173339844, 1.1203994750976562, 5.023979187011719, 3.3539276123046875, 8.740142822265625, 4.516422271728516, 8.016944885253906, -3.10931396484375, 7.8971405029296875, 7.9449310302734375, 10.38477897644043], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000413.npy"}
{"epoch": 0.6243386243386243, "step": 414, "batch_size": 64, "mean": 3.8041844367980957, "std": 5.155566692352295, "min": -8.26531982421875, "p10": -1.6621540069580072, "median": 3.65411376953125, "p90": 10.666909027099612, "max": 18.311203002929688, "pos_frac": 0.828125, "sample": [7.059053421020508, -1.93988037109375, 6.5488739013671875, 10.178604125976562, 0.30078125, 5.642269134521484, 0.865447998046875, 10.97085189819336, 3.59234619140625, 0.17681884765625, 5.189094543457031, 4.656475067138672, 6.4649505615234375, -4.035045623779297, 10.876182556152344, 4.750133514404297, -5.899444580078125, -5.765785217285156, 2.6526565551757812, 1.6190032958984375, 5.479326248168945, 1.8866348266601562, 16.26699447631836, 6.605659484863281, 12.373458862304688, -0.4073944091796875, 6.036579132080078, 5.824653625488281, 3.8082504272460938, 6.163429260253906, 9.66427230834961, 0.8651580810546875, 5.668558120727539, 0.675140380859375, 9.505630493164062, 2.9264659881591797, 8.736080169677734, 2.9307479858398438, 4.8372802734375, 9.353904724121094, 2.1691207885742188, -3.2746047973632812, 0.3033409118652344, 0.04207420349121094, 8.422771453857422, -1.0141258239746094, 0.3069610595703125, 5.795175552368164, -8.26531982421875, -0.86083984375, 3.71588134765625, 0.443115234375, -2.462116241455078, 0.5663547515869141, 12.835567474365234, 18.311203002929688, 1.5914840698242188, 0.310516357421875, 6.217464447021484, 3.9873905181884766, 0.5714569091796875, -0.9845657348632812, 0.3518791198730469, 11.283409118652344], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000414.npy"}
{"epoch": 0.6258503401360545, "step": 415, "batch_size": 64, "mean": 3.5665831565856934, "std": 5.485740661621094, "min": -9.96298599243164, "p10": -1.9154747009277342, "median": 2.8983230590820312, "p90": 11.11465835571289, "max": 16.978660583496094, "pos_frac": 0.796875, "sample": [-0.1847667694091797, 7.407050132751465, 5.279474258422852, 4.984657287597656, 2.1319427490234375, 2.8775711059570312, 6.8642730712890625, 11.933815002441406, 0.2107086181640625, 9.040016174316406, 4.667575836181641, -7.3168182373046875, 8.192169189453125, 8.972831726074219, -0.9835357666015625, 13.309516906738281, 16.978660583496094, 2.4602737426757812, 11.146873474121094, -1.0599613189697266, 1.134765625, 2.22100830078125, 2.1523818969726562, 0.12194061279296875, 5.7623291015625, 15.068313598632812, -6.729789733886719, 1.4156837463378906, 5.362663269042969, 5.926605224609375, -4.2642822265625, 6.0311279296875, 2.6488075256347656, -1.3566856384277344, -7.259956359863281, 15.313232421875, 7.1502838134765625, 11.03948974609375, 6.2874755859375, -3.177520751953125, 3.6729888916015625, -1.822265625, -1.9554214477539062, -9.96298599243164, 3.8839874267578125, 5.629215240478516, 0.571746826171875, 1.5737113952636719, 1.1285820007324219, 0.5543270111083984, 0.08665847778320312, 2.9190750122070312, 1.2375526428222656, 0.8325233459472656, 3.39459228515625, 4.520469665527344, -0.03464317321777344, 5.232414245605469, 3.337646484375, 0.13282012939453125, 1.3366622924804688, 7.2270965576171875, 12.249130249023438, 10.755233764648438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000415.npy"}
{"epoch": 0.6273620559334845, "step": 416, "batch_size": 64, "mean": 2.9003496170043945, "std": 5.143900394439697, "min": -7.506702423095703, "p10": -2.530492782592773, "median": 2.4228639602661133, "p90": 9.823632621765137, "max": 17.90349578857422, "pos_frac": 0.75, "sample": [1.3138370513916016, -7.506702423095703, -0.6827888488769531, 8.606185913085938, 0.5088214874267578, 11.211645126342773, -1.1560821533203125, -1.48419189453125, 6.217632293701172, 10.275402069091797, 1.8662490844726562, 13.512245178222656, 15.973052978515625, 0.9391670227050781, 0.15111541748046875, 4.330860137939453, 2.0797805786132812, 4.352895736694336, -7.162799835205078, -5.31744384765625, 3.977785110473633, 5.516969680786133, 2.6971569061279297, 4.714256286621094, -0.9083976745605469, 6.623256683349609, 0.8013916015625, 17.90349578857422, 10.207843780517578, 4.973548889160156, 2.2346839904785156, 3.5666656494140625, 2.0696563720703125, -6.170526504516602, 1.7682437896728516, 0.02304840087890625, 0.35569000244140625, -5.318548202514648, 2.424802780151367, 1.9285411834716797, -2.771442413330078, 5.3167266845703125, -1.9682769775390625, 1.1897430419921875, -0.31658935546875, -0.3691902160644531, 9.782508850097656, -7.059814453125, 0.9175910949707031, 7.945228576660156, 2.4209251403808594, 3.2532215118408203, -1.7160911560058594, 6.3464508056640625, -1.1253852844238281, 3.044525146484375, 2.9113540649414062, 6.06640625, 3.69573974609375, 4.953960418701172, 4.0002899169921875, 6.9901580810546875, 9.841257095336914, 4.854633331298828], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000416.npy"}
{"epoch": 0.6288737717309146, "step": 417, "batch_size": 64, "mean": 3.9919230937957764, "std": 5.190679550170898, "min": -10.819160461425781, "p10": -1.5692964553833006, "median": 3.925795555114746, "p90": 10.02219696044922, "max": 17.81005096435547, "pos_frac": 0.765625, "sample": [1.9178047180175781, -0.552520751953125, -0.6458358764648438, -10.819160461425781, 8.98797607421875, 7.250053405761719, -7.9758453369140625, 10.276908874511719, 3.9206809997558594, 4.4235076904296875, 14.371551513671875, 1.53253173828125, 4.2373199462890625, 5.614871978759766, 5.921478271484375, -3.763490676879883, 10.598487854003906, 3.12652587890625, 0.946380615234375, 9.344293594360352, 5.489128112792969, 7.20379638671875, 3.930910110473633, 1.910888671875, 3.049468994140625, -1.3400344848632812, 9.218021392822266, 5.306358337402344, 2.3832263946533203, 4.786216735839844, 0.012475967407226562, 13.099006652832031, 13.095609664916992, -2.1962738037109375, -1.599212646484375, -0.8526992797851562, -0.5506973266601562, 1.6369056701660156, 10.111007690429688, 3.9092559814453125, 9.814971923828125, 1.8883285522460938, 9.482315063476562, 4.0138702392578125, -0.900634765625, 6.511085510253906, 7.206125259399414, 2.9281158447265625, -1.5312328338623047, 6.3828125, 2.5353012084960938, 2.821401596069336, 1.4598922729492188, -4.678508758544922, 7.2059783935546875, -1.5856094360351562, 5.9101104736328125, 7.855648040771484, 1.6949501037597656, -1.0308990478515625, 9.204191207885742, 4.717512130737305, 8.450428009033203, 17.81005096435547], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000417.npy"}
{"epoch": 0.6303854875283447, "step": 418, "batch_size": 64, "mean": 2.5099682807922363, "std": 5.68357515335083, "min": -7.447746276855469, "p10": -4.6198724746704105, "median": 1.9253778457641602, "p90": 8.745722198486328, "max": 23.210861206054688, "pos_frac": 0.65625, "sample": [0.9861526489257812, 9.499578475952148, 4.626678466796875, 3.328643798828125, 1.0919551849365234, 3.8687057495117188, 1.6039314270019531, 6.306816101074219, 14.983642578125, -3.7132492065429688, 8.623214721679688, 14.472404479980469, -3.1928482055664062, 4.0874786376953125, 3.6932601928710938, -4.629783630371094, 5.5936431884765625, 0.1273040771484375, 11.04360580444336, -0.5549087524414062, -0.08281326293945312, 23.210861206054688, 1.8810577392578125, 2.4000396728515625, 7.88031005859375, 1.6336441040039062, -1.978363037109375, 1.9611530303955078, 4.382205963134766, 3.4176177978515625, -7.182891845703125, -6.6029052734375, 3.707202911376953, 4.687200546264648, 14.716922760009766, -0.06200408935546875, -0.42571258544921875, -2.6843643188476562, 3.6290416717529297, 0.7479972839355469, -6.130821228027344, -5.031211853027344, -7.447746276855469, -0.8673343658447266, 1.8051319122314453, -2.2533950805664062, 3.4876022338867188, 3.02239990234375, -4.596746444702148, 1.0212554931640625, 1.8896026611328125, -1.5987701416015625, 8.798225402832031, 4.97979736328125, 5.927558898925781, -4.6370849609375, -3.6683425903320312, 5.84515380859375, 4.022560119628906, 8.465927124023438, 5.6617431640625, -0.16884231567382812, 5.210945129394531, -0.1820526123046875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000418.npy"}
{"epoch": 0.6318972033257747, "step": 419, "batch_size": 64, "mean": 4.270475387573242, "std": 5.146946907043457, "min": -5.945045471191406, "p10": -1.8934326171874998, "median": 3.4920082092285156, "p90": 11.103828048706054, "max": 19.969453811645508, "pos_frac": 0.8125, "sample": [0.6770591735839844, 12.021133422851562, 7.716012954711914, 1.5337677001953125, -4.57716178894043, 2.893451690673828, 11.110931396484375, 2.55596923828125, -0.9762191772460938, 1.1251869201660156, -3.63690185546875, 9.295116424560547, 7.659698486328125, 4.3312835693359375, -1.6085205078125, 0.0153045654296875, 8.373977661132812, 2.3681182861328125, 9.682365417480469, -1.9703826904296875, 3.6126632690429688, 4.764495849609375, -2.2665481567382812, 6.248374938964844, 2.578521728515625, 0.36347198486328125, 8.399208068847656, -3.972747802734375, 6.8336029052734375, 11.283668518066406, -0.37746429443359375, -1.7138824462890625, -3.0358619689941406, 10.488136291503906, 15.35455322265625, 6.572086334228516, 7.475566864013672, 2.2379379272460938, 2.505706787109375, 4.076774597167969, 4.424964904785156, 11.880056381225586, 1.9401702880859375, 1.6913909912109375, 10.006011962890625, -0.6392593383789062, 8.154495239257812, 3.549884796142578, 0.9438972473144531, 19.969453811645508, 11.08725357055664, 0.5478324890136719, 3.434131622314453, 2.3152313232421875, 7.626777648925781, 1.8594608306884766, 6.120491027832031, 5.430042266845703, 12.326858520507812, 0.5396270751953125, 3.1765708923339844, 3.8518447875976562, -5.945045471191406, 8.999824523925781], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000419.npy"}
{"epoch": 0.6334089191232048, "step": 420, "batch_size": 64, "mean": 2.4747402667999268, "std": 5.998582363128662, "min": -11.912750244140625, "p10": -4.271688079833984, "median": 2.189289093017578, "p90": 10.357061958312993, "max": 20.450881958007812, "pos_frac": 0.671875, "sample": [0.9713134765625, -8.72396469116211, 11.264715194702148, -0.35572052001953125, 12.394973754882812, -4.463415145874023, 4.9759063720703125, 1.5684127807617188, 2.0213088989257812, -10.03436279296875, -0.5890617370605469, 5.159690856933594, 0.4543304443359375, 5.56793212890625, -3.163463592529297, 6.7174530029296875, 14.37091064453125, 4.986621856689453, 15.508235931396484, 8.125862121582031, 2.191436767578125, -1.1551399230957031, 0.2692279815673828, 6.23017692565918, 1.3670520782470703, 10.904989242553711, -2.8704280853271484, -6.442569732666016, 2.6394214630126953, 1.7119522094726562, -4.0950164794921875, 3.0062332153320312, 0.6475753784179688, 0.4832496643066406, -2.889934539794922, -2.4142189025878906, -4.347404479980469, -3.406322479248047, 6.8342132568359375, 4.057823181152344, -0.9260711669921875, -11.912750244140625, -2.8345909118652344, 2.1871414184570312, 9.182136535644531, 2.6751441955566406, 3.272777557373047, 10.860601425170898, 2.7970237731933594, 5.492889404296875, 7.3779144287109375, 4.172332763671875, 6.949863433837891, -0.5867424011230469, 2.577739715576172, 5.178165435791016, 8.440185546875, -5.2274932861328125, 6.512332916259766, 5.436248779296875, 1.9703598022460938, 20.450881958007812, -1.9757843017578125, -3.1669273376464844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000420.npy"}
{"epoch": 0.6349206349206349, "step": 421, "batch_size": 64, "mean": 2.7551350593566895, "std": 6.2291483879089355, "min": -12.930017471313477, "p10": -4.917997169494629, "median": 2.4178829193115234, "p90": 10.299053955078126, "max": 20.327728271484375, "pos_frac": 0.6875, "sample": [-5.803287506103516, 6.0848388671875, -1.0751113891601562, 5.411983489990234, 5.005989074707031, 6.451484680175781, 7.972808837890625, -0.6076126098632812, 0.01367950439453125, 20.327728271484375, 14.100662231445312, -5.357307434082031, 9.786705017089844, 5.399265289306641, -4.300899505615234, 1.9993820190429688, 2.6309585571289062, -3.1342315673828125, 10.353660583496094, 5.0109710693359375, 10.16607666015625, 1.4326629638671875, 0.8082180023193359, -0.8525543212890625, 2.493976593017578, -3.487518310546875, 7.6458740234375, -12.930017471313477, -6.16705322265625, -5.665191650390625, 2.912050247192383, 14.636133193969727, 10.171638488769531, -0.21557998657226562, 2.48944091796875, 11.080131530761719, 10.559860229492188, 6.047393798828125, -1.1427803039550781, 0.06824874877929688, 12.931198120117188, 0.20308685302734375, 1.0312347412109375, -2.324981689453125, -5.145503997802734, 5.986700057983398, 0.19235992431640625, 2.346324920654297, -9.92578125, 0.9301605224609375, 9.58697509765625, 8.776058197021484, -0.32738494873046875, 5.797554016113281, 0.376617431640625, -4.093841552734375, 3.6673126220703125, 2.620349884033203, 4.137271881103516, -4.387147903442383, 6.179779052734375, 9.89727783203125, 1.8014907836914062, -4.2511444091796875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000421.npy"}
{"epoch": 0.636432350718065, "step": 422, "batch_size": 64, "mean": 3.4610633850097656, "std": 5.247977256774902, "min": -8.228919982910156, "p10": -2.1625425338745115, "median": 3.167135238647461, "p90": 10.477049255371094, "max": 18.59284210205078, "pos_frac": 0.75, "sample": [10.287117004394531, 13.2392578125, -5.109804153442383, -0.6195888519287109, 7.478202819824219, -1.2280387878417969, 3.0090713500976562, -4.8188934326171875, 3.8344573974609375, 18.59284210205078, 3.2385025024414062, 3.6455650329589844, -1.7605743408203125, 4.78082275390625, 12.402992248535156, 0.13173294067382812, 4.944736480712891, 9.525096893310547, -0.013519287109375, 3.2543487548828125, 4.0395355224609375, 0.13946533203125, -2.669412612915039, 2.6448020935058594, 2.27569580078125, 7.0145111083984375, 4.267669677734375, 6.835113525390625, 15.452835083007812, 3.1771697998046875, 2.328216552734375, 6.981029510498047, 6.2364501953125, -2.197031021118164, 0.7402076721191406, 3.1571006774902344, 10.850292205810547, 0.9650154113769531, 10.558448791503906, 0.4071044921875, -2.0820693969726562, -1.1491165161132812, 6.1044464111328125, 8.475025177001953, 8.1273193359375, -6.077079772949219, -1.4523391723632812, 4.690864562988281, 0.825439453125, 5.355741500854492, -1.6106338500976562, 14.66046142578125, 0.44159889221191406, -1.5251846313476562, 0.7923049926757812, 4.6391143798828125, 0.7371444702148438, -8.228919982910156, 3.071044921875, 6.429290771484375, 6.029966354370117, 2.612516403198242, -2.2826080322265625, 4.905185699462891], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000422.npy"}
{"epoch": 0.6379440665154951, "step": 423, "batch_size": 64, "mean": 4.094779968261719, "std": 5.492380142211914, "min": -7.782463073730469, "p10": -1.9225898742675782, "median": 3.333637237548828, "p90": 11.539707183837892, "max": 18.154987335205078, "pos_frac": 0.796875, "sample": [1.6192092895507812, -2.7775497436523438, 4.989784240722656, 0.11681938171386719, 18.154987335205078, 0.3251609802246094, 2.6845016479492188, -7.782463073730469, 16.194700241088867, 2.04962158203125, -1.7928466796875, 8.96377944946289, 0.39801025390625, -1.9249496459960938, 2.6052627563476562, 1.55767822265625, -0.34792327880859375, -1.917083740234375, 11.825553894042969, -3.1696929931640625, 3.7759552001953125, 5.411842346191406, -3.374725341796875, 0.3824920654296875, -0.7034568786621094, 10.656112670898438, 9.967727661132812, 0.14441490173339844, -7.220489501953125, 0.3390045166015625, 11.190353393554688, 14.583423614501953, 6.1110992431640625, 6.2359161376953125, 5.988170623779297, 5.314033508300781, 8.296310424804688, 6.125732421875, 15.688713073730469, 11.689430236816406, 3.5952606201171875, 1.927541732788086, 0.3267822265625, 4.948638916015625, 6.774864196777344, 9.457298278808594, 8.611455917358398, 2.3732032775878906, 1.2674026489257812, 4.481235504150391, 10.773101806640625, 3.77587890625, 2.929483413696289, 4.139612197875977, 3.9966506958007812, 13.85040283203125, 3.0720138549804688, -0.5664749145507812, -2.4114761352539062, 5.8146514892578125, 8.370132446289062, 0.02203369140625, 2.1811046600341797, -0.019529342651367188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000423.npy"}
{"epoch": 0.6394557823129252, "step": 424, "batch_size": 64, "mean": 4.056219100952148, "std": 5.351885795593262, "min": -6.3142852783203125, "p10": -2.114935302734375, "median": 3.1420650482177734, "p90": 11.452562522888185, "max": 16.852783203125, "pos_frac": 0.75, "sample": [-2.1650924682617188, 10.199851989746094, 2.074920654296875, 7.91960334777832, 1.6554336547851562, 7.978239059448242, 16.852783203125, 5.738090515136719, -5.716911315917969, -6.3142852783203125, -2.1414031982421875, 3.328510284423828, 2.863981246948242, -4.498100280761719, 11.393331527709961, 2.425079345703125, 8.347892761230469, -2.0531768798828125, 0.781494140625, 0.6641616821289062, -0.05882453918457031, 7.763322830200195, 6.261909484863281, 1.9873733520507812, -0.6559486389160156, 5.1486358642578125, 12.945846557617188, -0.8004302978515625, 15.79437255859375, 2.750551223754883, 7.978050231933594, 4.823341369628906, -2.0425338745117188, 2.6294307708740234, 8.305644989013672, 0.2060394287109375, 3.783832550048828, 13.3494873046875, 7.802558898925781, 3.771270751953125, 1.5246353149414062, 5.2361602783203125, 2.3537826538085938, 12.449661254882812, 1.9202880859375, 15.408258438110352, 6.445274353027344, 0.3844318389892578, -2.9724197387695312, -0.20815086364746094, 7.933231353759766, 2.9556198120117188, 4.824310302734375, 9.620397567749023, -0.786651611328125, -0.6925277709960938, 5.6915740966796875, -4.098670959472656, 11.142303466796875, 11.477947235107422, 0.21014404296875, 3.9378128051757812, -0.48406219482421875, 4.246360778808594], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000424.npy"}
{"epoch": 0.6409674981103552, "step": 425, "batch_size": 64, "mean": 3.610257625579834, "std": 4.993038177490234, "min": -6.283332824707031, "p10": -2.726511764526367, "median": 3.101522445678711, "p90": 10.002166748046879, "max": 16.007579803466797, "pos_frac": 0.78125, "sample": [8.515087127685547, 3.6188507080078125, 1.4704818725585938, 3.2177467346191406, 11.171287536621094, -0.7199802398681641, 14.076362609863281, 1.8035736083984375, 4.037696838378906, 3.211009979248047, 2.992034912109375, -3.6649627685546875, 5.459550857543945, 1.154632568359375, -2.412555694580078, 6.4967498779296875, -0.9032974243164062, 9.140510559082031, 1.1050796508789062, 3.455120086669922, -3.297353744506836, 5.7420654296875, 0.4318885803222656, 1.2589836120605469, 7.6489410400390625, 0.2142333984375, 2.7808074951171875, 12.81768798828125, 0.7105236053466797, 16.007579803466797, 8.440719604492188, -1.1627044677734375, 4.844673156738281, 1.5562934875488281, 5.294403076171875, 2.1803321838378906, -0.5614852905273438, 14.363407135009766, 6.260883331298828, 1.3297996520996094, 8.744178771972656, -6.283332824707031, 4.6261444091796875, 4.73712158203125, 3.6223068237304688, -0.33417701721191406, 4.27850341796875, 1.6917572021484375, 5.775230407714844, 2.657482147216797, 9.040386199951172, 2.509227752685547, -5.219961166381836, 9.328079223632812, -3.9613113403320312, 1.2624187469482422, 11.063362121582031, 6.706808090209961, -2.5181198120117188, 9.240089416503906, -2.8158226013183594, 2.4032135009765625, 10.291061401367188, -5.874824523925781], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000425.npy"}
{"epoch": 0.6424792139077853, "step": 426, "batch_size": 64, "mean": 1.964627981185913, "std": 5.185563564300537, "min": -13.784133911132812, "p10": -3.280381774902344, "median": 1.6849117279052734, "p90": 8.282670974731447, "max": 14.70584487915039, "pos_frac": 0.65625, "sample": [0.902496337890625, 0.6171302795410156, 5.03729248046875, 8.823158264160156, 4.690895080566406, 7.6686553955078125, -1.427001953125, 14.70584487915039, 2.726318359375, 3.6647796630859375, -4.959495544433594, 0.7064361572265625, 3.004152297973633, -5.266532897949219, -0.8444061279296875, 2.898721694946289, 4.4954376220703125, -2.341583251953125, 2.7199230194091797, -1.9879302978515625, -3.2489471435546875, 3.071319580078125, -1.60626220703125, -0.0714569091796875, -3.293853759765625, 1.5705337524414062, 2.299928665161133, -13.784133911132812, 4.935047149658203, 3.3383255004882812, 6.1333160400390625, 9.717418670654297, 4.235929489135742, -0.4897022247314453, -8.55267333984375, 1.7992897033691406, -2.7385940551757812, 1.523397445678711, -0.18111419677734375, 7.865478515625, 13.429222106933594, 9.050643920898438, -6.973701477050781, -5.8773193359375, -2.078645706176758, 14.383453369140625, 1.2371673583984375, 1.142425537109375, -0.9912223815917969, 0.5586700439453125, 2.6740474700927734, 5.487762451171875, 1.2925262451171875, 7.63818359375, 3.2456436157226562, -3.0402297973632812, 4.938335418701172, 5.731178283691406, 0.29616546630859375, -0.104400634765625, 7.0486907958984375, 2.571044921875, 8.461467742919922, -2.7424583435058594], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000426.npy"}
{"epoch": 0.6439909297052154, "step": 427, "batch_size": 64, "mean": 3.368882894515991, "std": 5.539160251617432, "min": -8.014789581298828, "p10": -3.2595756530761717, "median": 3.1857452392578125, "p90": 10.167301750183105, "max": 21.4169921875, "pos_frac": 0.71875, "sample": [5.730594635009766, 0.9510402679443359, -1.1125316619873047, -5.299957275390625, -1.620208740234375, 6.180997848510742, 7.883527755737305, -2.338428497314453, 9.453125, -3.6001815795898438, 11.583206176757812, -1.5377998352050781, -6.141595840454102, -3.41668701171875, 4.470977783203125, 4.9015045166015625, 2.974395751953125, 3.2864608764648438, 1.0739822387695312, 2.4678421020507812, -0.3826923370361328, 3.123523712158203, 9.46429443359375, 5.696613311767578, 2.5806198120117188, 6.434316635131836, 3.247966766357422, 7.611530303955078, 1.0475292205810547, 4.810760498046875, 10.104791641235352, 3.5366554260253906, 2.82525634765625, 2.5030746459960938, 4.266693115234375, 12.433734893798828, -0.9746475219726562, -8.014789581298828, 13.682327270507812, 8.361019134521484, 1.7747573852539062, 0.3571929931640625, 4.0801239013671875, -2.8929824829101562, -1.1997489929199219, -5.167707443237305, 6.61737060546875, 7.517364501953125, 2.158203125, -6.161037445068359, 10.464717864990234, 10.194091796875, 0.4138946533203125, 5.154640197753906, 4.668617248535156, 3.0113983154296875, 4.506340026855469, 4.456962585449219, 4.9786529541015625, -2.06072998046875, -0.9627571105957031, -2.0661773681640625, 21.4169921875, 16.0994873046875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000427.npy"}
{"epoch": 0.6455026455026455, "step": 428, "batch_size": 64, "mean": 4.200922966003418, "std": 5.39163875579834, "min": -5.9671173095703125, "p10": -2.152132415771484, "median": 3.044011116027832, "p90": 11.505037879943849, "max": 17.099822998046875, "pos_frac": 0.75, "sample": [-5.9671173095703125, 9.210807800292969, 9.689140319824219, -1.3363018035888672, -0.06324958801269531, 2.5453414916992188, -2.673992156982422, -2.2841262817382812, -5.489219665527344, 2.1780929565429688, 12.81005859375, -2.287689208984375, 0.775360107421875, -0.9880752563476562, 8.911125183105469, 3.961334228515625, 14.868354797363281, 1.8007774353027344, -0.0125885009765625, 2.233123779296875, 6.4860076904296875, 3.126718521118164, 0.36786651611328125, 5.6863555908203125, 9.124900817871094, 3.8791427612304688, 3.191181182861328, -3.6453094482421875, 5.7968597412109375, -0.109375, 8.064483642578125, -2.4753036499023438, 1.2088127136230469, 4.201740264892578, 4.815269470214844, 16.065757751464844, 9.797882080078125, 11.315475463867188, 2.8414344787597656, -1.461984634399414, 8.1597900390625, 17.099822998046875, 7.1884765625, 10.132522583007812, 1.9748077392578125, -0.3553619384765625, 7.3278045654296875, 2.01287841796875, 1.4024124145507812, -1.844146728515625, 1.0301742553710938, 2.9613037109375, 0.32912635803222656, 0.01026153564453125, 11.586278915405273, 10.548612594604492, 13.362476348876953, 9.022428512573242, 6.840660095214844, 5.5183563232421875, 12.451690673828125, 6.931964874267578, 0.6264266967773438, -1.6187610626220703], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000428.npy"}
{"epoch": 0.6470143613000756, "step": 429, "batch_size": 64, "mean": 3.6800642013549805, "std": 5.2918219566345215, "min": -8.816230773925781, "p10": -3.823564910888672, "median": 3.7396345138549805, "p90": 9.876776885986327, "max": 19.019264221191406, "pos_frac": 0.78125, "sample": [7.458335876464844, -4.783843994140625, 0.4013481140136719, 2.1813888549804688, 3.6371383666992188, 3.6303157806396484, 4.766841888427734, 9.732475280761719, -2.524188995361328, 2.1436309814453125, 2.8303985595703125, -3.8355560302734375, 2.111804962158203, 4.400917053222656, 1.2080154418945312, 4.230224609375, -8.816230773925781, 2.7704620361328125, 4.103607177734375, 4.778684616088867, 9.682937622070312, -0.434173583984375, 5.742454528808594, 5.082126617431641, 9.839942932128906, 12.649402618408203, 12.54829216003418, 4.866970062255859, 7.016258239746094, -4.603532791137695, -1.6917266845703125, -6.144954681396484, 0.02838897705078125, -3.7955856323242188, 8.765920639038086, 3.453723907470703, -1.4792938232421875, 9.704212188720703, -0.143341064453125, 1.0269889831542969, 7.436838150024414, 9.892562866210938, 2.120941162109375, 5.790010452270508, 12.460739135742188, -4.793144226074219, -4.9663543701171875, 2.1792545318603516, 2.4295692443847656, 6.2380828857421875, 1.8237495422363281, 11.815406799316406, 7.319343566894531, -2.883869171142578, 4.667072296142578, 4.144317626953125, 10.862274169921875, 2.1553173065185547, 19.019264221191406, 6.843925476074219, 1.9418487548828125, 3.842130661010742, 4.06085205078125, 8.58319091796875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000429.npy"}
{"epoch": 0.6485260770975056, "step": 430, "batch_size": 64, "mean": 4.007453918457031, "std": 5.5173211097717285, "min": -9.733352661132812, "p10": -2.104048919677733, "median": 3.0390663146972656, "p90": 10.800631713867189, "max": 17.300334930419922, "pos_frac": 0.8125, "sample": [0.729736328125, -0.4050445556640625, 1.3031158447265625, 1.16668701171875, -3.6774215698242188, 9.209854125976562, 0.12865066528320312, 17.300334930419922, 1.6929874420166016, 3.9511642456054688, -3.9043006896972656, 10.934646606445312, 10.487930297851562, 3.2914981842041016, 13.120338439941406, 5.8114471435546875, 7.776939392089844, 0.383331298828125, 12.862442016601562, -0.20323944091796875, 0.8545074462890625, 16.96875, 2.8079376220703125, 1.6167984008789062, -0.5637588500976562, 14.062654495239258, 1.2079696655273438, 9.071067810058594, 6.657279968261719, 7.1297149658203125, 4.679515838623047, 2.5555877685546875, 4.161705017089844, 5.980560302734375, -2.663339614868164, 16.056224822998047, 9.908966064453125, 7.1130218505859375, -5.280372619628906, 8.795841217041016, 4.264347076416016, 7.176551818847656, 1.9547138214111328, 0.7162857055664062, 1.1042327880859375, 7.1649169921875, -1.0454559326171875, -0.04899787902832031, -9.733352661132812, -2.5577316284179688, 0.0644989013671875, 3.2701950073242188, 0.9885025024414062, 8.88232421875, 0.706878662109375, 8.374885559082031, 7.6774444580078125, 3.4417800903320312, 4.572803497314453, 7.72161865234375, 2.7015304565429688, -6.347288131713867, 0.99078369140625, 1.3538360595703125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000430.npy"}
{"epoch": 0.6500377928949358, "step": 431, "batch_size": 64, "mean": 3.5732975006103516, "std": 5.2290239334106445, "min": -5.2077178955078125, "p10": -1.8829652786254882, "median": 2.7386465072631836, "p90": 10.257852172851562, "max": 19.900646209716797, "pos_frac": 0.734375, "sample": [-4.4757080078125, 6.429389953613281, 4.274440765380859, 0.07964897155761719, 0.3444976806640625, 7.2778167724609375, 9.4881591796875, 4.026527404785156, 5.5598907470703125, -1.4207305908203125, -0.3859672546386719, 16.230392456054688, 5.064502716064453, -0.6583900451660156, 2.5077953338623047, 3.2601146697998047, 14.675636291503906, -1.8850212097167969, 10.27752685546875, 3.737762451171875, 2.20025634765625, -4.623558044433594, 1.6895523071289062, 12.502737045288086, -5.08612060546875, 13.745075225830078, 10.211944580078125, -1.8781681060791016, 2.1380233764648438, -1.2055301666259766, 1.7276554107666016, 2.670808792114258, 4.5233154296875, 9.89837646484375, -1.9736175537109375, 0.1471099853515625, 11.59613037109375, -1.0008926391601562, -5.2077178955078125, 1.6759223937988281, 1.4784088134765625, -0.11009979248046875, -2.4854278564453125, 7.0027923583984375, 3.2581558227539062, 2.5117149353027344, 5.923540115356445, 3.647186279296875, 3.3357067108154297, 19.900646209716797, 0.5454254150390625, 2.8064842224121094, 1.590667724609375, 4.803302764892578, 4.454998016357422, 4.049480438232422, 0.25299072265625, -0.80108642578125, -0.08736038208007812, 9.538932800292969, 9.823314666748047, 4.031179428100586, 5.774147033691406, -0.713653564453125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000431.npy"}
{"epoch": 0.6515495086923658, "step": 432, "batch_size": 64, "mean": 3.847005844116211, "std": 5.4685163497924805, "min": -10.047988891601562, "p10": -1.769528579711914, "median": 3.2542343139648438, "p90": 10.903890800476075, "max": 16.322608947753906, "pos_frac": 0.703125, "sample": [-0.43704986572265625, -2.6498870849609375, 0.136077880859375, -10.047988891601562, 0.8244533538818359, 1.8075008392333984, -1.598297119140625, 3.987152099609375, -3.4145278930664062, 10.982576370239258, 3.5164051055908203, -6.5190887451171875, -1.08770751953125, 2.2011146545410156, 9.337234497070312, -5.685455322265625, 6.540992736816406, -0.450592041015625, 13.224014282226562, 12.65350341796875, -1.0932769775390625, 7.410133361816406, 10.042510986328125, 3.3921051025390625, 0.5217056274414062, 10.189079284667969, 3.116363525390625, 10.720291137695312, 7.26226806640625, -0.696746826171875, -1.6543083190917969, 0.7810001373291016, 1.0928936004638672, -1.81890869140625, 6.315277099609375, 3.5132217407226562, 11.109733581542969, 8.014266967773438, 2.0408935546875, 7.727272033691406, -2.231151580810547, 5.501354217529297, 7.8256988525390625, 0.43415069580078125, 8.370330810546875, -0.0238037109375, -0.3538398742675781, 11.44017219543457, 0.8171272277832031, 15.122871398925781, 9.132726669311523, 5.0847320556640625, 8.128860473632812, 4.1939239501953125, 6.591522216796875, 16.322608947753906, 0.32489776611328125, 8.229015350341797, 8.237030029296875, -0.5050468444824219, -0.1023406982421875, 10.274368286132812, -0.4078216552734375, 2.494762420654297], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000432.npy"}
{"epoch": 0.6530612244897959, "step": 433, "batch_size": 64, "mean": 4.640135288238525, "std": 6.04095458984375, "min": -7.93731689453125, "p10": -2.2432710647583005, "median": 3.336484909057617, "p90": 13.283320617675784, "max": 18.85723304748535, "pos_frac": 0.765625, "sample": [2.111164093017578, 8.476329803466797, -1.1542625427246094, 3.196300506591797, 5.335243225097656, 1.9140472412109375, 0.7334213256835938, 11.961395263671875, -1.3854522705078125, 3.1793289184570312, 2.1763839721679688, 3.0163116455078125, 1.7156524658203125, -3.7434158325195312, 14.082023620605469, -5.151630401611328, 3.860950469970703, 10.678504943847656, 7.186613082885742, -2.853118896484375, 8.907958984375, 0.81634521484375, 2.4652442932128906, 6.886068344116211, -7.93731689453125, 1.456207275390625, 9.957664489746094, 0.2643623352050781, 8.415042877197266, 9.427661895751953, 1.1982612609863281, 6.239219665527344, -2.303354263305664, 4.8575897216796875, 5.931911468505859, -0.11309051513671875, -2.546344757080078, 3.5087509155273438, 3.4766693115234375, 18.85723304748535, 14.380447387695312, 11.919219970703125, 14.353858947753906, 1.3188400268554688, 8.474174499511719, -7.221183776855469, -0.6755962371826172, 8.950164794921875, -0.06352996826171875, 13.59969711303711, 0.32918548583984375, 5.5853424072265625, 5.5896453857421875, -0.41892242431640625, 0.0558013916015625, 12.545108795166016, 7.572076797485352, -0.0091400146484375, 11.137451171875, 11.7421875, 16.41596221923828, 1.559326171875, 16.829750061035156, -2.103076934814453], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000433.npy"}
{"epoch": 0.654572940287226, "step": 434, "batch_size": 64, "mean": 4.452031135559082, "std": 6.1953606605529785, "min": -10.844451904296875, "p10": -3.4959043502807616, "median": 4.4333953857421875, "p90": 12.896005630493168, "max": 20.497150421142578, "pos_frac": 0.8125, "sample": [-9.949615478515625, 4.543037414550781, 8.323640823364258, 0.2740325927734375, 7.406242370605469, 11.94320297241211, -3.4753284454345703, 7.960594177246094, -4.4226837158203125, 4.0544586181640625, 1.0184917449951172, -4.873100280761719, -10.844451904296875, 6.9418182373046875, 6.228691101074219, 0.365692138671875, 7.916027069091797, 5.324750900268555, 13.18353271484375, 14.215839385986328, 4.323753356933594, -0.5005397796630859, 13.19952392578125, 1.482177734375, -7.424184799194336, 7.686622619628906, 13.46397590637207, 6.295310974121094, 2.2445640563964844, 5.404508590698242, -2.668842315673828, 8.789020538330078, 3.9674606323242188, 20.497150421142578, 15.272247314453125, 7.905055999755859, -3.5047225952148438, 5.598808288574219, 8.570114135742188, 0.1904754638671875, 12.225109100341797, -2.5251235961914062, 3.9995288848876953, 5.833080291748047, 3.7594680786132812, 2.0518569946289062, 2.079059600830078, 9.526390075683594, 8.764572143554688, 7.048152923583984, 0.2617168426513672, 14.702804565429688, -4.956134796142578, 0.33184242248535156, 1.29803466796875, 4.088897705078125, 7.892040252685547, 11.037689208984375, 3.124887466430664, -3.1812782287597656, 4.281925201416016, 5.428230285644531, 8.312440872192383, 2.617450714111328], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000434.npy"}
{"epoch": 0.656084656084656, "step": 435, "batch_size": 64, "mean": 3.7065377235412598, "std": 5.3393754959106445, "min": -11.005455017089844, "p10": -2.4146783828735345, "median": 3.2033987045288086, "p90": 10.844041442871095, "max": 15.736217498779297, "pos_frac": 0.78125, "sample": [12.601812362670898, 1.263540267944336, -0.3633308410644531, 1.815948486328125, -4.16157341003418, 2.2685089111328125, 2.673797607421875, 0.8349266052246094, 9.77878189086914, -0.574859619140625, 4.852382659912109, 9.304496765136719, -0.9232940673828125, 6.442615509033203, 10.56230354309082, 10.563068389892578, 3.0492782592773438, -5.8617095947265625, -0.02535247802734375, 12.810829162597656, 5.996238708496094, 1.8114585876464844, 8.084278106689453, 13.033027648925781, 0.7302398681640625, 3.3785476684570312, -1.1978931427001953, 3.117889404296875, 10.964458465576172, 9.225242614746094, 1.3909378051757812, 8.091629028320312, -7.159555435180664, 3.841550827026367, 2.9052276611328125, 6.92498779296875, -1.1151657104492188, 2.685638427734375, 5.358953475952148, 5.0557861328125, 7.1589508056640625, 12.319671630859375, 15.736217498779297, 6.2259979248046875, 0.11774635314941406, 3.288908004760742, 1.9750747680664062, -1.6576290130615234, -2.7391281127929688, 12.095916748046875, -4.689113616943359, 3.642608642578125, 0.1623210906982422, 3.5058822631835938, 4.971912384033203, 3.445842742919922, 9.601455688476562, -3.1306724548339844, -11.005455017089844, 2.8527679443359375, 0.11314010620117188, 3.42547607421875, 9.31817626953125, 0.4467010498046875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000435.npy"}
{"epoch": 0.6575963718820862, "step": 436, "batch_size": 64, "mean": 3.728400707244873, "std": 5.438070774078369, "min": -6.961889266967773, "p10": -3.151042938232422, "median": 3.824761390686035, "p90": 11.587465476989747, "max": 15.701187133789062, "pos_frac": 0.75, "sample": [3.1862640380859375, 1.2883129119873047, 12.332504272460938, 0.690216064453125, 3.3243255615234375, 1.0381965637207031, 3.869997024536133, 9.147865295410156, -3.7254638671875, 4.3561248779296875, 4.797119140625, 11.667352676391602, 2.2926559448242188, 8.47735595703125, 2.276500701904297, 12.540058135986328, 15.701187133789062, 12.375404357910156, -6.214752197265625, 7.8377532958984375, 5.832813262939453, 4.300630569458008, -3.9644775390625, 0.5415496826171875, 3.7795257568359375, -1.5118179321289062, 15.514312744140625, 1.1533050537109375, 7.549839019775391, 7.106647491455078, 4.430347442626953, -1.201995849609375, -6.961889266967773, 0.111175537109375, -0.8404922485351562, 3.35260009765625, 5.069694519042969, 0.6540126800537109, -1.7516021728515625, -1.2691802978515625, 6.6316680908203125, 5.112113952636719, 4.680141448974609, -3.1782684326171875, -2.830322265625, 3.5773868560791016, -3.470947265625, 15.067733764648438, 5.8687896728515625, 9.9033203125, 6.527549743652344, 1.274911880493164, 7.339256286621094, 5.863410949707031, -1.304840087890625, 4.922496795654297, 10.516029357910156, -3.0875167846679688, 4.3289642333984375, 0.37098121643066406, -5.3766021728515625, 11.40106201171875, -2.989622116088867, 8.315986633300781], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000436.npy"}
{"epoch": 0.6591080876795162, "step": 437, "batch_size": 64, "mean": 3.5760719776153564, "std": 6.47239875793457, "min": -13.666015625, "p10": -3.7119132995605466, "median": 2.818655014038086, "p90": 11.821282386779785, "max": 21.662872314453125, "pos_frac": 0.703125, "sample": [14.23492431640625, 11.94818115234375, 7.011754989624023, 3.2207412719726562, 3.13885498046875, -3.1647911071777344, 11.563766479492188, 7.737800598144531, 21.662872314453125, 1.7295341491699219, 7.586204528808594, 1.235626220703125, -13.666015625, 5.779472351074219, -3.3524017333984375, -2.649932861328125, 4.6176910400390625, 3.947582244873047, 2.704425811767578, -3.8936996459960938, 15.411832809448242, 16.01634979248047, 11.816295623779297, -2.1265907287597656, -3.1099205017089844, 5.8401947021484375, 7.420501708984375, -3.915576934814453, -6.632537841796875, -5.092613220214844, -3.0753631591796875, -2.228292465209961, -0.9376602172851562, -0.5573654174804688, 2.3653182983398438, 13.046518325805664, 6.393165588378906, -3.2864456176757812, 3.0597763061523438, 2.5805130004882812, 2.7300033569335938, 0.5562210083007812, -1.4572563171386719, 8.98876953125, 7.689666748046875, 11.823419570922852, 0.353973388671875, 2.3954410552978516, 11.145790100097656, -3.8659896850585938, 2.37127685546875, 10.893539428710938, 4.692665100097656, 10.981903076171875, 5.9581451416015625, 2.907306671142578, 5.54852294921875, 6.7332000732421875, -7.425201416015625, 0.4970436096191406, -0.4194660186767578, 0.1069488525390625, 1.7851486206054688, 9.496826171875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000437.npy"}
{"epoch": 0.6606198034769464, "step": 438, "batch_size": 64, "mean": 4.400524616241455, "std": 5.594223976135254, "min": -7.878145217895508, "p10": -2.067102432250976, "median": 3.893461227416992, "p90": 11.372162628173829, "max": 17.090606689453125, "pos_frac": 0.78125, "sample": [15.366409301757812, -2.2479934692382812, 17.090606689453125, -6.002719879150391, 0.4545936584472656, -1.6450233459472656, -0.40636634826660156, 5.00016975402832, 1.245168685913086, 10.156997680664062, 3.8089981079101562, 14.437019348144531, -1.4887237548828125, -0.8750152587890625, 7.058719635009766, 0.3694477081298828, 8.89838981628418, -7.878145217895508, 6.8086700439453125, 3.8456077575683594, 10.827983856201172, 7.3038482666015625, -2.490631103515625, 6.208526611328125, 6.8863677978515625, 7.193906784057617, 4.329261779785156, 1.0404930114746094, 12.747028350830078, 10.893913269042969, 6.471893310546875, 1.1632080078125, 10.768241882324219, 2.3177242279052734, 0.5799827575683594, 10.686203002929688, 5.619331359863281, -4.388113021850586, 4.632053375244141, 7.379331588745117, 11.376157760620117, 0.342987060546875, 2.9618473052978516, 12.613235473632812, 8.997283935546875, 3.941314697265625, 9.151588439941406, 1.5450935363769531, 0.15521240234375, 2.2499923706054688, -0.13232040405273438, -0.3188610076904297, 3.825510025024414, 11.280872344970703, 4.476043701171875, -4.803623199462891, 13.898979187011719, 1.6022415161132812, -4.1259765625, 11.36284065246582, 2.113971710205078, -1.2729682922363281, 4.325191497802734, 1.8995857238769531], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000438.npy"}
{"epoch": 0.6621315192743764, "step": 439, "batch_size": 64, "mean": 3.913849353790283, "std": 5.248615264892578, "min": -9.95330810546875, "p10": -2.111870956420898, "median": 3.5733795166015625, "p90": 11.308341217041017, "max": 14.884208679199219, "pos_frac": 0.78125, "sample": [4.194812774658203, -0.16589736938476562, 2.2376041412353516, -6.151336669921875, 6.954275131225586, 5.276885986328125, 2.817354202270508, 3.286865234375, 3.903606414794922, 8.638946533203125, 14.884208679199219, -0.30844688415527344, 7.67888069152832, 2.4413681030273438, 1.9047088623046875, 9.304473876953125, 13.367431640625, 0.59051513671875, 6.51409912109375, -1.911590576171875, 1.3509254455566406, 7.030179977416992, 6.047065734863281, -3.024656295776367, 8.438461303710938, 3.3460159301757812, 13.6553955078125, -3.890972137451172, 0.7535991668701172, 13.044727325439453, -2.049335479736328, 11.390800476074219, -1.1332626342773438, -2.138671875, 6.847938537597656, 6.3559112548828125, 0.5830345153808594, 2.725109100341797, -9.95330810546875, -0.04689788818359375, -5.88397216796875, 4.7478179931640625, 11.115936279296875, 14.66473388671875, 5.439023971557617, 8.6484375, 3.8017425537109375, 11.954818725585938, 1.032989501953125, 3.207242965698242, 3.8048133850097656, 4.49310302734375, 2.7482986450195312, 6.8462066650390625, 4.586843490600586, 10.826461791992188, 2.10821533203125, 1.44781494140625, -5.304985046386719, -0.634368896484375, 3.4065589904785156, 7.1065826416015625, 3.7402000427246094, 1.7910232543945312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000439.npy"}
{"epoch": 0.6636432350718064, "step": 440, "batch_size": 64, "mean": 4.751244068145752, "std": 5.64109468460083, "min": -6.523672103881836, "p10": -1.4213565826416013, "median": 3.881566047668457, "p90": 13.410659790039064, "max": 16.418434143066406, "pos_frac": 0.8125, "sample": [-1.9181842803955078, 5.5092926025390625, 9.118675231933594, 13.505325317382812, 14.929595947265625, 15.717941284179688, 16.418434143066406, -0.44103431701660156, 13.189773559570312, -1.0725555419921875, -0.49698638916015625, 14.879533767700195, -0.10587310791015625, 12.186790466308594, 1.2492332458496094, 2.656646728515625, 8.882974624633789, -0.6155166625976562, -2.2080039978027344, 2.1463356018066406, 5.0689544677734375, 0.005710601806640625, 1.1349716186523438, 7.870750427246094, 6.774566650390625, 6.800607681274414, 4.473976135253906, 8.279067993164062, 0.9642925262451172, 2.2792587280273438, 2.0923423767089844, 2.8759307861328125, 4.105113983154297, 0.6324348449707031, 0.4776763916015625, 2.2795944213867188, 3.788198471069336, -6.523672103881836, 4.040733337402344, 12.328714370727539, 6.557636260986328, 9.927120208740234, 3.3441162109375, 10.142373085021973, -5.399837493896484, 2.0657501220703125, 14.694046020507812, 3.974933624267578, 7.2818603515625, 0.5415267944335938, 0.12933349609375, -3.5944595336914062, 8.202461242675781, 0.5344982147216797, 4.669584274291992, 0.8272380828857422, -1.6007728576660156, -1.5708427429199219, 9.86484146118164, 0.09012603759765625, 4.191253662109375, 16.095333099365234, 12.198995590209961, 7.6308746337890625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000440.npy"}
{"epoch": 0.6651549508692366, "step": 441, "batch_size": 64, "mean": 4.451216220855713, "std": 4.860274791717529, "min": -7.4354248046875, "p10": -0.9415725708007812, "median": 3.4956016540527344, "p90": 12.17490177154541, "max": 15.736797332763672, "pos_frac": 0.828125, "sample": [6.432058334350586, 6.745046615600586, 13.150447845458984, 7.913932800292969, 0.9751358032226562, -0.7500724792480469, 9.71585464477539, 2.4056549072265625, 4.237823486328125, 0.6962966918945312, -2.9698753356933594, 6.661323547363281, 1.97515869140625, 13.83078384399414, 1.5236129760742188, 1.524810791015625, 5.316902160644531, 2.4650497436523438, -7.4354248046875, 14.018875122070312, 2.3134765625, 7.1142578125, 3.874176025390625, 11.121528625488281, 2.3781509399414062, 15.116323471069336, 2.627246856689453, 2.386016845703125, 15.736797332763672, -0.989715576171875, -0.8292388916015625, 3.198944091796875, -0.38743019104003906, 7.5340728759765625, 3.5009307861328125, -1.3888282775878906, -0.4085235595703125, 10.384267807006836, 11.90526008605957, 2.8507118225097656, 6.222869873046875, 8.674636840820312, 1.8150405883789062, 4.0364227294921875, 3.964752197265625, 3.746368408203125, 6.298061370849609, 1.6751632690429688, 3.0771942138671875, -1.6135978698730469, 8.142425537109375, 0.5545749664306641, 12.290462493896484, 13.17694091796875, -2.6201648712158203, 4.444145202636719, 2.9541854858398438, 3.4902725219726562, 3.43798828125, 3.605712890625, -3.0658416748046875, 6.6025390625, 5.421590805053711, 2.0742740631103516], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000441.npy"}
{"epoch": 0.6666666666666666, "step": 442, "batch_size": 64, "mean": 3.7578186988830566, "std": 5.494144439697266, "min": -7.640922546386719, "p10": -2.9085298538208004, "median": 3.4836807250976562, "p90": 10.122731399536134, "max": 18.77466583251953, "pos_frac": 0.75, "sample": [1.8769149780273438, 7.7647705078125, 4.717193603515625, -1.701507568359375, 1.3828048706054688, 18.77466583251953, 13.909820556640625, 9.196277618408203, 4.093208312988281, 7.562553405761719, 4.748912811279297, 9.417160034179688, 11.784820556640625, -3.331817626953125, 5.104656219482422, 5.709211349487305, -0.06896209716796875, 0.9461441040039062, 0.9669189453125, -3.1008358001708984, -0.34979248046875, 8.7471923828125, 4.7623138427734375, 0.9560165405273438, 7.1582489013671875, 7.212272644042969, -7.640922546386719, 5.266086578369141, -0.7966747283935547, 10.850791931152344, 3.4227561950683594, 2.6013946533203125, 5.1870574951171875, 8.035957336425781, 5.8087615966796875, 0.6338596343994141, -2.4598159790039062, 1.455392837524414, 18.32454490661621, 10.314842224121094, 0.9619560241699219, -1.45172119140625, 1.4994564056396484, 3.9462509155273438, -1.6574058532714844, -4.944332122802734, -3.615570068359375, 13.7877197265625, -6.0136260986328125, 0.8177947998046875, 9.3704833984375, 6.740119934082031, -6.057838439941406, 3.3270721435546875, 1.018829345703125, -1.0105323791503906, 3.544605255126953, 1.4272079467773438, -0.2383575439453125, 9.67447280883789, 6.5060577392578125, 8.106403350830078, 1.3664321899414062, 4.151708602905273], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000442.npy"}
{"epoch": 0.6681783824640968, "step": 443, "batch_size": 64, "mean": 4.921362400054932, "std": 6.207025527954102, "min": -9.759498596191406, "p10": -1.46353530883789, "median": 3.660310745239258, "p90": 13.347703552246097, "max": 17.836227416992188, "pos_frac": 0.84375, "sample": [0.038051605224609375, 10.033866882324219, 5.3978729248046875, 11.57476806640625, 1.891845703125, 8.432586669921875, 6.489776611328125, -5.698574066162109, 2.185211181640625, 1.6941986083984375, 0.06787872314453125, -2.4607467651367188, 2.511089324951172, 4.039955139160156, 1.7480964660644531, 8.567298889160156, -0.0051136016845703125, 5.5410919189453125, 3.9920272827148438, 9.26068115234375, 15.891571044921875, -9.715572357177734, -2.9912796020507812, 9.649147033691406, 0.7157669067382812, 16.182815551757812, 15.19970703125, 2.4502201080322266, 17.836227416992188, 12.168840408325195, 7.836322784423828, 2.740612030029297, 5.286725997924805, 7.955501556396484, 8.560455322265625, 12.355712890625, 9.902101516723633, 11.319221496582031, 10.312355041503906, 9.865585327148438, 2.3486671447753906, 7.883907318115234, 0.27747344970703125, 6.634620666503906, 14.323909759521484, 11.170032501220703, 0.10128021240234375, 3.328594207763672, 2.7226715087890625, -0.29276466369628906, -0.7903556823730469, -5.971290588378906, 2.18585205078125, 2.7245254516601562, 0.4167442321777344, 1.6109466552734375, -9.759498596191406, 2.4290313720703125, 13.772842407226562, 4.640192031860352, 0.12042999267578125, -1.7520408630371094, 0.712890625, 17.304641723632812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000443.npy"}
{"epoch": 0.6696900982615268, "step": 444, "batch_size": 64, "mean": 1.9520694017410278, "std": 5.443874835968018, "min": -9.194896697998047, "p10": -4.847284317016601, "median": 1.2057628631591797, "p90": 9.274267578125, "max": 13.622467041015625, "pos_frac": 0.65625, "sample": [11.047796249389648, 1.0455589294433594, -1.810882568359375, 0.09273719787597656, 1.9468994140625, 5.390010833740234, 6.0393829345703125, -5.1392059326171875, -3.292926788330078, 9.668716430664062, -0.25406646728515625, 10.012825012207031, -3.2161407470703125, -8.123237609863281, 0.2913818359375, 6.0388946533203125, 8.984743118286133, 2.862651824951172, 2.0175018310546875, -6.0119476318359375, 8.92633056640625, 8.776580810546875, 0.7680721282958984, 0.9397964477539062, -3.68817138671875, 0.00713348388671875, 0.6384925842285156, -1.3585090637207031, -3.4869155883789062, -3.3989410400390625, 9.34417724609375, 1.6289787292480469, 6.676666259765625, 1.365966796875, 2.791656494140625, 8.440425872802734, 0.9658355712890625, 7.318752288818359, -0.5322608947753906, -9.194896697998047, 2.6505203247070312, 13.622467041015625, -8.350082397460938, -4.166133880615234, 0.36182212829589844, 8.07879638671875, 3.69708251953125, 11.431427001953125, 10.828235626220703, 1.4540252685546875, 3.7247867584228516, 4.3395843505859375, 0.4306983947753906, -0.37543487548828125, 8.128547668457031, -1.3408164978027344, -2.486034393310547, 2.1297054290771484, 3.0435333251953125, -5.55926513671875, -8.574600219726562, 9.11114501953125, -1.4053516387939453, -0.36208152770996094], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000444.npy"}
{"epoch": 0.671201814058957, "step": 445, "batch_size": 64, "mean": 4.423814296722412, "std": 5.549435138702393, "min": -6.85198974609375, "p10": -3.390853691101073, "median": 3.6296558380126953, "p90": 12.074411582946778, "max": 16.553565979003906, "pos_frac": 0.765625, "sample": [6.388927459716797, 14.627513885498047, 4.165473937988281, -0.7521820068359375, -3.9293212890625, 5.993675231933594, 11.37917709350586, 2.657958984375, 5.8577117919921875, 3.451160430908203, -0.6563796997070312, -4.198076248168945, -0.2033233642578125, 2.1100006103515625, -4.9416351318359375, 12.006235122680664, -3.7571258544921875, 0.8923263549804688, 1.367095947265625, 3.5700950622558594, 0.6168174743652344, 11.827781677246094, -6.85198974609375, 2.1673812866210938, -5.983985900878906, 9.195602416992188, 3.2292556762695312, 11.296932220458984, 2.7665023803710938, -0.13381195068359375, -4.904285430908203, -2.5362186431884766, 4.020696640014648, 12.914283752441406, 16.553565979003906, 6.584747314453125, 8.525520324707031, 12.92399787902832, 2.8787765502929688, -0.9634246826171875, 8.24710464477539, 7.1314544677734375, 3.5527877807617188, 12.35791015625, 6.862829208374023, 9.490348815917969, 11.129493713378906, 1.147216796875, 7.925840377807617, -1.535074234008789, 4.368412017822266, 2.1567001342773438, -1.0036773681640625, 2.0245590209960938, 7.204097747802734, 6.7242889404296875, 12.103630065917969, 9.164674758911133, 1.0161800384521484, 3.6892166137695312, 5.741081237792969, 7.912384033203125, 13.20186996459961, 2.35333251953125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000445.npy"}
{"epoch": 0.672713529856387, "step": 446, "batch_size": 64, "mean": 5.587871551513672, "std": 5.209455490112305, "min": -3.991212844848633, "p10": -0.6627424240112303, "median": 4.998201370239258, "p90": 12.268514442443848, "max": 22.0445556640625, "pos_frac": 0.859375, "sample": [0.4075794219970703, -0.48503875732421875, -3.991212844848633, 5.418060302734375, 2.9677085876464844, 8.150619506835938, 2.8645057678222656, -3.7187728881835938, 22.0445556640625, 10.488279342651367, 5.248832702636719, 3.7917098999023438, 8.0904541015625, 8.405757904052734, 8.440032958984375, 14.462440490722656, 4.410514831542969, 6.192867279052734, 11.108558654785156, 4.702735900878906, 0.5755119323730469, 8.914680480957031, 1.425262451171875, 7.607818603515625, 12.76161003112793, 6.955591201782227, 1.5914115905761719, 10.202411651611328, 11.701080322265625, -1.778594970703125, 8.658891677856445, 2.224264144897461, 0.2836761474609375, 1.1770095825195312, -0.4670219421386719, -0.7389011383056641, 1.9066696166992188, -3.4955825805664062, 7.631072998046875, 4.033538818359375, 4.28521728515625, 16.074813842773438, 12.067806243896484, 4.052473068237305, 14.27117919921875, 2.113454818725586, 6.0367584228515625, 12.354532241821289, -1.1407184600830078, 4.992168426513672, 7.804695129394531, 1.6316795349121094, 6.0302581787109375, 9.233467102050781, 10.309793472290039, 14.735820770263672, 3.3246498107910156, 5.004234313964844, 2.8001632690429688, 9.530120849609375, -0.9548759460449219, 3.1220932006835938, 4.495399475097656, 5.277992248535156], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000446.npy"}
{"epoch": 0.674225245653817, "step": 447, "batch_size": 64, "mean": 5.477807521820068, "std": 6.07163667678833, "min": -16.353885650634766, "p10": -1.7696388244628904, "median": 5.521266937255859, "p90": 13.130583190917969, "max": 19.403182983398438, "pos_frac": 0.84375, "sample": [13.839401245117188, 9.936988830566406, 7.206821441650391, 1.5292129516601562, 0.6724395751953125, -1.5456390380859375, 5.891395568847656, -1.8656387329101562, -6.052558898925781, 0.6972198486328125, 3.704021453857422, 4.655189514160156, 3.9943008422851562, 8.42425537109375, 8.153724670410156, 16.488685607910156, 3.1228504180908203, 7.422420501708984, 5.214437484741211, 5.092647552490234, 4.705108642578125, -3.3884506225585938, 5.2546844482421875, -3.4462814331054688, 2.2009124755859375, 11.309951782226562, 13.09173583984375, 11.161712646484375, 7.77313232421875, 4.73455810546875, 8.11224365234375, 9.772653579711914, 4.240810394287109, 8.8583984375, 7.158466339111328, 6.211883544921875, 16.795547485351562, 4.6927947998046875, 14.301877975463867, -0.5422573089599609, 4.725090026855469, 4.414154052734375, 13.147232055664062, -1.1796340942382812, 1.1912689208984375, -2.82000732421875, 1.4176712036132812, 14.308082580566406, 7.90875244140625, 6.769893646240234, 0.0645751953125, 10.904119491577148, -3.136089324951172, 12.24066162109375, 7.0744171142578125, 6.65997314453125, 5.787849426269531, -16.353885650634766, 2.9795761108398438, 19.403182983398438, 12.8172607421875, 0.621795654296875, 6.2028961181640625, 5.849180221557617], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000447.npy"}
{"epoch": 0.6757369614512472, "step": 448, "batch_size": 64, "mean": 4.395143508911133, "std": 5.65421199798584, "min": -11.244327545166016, "p10": -1.8078485488891598, "median": 4.178668975830078, "p90": 11.589239883422852, "max": 18.852083206176758, "pos_frac": 0.859375, "sample": [9.269996643066406, -7.113527297973633, 11.67409896850586, 1.0035858154296875, 6.20611572265625, 2.803844451904297, -0.2685508728027344, 6.804412841796875, 4.178489685058594, 0.32245635986328125, 9.51483154296875, 7.0799713134765625, 1.683929443359375, 3.1438217163085938, 4.269584655761719, 11.3912353515625, -1.9212303161621094, 13.381694793701172, 8.472450256347656, 0.3450050354003906, 1.9767913818359375, 18.852083206176758, 4.897041320800781, 6.1622161865234375, 3.1939697265625, 0.103759765625, 9.019586563110352, 3.711322784423828, 6.412506103515625, 4.1788482666015625, -3.3277111053466797, 0.6727752685546875, 4.942169189453125, 2.5623703002929688, 1.78875732421875, 6.5923614501953125, -3.2323226928710938, 5.530603408813477, 16.15533447265625, -3.006053924560547, 5.311407089233398, -11.244327545166016, 5.116020202636719, 0.668487548828125, 3.8443603515625, 8.528297424316406, 4.34320068359375, 10.285507202148438, 5.295778274536133, 0.5505218505859375, -1.5432910919189453, 1.5618133544921875, 1.92205810546875, 10.111320495605469, 0.0048542022705078125, 12.157196044921875, 1.14544677734375, -6.341011047363281, 12.70114517211914, 6.384101867675781, 9.974655151367188, 2.1529006958007812, 0.8173828125, 18.11273956298828], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000448.npy"}
{"epoch": 0.6772486772486772, "step": 449, "batch_size": 64, "mean": 2.4102249145507812, "std": 4.724206447601318, "min": -7.323371887207031, "p10": -3.2259675979614246, "median": 2.2837705612182617, "p90": 7.43211898803711, "max": 17.799400329589844, "pos_frac": 0.671875, "sample": [-0.1875782012939453, 0.2103271484375, -5.9839019775390625, 6.9831085205078125, -1.01336669921875, -1.9440078735351562, 1.6748809814453125, 2.4082565307617188, -4.066020965576172, 7.659889221191406, 5.3015899658203125, 3.8347320556640625, 3.74127197265625, -1.3616600036621094, -0.141265869140625, 9.691162109375, 17.799400329589844, 3.2098522186279297, -0.36383819580078125, 3.6292877197265625, 1.8860282897949219, 3.4733715057373047, 12.381912231445312, 13.637401580810547, -4.088842391967773, 2.7294349670410156, 6.028404235839844, 1.8359375, 3.6834793090820312, 1.5536746978759766, 2.2342071533203125, -0.9315223693847656, 3.4397964477539062, -7.075839996337891, 7.341270446777344, 3.1652069091796875, 10.95947265625, -1.0255355834960938, 3.335803985595703, 5.295562744140625, 1.4314346313476562, -1.7659759521484375, -1.0487518310546875, 5.649406433105469, -1.265462875366211, 5.248889923095703, -2.138021469116211, 6.564666748046875, 2.333333969116211, 3.3555221557617188, 7.4710540771484375, -1.5988883972167969, 0.18862152099609375, 1.3336906433105469, -3.692230224609375, 3.5601806640625, 6.7742919921875, -7.323371887207031, 7.033542633056641, 0.7779769897460938, 4.341854095458984, -4.3368377685546875, 1.0828495025634766, -0.66473388671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000449.npy"}
{"epoch": 0.6787603930461074, "step": 450, "batch_size": 64, "mean": 3.461467981338501, "std": 5.58323335647583, "min": -8.74081802368164, "p10": -2.699443435668945, "median": 2.797793388366699, "p90": 11.040406799316406, "max": 18.784561157226562, "pos_frac": 0.71875, "sample": [0.30154991149902344, -0.4948997497558594, -1.8822479248046875, -0.521331787109375, 11.02691650390625, 11.542343139648438, 4.252052307128906, 1.4501190185546875, 9.371074676513672, 16.377826690673828, 6.261009216308594, 3.2912826538085938, 8.717864990234375, 1.9495010375976562, 0.2666587829589844, 13.253807067871094, -0.1029205322265625, 2.881298065185547, 6.9265289306640625, 18.784561157226562, -2.144367218017578, 4.621341705322266, 1.5332527160644531, -2.378936767578125, -1.9629364013671875, -2.836803436279297, 8.808547973632812, 11.046188354492188, 4.427978515625, -4.574460983276367, 2.5531463623046875, 9.302001953125, 2.617645263671875, 6.311367034912109, -8.74081802368164, 10.920249938964844, 7.172355651855469, -0.22077560424804688, 9.815155029296875, 3.369342803955078, 3.478221893310547, 5.7717742919921875, 2.7142887115478516, -7.463859558105469, 5.475044250488281, 1.7805347442626953, 4.494384765625, 2.041107177734375, 3.1751708984375, -2.0515365600585938, 0.013071060180664062, -4.86236572265625, 1.4881782531738281, 4.158794403076172, 11.803421020507812, 3.90478515625, -3.6147918701171875, 8.230077743530273, 1.0107421875, 11.794910430908203, -1.6213340759277344, 2.0242156982421875, -5.048948287963867, -0.45439910888671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000450.npy"}
{"epoch": 0.6802721088435374, "step": 451, "batch_size": 64, "mean": 3.3795528411865234, "std": 5.622935771942139, "min": -9.535987854003906, "p10": -2.3547622680664064, "median": 3.4311351776123047, "p90": 9.997018432617189, "max": 17.80975341796875, "pos_frac": 0.734375, "sample": [9.719970703125, 1.882110595703125, 5.745170593261719, 3.8469085693359375, 8.944290161132812, 1.3116455078125, -7.752826690673828, 4.4166259765625, 2.734262466430664, 10.340545654296875, 0.7216033935546875, 5.59014892578125, 9.217178344726562, 1.2270355224609375, 5.600635528564453, 3.2332000732421875, 4.358543395996094, 5.9165802001953125, 1.3852596282958984, 8.609928131103516, 1.4044742584228516, -9.535987854003906, -2.3570556640625, 17.80975341796875, 7.143243789672852, -5.8580322265625, 5.4791259765625, -7.137481689453125, -5.9900970458984375, -0.03659629821777344, 4.183218002319336, 10.115753173828125, -2.3494110107421875, 1.3613052368164062, 5.162191390991211, 3.05755615234375, 13.0390625, 8.925529479980469, 1.4167861938476562, 4.285163879394531, -0.8836574554443359, 8.896621704101562, 0.8745670318603516, -0.3290557861328125, 4.098371505737305, -1.269683837890625, 3.629070281982422, 13.599067687988281, 14.082595825195312, 4.030845642089844, 4.934722900390625, 0.4047393798828125, 4.618110656738281, -2.2948875427246094, -1.5226707458496094, -0.46485137939453125, 9.56971549987793, 0.9368324279785156, -1.5042667388916016, 5.822467803955078, 16.751951217651367, -3.3024520874023438, -2.3315505981445312, 0.7774505615234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000451.npy"}
{"epoch": 0.6817838246409675, "step": 452, "batch_size": 64, "mean": 4.123050212860107, "std": 5.872638702392578, "min": -10.48883056640625, "p10": -2.495762252807617, "median": 4.046555519104004, "p90": 12.08749008178711, "max": 17.074790954589844, "pos_frac": 0.734375, "sample": [7.260200500488281, -0.3948822021484375, 3.8805179595947266, 9.341171264648438, 0.8562850952148438, 8.711688995361328, 4.212593078613281, 12.598403930664062, 8.51822280883789, 12.619880676269531, 1.5441780090332031, 12.998138427734375, -10.48883056640625, 9.433158874511719, 11.492668151855469, 3.1109771728515625, 14.545639038085938, 5.923553466796875, 6.56071662902832, 9.857421875, 3.4175338745117188, -1.9126205444335938, 6.73944091796875, 3.42913818359375, 0.6518802642822266, -0.8921833038330078, 5.763126373291016, 3.4458999633789062, 3.6750640869140625, -0.216949462890625, 0.5571823120117188, 17.074790954589844, -4.140705108642578, 9.803813934326172, -2.3027076721191406, 12.089126586914062, 4.9798583984375, 5.034233093261719, -5.240211486816406, 1.6550350189208984, 2.7944488525390625, -2.5223846435546875, 10.679084777832031, 4.704151153564453, 7.955615997314453, -3.282682418823242, 13.616188049316406, 12.083671569824219, 4.5399322509765625, -0.609130859375, 2.971670150756836, -2.430624008178711, 4.217184066772461, 7.937885284423828, -2.433643341064453, -1.6843032836914062, 0.5555343627929688, 5.034587860107422, 8.771684646606445, -4.4160614013671875, -0.8162078857421875, 8.454452514648438, -10.379142761230469, 1.9408416748046875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000452.npy"}
{"epoch": 0.6832955404383976, "step": 453, "batch_size": 64, "mean": 4.0383405685424805, "std": 6.156214714050293, "min": -8.87116813659668, "p10": -3.375982284545898, "median": 2.8662967681884766, "p90": 12.505528831481934, "max": 17.142974853515625, "pos_frac": 0.703125, "sample": [6.24969482421875, 8.114753723144531, -1.816375732421875, -0.32558441162109375, 14.678733825683594, 1.279022216796875, 0.6978530883789062, 6.786769866943359, 9.61627197265625, -2.48199462890625, -8.87116813659668, 2.9699935913085938, 8.563697814941406, -1.68426513671875, 10.690910339355469, 9.119644165039062, 12.468276977539062, 1.3724212646484375, 10.912818908691406, 6.872066497802734, 1.7084712982177734, -3.4959564208984375, 0.2606353759765625, 0.3409423828125, 0.49726104736328125, -3.9421005249023438, -4.176139831542969, 12.521493911743164, -2.9005088806152344, 5.955297470092773, 17.142974853515625, 2.0713462829589844, 5.8303375244140625, -7.228279113769531, -0.7007789611816406, 2.429483413696289, 12.139152526855469, 6.633766174316406, -3.670330047607422, 17.08167266845703, 3.5774002075195312, 13.941925048828125, -3.0960426330566406, -0.47879791259765625, -0.84765625, 4.80902099609375, -1.2164287567138672, 0.3142127990722656, 6.1798553466796875, 6.645904541015625, 4.713191986083984, 0.8222503662109375, 2.7625999450683594, 7.460050582885742, 4.1875152587890625, -5.04449462890625, -1.6565895080566406, 13.369781494140625, 10.154712677001953, -0.130218505859375, 11.657844543457031, 10.941442489624023, 13.322868347167969, 2.3511581420898438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000453.npy"}
{"epoch": 0.6848072562358276, "step": 454, "batch_size": 64, "mean": 4.606368541717529, "std": 6.272418975830078, "min": -15.234664916992188, "p10": -2.262660980224609, "median": 3.5461158752441406, "p90": 13.022731399536134, "max": 19.612884521484375, "pos_frac": 0.78125, "sample": [11.849031448364258, -5.522829055786133, -0.8608779907226562, 8.159111022949219, -15.234664916992188, -1.7919425964355469, 7.8301544189453125, 0.5069122314453125, 3.2072715759277344, 2.6463394165039062, 5.605434417724609, 12.248147964477539, 4.567840576171875, -2.9642410278320312, 9.206123352050781, 4.985347747802734, 12.509990692138672, 8.987083435058594, 8.81402587890625, 8.879646301269531, -1.1240615844726562, -6.837028503417969, -1.0360279083251953, 8.742454528808594, 13.869987487792969, 12.46002197265625, -3.906393051147461, 3.1422271728515625, 3.4498748779296875, 0.9448623657226562, 1.1546630859375, 4.460685729980469, 13.242477416992188, -0.2599678039550781, 5.5634613037109375, 1.0367393493652344, 17.03907012939453, -2.464397430419922, 19.612884521484375, 2.423795700073242, 0.6889686584472656, 1.1800384521484375, 1.6760787963867188, 1.5357275009155273, 2.443634033203125, 2.5641098022460938, 13.40085220336914, 8.352203369140625, 1.5526809692382812, 5.915702819824219, 9.050933837890625, 5.0687103271484375, 6.371795654296875, 14.964506149291992, 7.933938980102539, -0.7460403442382812, 1.7646446228027344, 11.997100830078125, 3.6423568725585938, 13.462409973144531, 9.260818481445312, -0.1711578369140625, 1.1728954315185547, -3.4185428619384766], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000454.npy"}
{"epoch": 0.6863189720332578, "step": 455, "batch_size": 64, "mean": 4.611947059631348, "std": 5.7726149559021, "min": -9.124679565429688, "p10": -1.595651626586914, "median": 3.901163101196289, "p90": 11.81742553710938, "max": 20.918472290039062, "pos_frac": 0.8125, "sample": [9.660430908203125, 2.2071685791015625, 5.7868499755859375, 10.780014038085938, 6.508056640625, -1.5566787719726562, 15.215576171875, 15.689037322998047, 0.12615203857421875, 3.2506332397460938, 1.7946052551269531, -0.27027130126953125, 4.7796630859375, 4.093578338623047, 4.3140716552734375, 20.918472290039062, 1.0024032592773438, -2.2275238037109375, 3.0135269165039062, -2.4525833129882812, 13.946861267089844, 2.426898956298828, 6.2745361328125, -0.08796501159667969, -0.17371177673339844, 10.66986083984375, 3.4336166381835938, 10.00469970703125, 5.070915222167969, 1.4796524047851562, 4.659465789794922, 2.2034854888916016, 12.212997436523438, 0.9246597290039062, 4.5180816650390625, 0.34918212890625, -9.124679565429688, 7.837226867675781, 10.841148376464844, 10.894424438476562, -3.3897857666015625, -1.1175460815429688, 5.6298675537109375, -6.407684326171875, 7.159769058227539, 10.57586669921875, 5.693473815917969, 1.9735794067382812, 3.7087478637695312, 1.0005722045898438, 18.90597915649414, 6.614593505859375, -1.6123542785644531, 8.975421905517578, 0.3761749267578125, 1.8076171875, 6.213245391845703, 2.1125411987304688, -1.9936065673828125, 1.6622962951660156, 0.47498130798339844, 5.570217132568359, 5.208850860595703, 15.027267456054688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000455.npy"}
{"epoch": 0.6878306878306878, "step": 456, "batch_size": 64, "mean": 4.128653526306152, "std": 5.489447116851807, "min": -9.743881225585938, "p10": -1.800516891479492, "median": 3.9403886795043945, "p90": 11.146748733520509, "max": 16.63262939453125, "pos_frac": 0.75, "sample": [10.191864013671875, -3.5772171020507812, -0.02381134033203125, 6.1989288330078125, 16.63262939453125, -0.5741767883300781, -1.8691825866699219, 6.666187286376953, -0.7703285217285156, 11.25784683227539, 7.7713775634765625, 4.860481262207031, 9.770858764648438, 14.274253845214844, 5.732246398925781, 0.8942947387695312, -3.9597930908203125, 7.840782165527344, 2.4230728149414062, 6.50811767578125, -3.010345458984375, 2.3720016479492188, 5.155487060546875, 1.091156005859375, 8.381490707397461, 0.6815166473388672, -8.081134796142578, -3.51690673828125, 6.01472282409668, 0.17405319213867188, 2.3537864685058594, 2.1920928955078125, 3.585916519165039, 3.434600830078125, 5.047698974609375, 7.242731094360352, 2.7515792846679688, -1.6402969360351562, 6.782806396484375, 1.347421646118164, -1.274850845336914, 4.409969329833984, 1.3970375061035156, 6.7185821533203125, -0.9416465759277344, -9.743881225585938, 12.017715454101562, 3.4794464111328125, -0.24701690673828125, -1.21466064453125, 10.193367004394531, 15.99002456665039, 0.4902801513671875, 10.887519836425781, 5.3546600341796875, 9.711196899414062, 10.681503295898438, 2.3590774536132812, 5.367729187011719, 14.089500427246094, 5.806684494018555, -0.4121856689453125, 4.29486083984375, 12.2100830078125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000456.npy"}
{"epoch": 0.6893424036281179, "step": 457, "batch_size": 64, "mean": 3.6349313259124756, "std": 4.880740642547607, "min": -7.6634521484375, "p10": -2.2231031417846676, "median": 3.4880361557006836, "p90": 10.67347679138184, "max": 14.221565246582031, "pos_frac": 0.734375, "sample": [1.9623565673828125, -3.9666595458984375, 0.2349376678466797, 4.926456451416016, 6.224512100219727, -1.4519462585449219, 5.81414794921875, 0.1015625, -2.3123207092285156, 0.901702880859375, 2.123655319213867, 13.092803955078125, 6.557748794555664, 9.835159301757812, -2.0886287689208984, -2.5492095947265625, 9.87899398803711, 4.5017242431640625, 0.6610145568847656, 8.022415161132812, 11.374198913574219, 4.979001998901367, 14.221565246582031, -0.9432830810546875, 0.2632293701171875, 6.347761154174805, -2.085399627685547, 7.878194808959961, -7.6634521484375, 0.9347267150878906, -2.35107421875, -2.9799880981445312, 1.576324462890625, 7.91900634765625, 4.617977142333984, -1.0633392333984375, -0.6031494140625, 9.520294189453125, 1.0025177001953125, 3.3349533081054688, 8.877342224121094, -2.2807350158691406, 7.903419494628906, 12.205718994140625, -1.5632076263427734, 0.5767974853515625, 9.415512084960938, -0.5669918060302734, 4.547641754150391, 12.111391067504883, 11.013969421386719, 2.6821327209472656, 11.393768310546875, 0.04070281982421875, 5.090000152587891, 7.975008010864258, 4.439186096191406, 3.3375930786132812, 3.638479232788086, -0.048862457275390625, 4.2832794189453125, 3.9087448120117188, 5.215219497680664, -0.31099700927734375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000457.npy"}
{"epoch": 0.690854119425548, "step": 458, "batch_size": 64, "mean": 4.668992519378662, "std": 5.97877311706543, "min": -12.4283447265625, "p10": -2.605168342590331, "median": 4.083609580993652, "p90": 12.904219818115235, "max": 16.517353057861328, "pos_frac": 0.78125, "sample": [3.4841270446777344, 7.520271301269531, 0.7871112823486328, 8.22119140625, 4.012548446655273, 9.693767547607422, 3.6969680786132812, -12.4283447265625, -0.6469535827636719, 1.1285591125488281, -1.23602294921875, -3.2204856872558594, -5.056549072265625, 12.354522705078125, 13.451255798339844, 2.2998905181884766, 9.053016662597656, 8.53668212890625, 3.2438201904296875, 5.382293701171875, 3.770956039428711, 5.241386413574219, 5.940910339355469, -1.5503311157226562, 9.30889892578125, 4.154670715332031, 10.763412475585938, 6.246637344360352, 2.9233779907226562, 10.16143798828125, 12.307243347167969, 1.6422996520996094, 2.44744873046875, -4.9998779296875, 2.7175140380859375, -0.4682273864746094, 3.0915756225585938, 4.2664642333984375, 8.80029296875, 16.517353057861328, 5.390464782714844, 2.9240074157714844, -8.738174438476562, -0.08232879638671875, 10.69696044921875, 1.7799930572509766, 4.741416931152344, -0.4300823211669922, 15.49329948425293, 13.432830810546875, -3.5397472381591797, 12.649917602539062, 8.705032348632812, 5.500482559204102, -3.057241439819336, 16.37804412841797, 1.3156585693359375, 13.719818115234375, -1.4260406494140625, 1.7025833129882812, 3.457103729248047, 5.889495849609375, 13.013206481933594, 5.737701416015625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000458.npy"}
{"epoch": 0.6923658352229781, "step": 459, "batch_size": 64, "mean": 4.762661457061768, "std": 5.636676788330078, "min": -5.749092102050781, "p10": -2.849330139160156, "median": 4.535423278808594, "p90": 12.80901508331299, "max": 15.486701965332031, "pos_frac": 0.765625, "sample": [4.789314270019531, 8.703109741210938, -5.749092102050781, 5.843536376953125, -2.7002906799316406, -0.112823486328125, 2.833995819091797, 7.52606201171875, 3.3997955322265625, 6.499897003173828, 13.044281005859375, -1.3174171447753906, 9.015308380126953, -3.5614089965820312, 13.52464485168457, 15.135505676269531, 3.9978103637695312, 6.5018310546875, 5.0892486572265625, 5.233205795288086, 6.7341461181640625, 6.1830902099609375, 3.357257843017578, 3.5842361450195312, 1.0878963470458984, 10.254707336425781, 1.263946533203125, 7.915279388427734, 6.984523773193359, 11.065980911254883, 14.526870727539062, 0.14801025390625, -2.4589385986328125, 2.7957611083984375, -0.9411544799804688, 3.1331863403320312, 4.708549499511719, -4.9324798583984375, 10.576059341430664, -2.9132041931152344, 1.0623188018798828, 9.286163330078125, 6.517000198364258, 1.8254508972167969, 11.8980712890625, -0.277679443359375, 9.700874328613281, -0.17185211181640625, -5.5220489501953125, 12.260061264038086, -0.992401123046875, 2.381439208984375, -3.893087387084961, 10.603195190429688, 4.362297058105469, -3.1470489501953125, 5.945323944091797, 2.7825927734375, 14.514923095703125, 11.280929565429688, 14.399246215820312, 0.4596710205078125, 15.486701965332031, 3.2779769897460938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000459.npy"}
{"epoch": 0.6938775510204082, "step": 460, "batch_size": 64, "mean": 5.358639717102051, "std": 5.991425514221191, "min": -4.0963287353515625, "p10": -1.3746665954589838, "median": 3.8542137145996094, "p90": 14.428275108337404, "max": 17.266700744628906, "pos_frac": 0.78125, "sample": [12.698928833007812, 10.107799530029297, 17.266700744628906, -0.1618804931640625, 2.4053955078125, 13.335914611816406, 13.978164672851562, 3.029388427734375, 13.685653686523438, -0.859039306640625, -0.14090728759765625, 3.815013885498047, 0.43325042724609375, 4.047615051269531, 0.3352813720703125, 2.5629043579101562, 3.893413543701172, 7.554161071777344, -1.9825897216796875, 16.738161087036133, 1.164693832397461, 16.879837036132812, 5.893592834472656, 8.869071960449219, 15.791007995605469, 8.652225494384766, 4.105537414550781, 3.5414581298828125, 10.189308166503906, 3.0635852813720703, 4.089019775390625, 12.999244689941406, 9.321403503417969, 3.4269256591796875, 0.15728759765625, 5.263824462890625, 4.5500335693359375, 1.8439407348632812, 1.564361572265625, 2.5694713592529297, 14.621179580688477, 9.420585632324219, -0.39588165283203125, -0.010433197021484375, 10.187145233154297, 14.825599670410156, -3.8670654296875, -3.86151123046875, 4.9566192626953125, -4.0963287353515625, -1.5956497192382812, 1.835906982421875, 11.15423583984375, 1.4092769622802734, -0.36240386962890625, 7.31585693359375, 1.2956390380859375, 13.885101318359375, -3.5576515197753906, -2.472381591796875, 7.06671142578125, 3.5164337158203125, -0.2429351806640625, 15.245716094970703], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000460.npy"}
{"epoch": 0.6953892668178382, "step": 461, "batch_size": 64, "mean": 4.7339019775390625, "std": 5.904404640197754, "min": -7.9907379150390625, "p10": -2.262007141113281, "median": 4.268786430358887, "p90": 12.96729965209961, "max": 18.615005493164062, "pos_frac": 0.75, "sample": [7.952995300292969, 6.1832733154296875, 3.1194610595703125, 4.299299240112305, 13.984413146972656, 8.967544555664062, 5.939125061035156, 7.40544319152832, 9.304031372070312, -2.1068153381347656, -7.9907379150390625, 8.627395629882812, 6.827972412109375, 2.6943721771240234, 7.304338455200195, -0.709381103515625, -0.0882568359375, 12.912481307983398, 1.080810546875, 3.0824947357177734, 4.838550567626953, 11.61129379272461, 7.695274353027344, 3.7917098999023438, 2.213014602661133, 10.45048713684082, -4.572731018066406, 0.3680095672607422, -3.0041580200195312, 2.357166290283203, 7.053779602050781, -2.0451316833496094, -6.535888671875, 4.536325454711914, 4.985809326171875, 7.472747802734375, 13.712249755859375, 2.7005386352539062, 2.398681640625, 16.851348876953125, 8.275604248046875, 12.990793228149414, 9.142646789550781, 3.498016357421875, 1.4073200225830078, -1.7290992736816406, 3.1552047729492188, -1.8230705261230469, 8.767068862915039, 4.238273620605469, 11.099395751953125, 7.569887161254883, -0.030513763427734375, 1.9272880554199219, 12.777137756347656, -1.3145065307617188, 15.774276733398438, -2.7858200073242188, -1.6794719696044922, -3.698863983154297, 13.041755676269531, -2.3285179138183594, 18.615005493164062, 0.41056060791015625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000461.npy"}
{"epoch": 0.6969009826152683, "step": 462, "batch_size": 64, "mean": 5.382818222045898, "std": 5.679330348968506, "min": -8.887876510620117, "p10": -1.076251029968261, "median": 4.832061767578125, "p90": 12.914230728149414, "max": 21.68585205078125, "pos_frac": 0.859375, "sample": [13.364545822143555, 13.982837677001953, 13.594799041748047, 9.046333312988281, -0.3439178466796875, 4.5431976318359375, 6.030143737792969, 3.5123825073242188, 0.03990936279296875, 0.9715232849121094, -1.860870361328125, 15.588653564453125, 11.378608703613281, 3.8145828247070312, 6.5252685546875, 12.98543930053711, 0.7203369140625, 8.692855834960938, 2.0500106811523438, 7.314788818359375, 4.179058074951172, 4.534904479980469, 12.748077392578125, 5.1209259033203125, 7.26495361328125, 0.8327217102050781, 8.409677505493164, 8.664093017578125, 0.37079429626464844, 8.080570220947266, 0.64764404296875, -2.1517562866210938, 6.565101623535156, 7.768415451049805, 13.395179748535156, -6.156890869140625, -1.3901081085205078, 7.749603271484375, 6.591035842895508, 3.4543838500976562, 8.2574462890625, 9.605636596679688, 7.835216522216797, 2.346160888671875, 4.3017425537109375, 10.577106475830078, 4.085578918457031, 4.340389251708984, 11.726146697998047, 7.695568084716797, 2.7684326171875, -8.263389587402344, 10.492774963378906, 9.81639289855957, 8.934410095214844, 1.6724014282226562, 2.640878677368164, 21.68585205078125, 4.335350036621094, 0.6266384124755859, -8.887876510620117, -2.1317176818847656, -0.12251472473144531, 1.5319442749023438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000462.npy"}
{"epoch": 0.6984126984126984, "step": 463, "batch_size": 64, "mean": 4.03656005859375, "std": 4.984346866607666, "min": -5.584228515625, "p10": -1.5966873168945308, "median": 3.1948184967041016, "p90": 10.893154907226563, "max": 15.739677429199219, "pos_frac": 0.75, "sample": [-1.0365180969238281, 7.776435852050781, 7.763164520263672, -5.584228515625, 8.562814712524414, 7.201360702514648, -0.5957469940185547, 14.821590423583984, 8.246253967285156, 6.890308380126953, -1.1907844543457031, 2.4294891357421875, 2.0151615142822266, 11.0499267578125, 6.311485290527344, -0.4069671630859375, -0.9932632446289062, 2.649139404296875, -0.41977691650390625, 0.5494461059570312, -1.2713260650634766, 0.2845458984375, -0.1134033203125, 12.224700927734375, 7.366481781005859, 8.048843383789062, 2.9354248046875, 6.938819885253906, -4.099948883056641, -0.8373641967773438, 3.3700408935546875, 1.2873687744140625, 10.956802368164062, 12.115528106689453, 1.8795280456542969, 8.74090576171875, 4.1913299560546875, 3.0552749633789062, 4.412879943847656, 12.63238525390625, 2.9566612243652344, 1.7368621826171875, 1.00494384765625, 0.748870849609375, -2.0305843353271484, -3.1562881469726562, 15.739677429199219, 1.7031021118164062, 10.744644165039062, 9.071792602539062, 4.722869873046875, -3.52386474609375, 3.334362030029297, -1.7361278533935547, 1.3144683837890625, 7.7900238037109375, 10.055145263671875, 6.4375457763671875, 7.318611145019531, 4.161827087402344, -4.105690002441406, 4.1854248046875, 0.11344528198242188, 9.59402847290039], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000463.npy"}
{"epoch": 0.6999244142101285, "step": 464, "batch_size": 64, "mean": 4.018474102020264, "std": 6.304259777069092, "min": -11.422988891601562, "p10": -3.8299484252929688, "median": 3.304058074951172, "p90": 12.355094909667969, "max": 16.497230529785156, "pos_frac": 0.765625, "sample": [-2.529693603515625, 2.6528091430664062, 5.470298767089844, -5.922607421875, 11.212251663208008, -1.158721923828125, -4.703998565673828, 12.430522918701172, 7.638008117675781, 3.753173828125, 9.724105834960938, -2.3703231811523438, 13.27484130859375, 4.758110046386719, 3.124542236328125, -4.235748291015625, 7.814453125, 2.107044219970703, 3.1443099975585938, 7.321258544921875, 3.6581802368164062, 13.853073120117188, -3.871845245361328, 6.959516525268555, 0.7535438537597656, 9.578948974609375, 2.238983154296875, 11.63204574584961, 7.772060394287109, 5.859245300292969, 1.6543426513671875, 0.81494140625, 11.890369415283203, 14.0382080078125, -11.422988891601562, 3.1118011474609375, 12.179096221923828, 5.902656555175781, -6.001258850097656, -3.732189178466797, 12.986785888671875, 1.9014129638671875, 2.961576461791992, 12.019569396972656, 0.444732666015625, 0.6967239379882812, 1.3577041625976562, 1.6827116012573242, -1.9762802124023438, 11.591522216796875, 4.845813751220703, 11.707061767578125, -10.482826232910156, -2.387216567993164, 1.9869613647460938, -2.872800827026367, 3.46380615234375, 13.441877365112305, 3.8156776428222656, 16.497230529785156, -2.5209007263183594, 7.836528778076172, 7.0997314453125, 0.711578369140625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000464.npy"}
{"epoch": 0.7014361300075586, "step": 465, "batch_size": 64, "mean": 4.356546401977539, "std": 6.035623550415039, "min": -6.633258819580078, "p10": -4.381014251708984, "median": 3.63912296295166, "p90": 12.524020385742187, "max": 15.717453002929688, "pos_frac": 0.71875, "sample": [-5.057403564453125, 9.965667724609375, 5.019325256347656, -0.611602783203125, 6.65423583984375, 8.499893188476562, -0.8065261840820312, 15.717453002929688, -1.0074462890625, 9.814811706542969, 3.0633468627929688, 11.895431518554688, 9.825891494750977, 14.41845703125, -4.68830680847168, 12.430335998535156, -4.434661865234375, 11.85761833190918, 9.22869873046875, 2.6503677368164062, 1.2104644775390625, -5.6097564697265625, 6.974119186401367, 12.528717041015625, 10.262893676757812, 12.905902862548828, -0.8796863555908203, -4.452033996582031, -0.92181396484375, 2.3809776306152344, 1.8699417114257812, -4.255836486816406, 1.042144775390625, 8.517303466796875, 6.017181396484375, 4.4274139404296875, -2.185365676879883, -1.006866455078125, -1.9133739471435547, 5.554941177368164, -6.611362457275391, 1.889434814453125, 3.6903820037841797, 5.935050964355469, -1.6093521118164062, -6.633258819580078, 0.7586250305175781, 3.626932144165039, 6.742607116699219, 14.945873260498047, 1.4269027709960938, 12.5130615234375, 3.2242279052734375, 13.569385528564453, 3.6513137817382812, -0.9073696136474609, 2.420989990234375, 2.4708919525146484, 4.8146514892578125, 7.522300720214844, 14.930633544921875, 3.458749771118164, 9.955299377441406, 10.130142211914062], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000465.npy"}
{"epoch": 0.7029478458049887, "step": 466, "batch_size": 64, "mean": 2.915306568145752, "std": 4.852566719055176, "min": -7.113864898681641, "p10": -2.631377029418945, "median": 2.75032901763916, "p90": 8.650850677490237, "max": 15.252853393554688, "pos_frac": 0.75, "sample": [-1.2360687255859375, 6.982692718505859, -5.271049499511719, -4.019012451171875, 3.493867874145508, 0.3017406463623047, 0.00496673583984375, 0.12738037109375, 1.2500534057617188, -6.673919677734375, 3.6063613891601562, 5.1871795654296875, 10.945831298828125, -3.093463897705078, 3.3089675903320312, -2.494415283203125, 4.112892150878906, 14.00177001953125, 4.9789886474609375, 13.266342163085938, 6.902667999267578, 4.766218185424805, 11.046915054321289, -0.49196624755859375, 3.798004150390625, 1.9182205200195312, 7.028663635253906, -1.7689666748046875, 15.252853393554688, 3.0344390869140625, 0.9678802490234375, -0.2879180908203125, -2.2154312133789062, 2.83807373046875, 6.664684295654297, 4.390205383300781, 8.84649658203125, 2.8963775634765625, -2.690074920654297, 2.2525100708007812, 0.5867576599121094, 0.82659912109375, -0.5023517608642578, -7.113864898681641, 7.1997833251953125, 0.13964080810546875, -0.7139129638671875, 6.399627685546875, 14.012130737304688, 1.3261222839355469, -4.718269348144531, 7.42364501953125, -0.653900146484375, 6.3023529052734375, 2.8352298736572266, 0.5273303985595703, 3.6548709869384766, 1.175252914428711, 1.5555286407470703, 5.727327346801758, 8.194343566894531, 4.656772613525391, 2.6654281616210938, 1.1422195434570312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000466.npy"}
{"epoch": 0.7044595616024187, "step": 467, "batch_size": 64, "mean": 4.520857334136963, "std": 5.5244293212890625, "min": -6.894712448120117, "p10": -1.6275142669677733, "median": 4.529502868652344, "p90": 12.858882713317872, "max": 19.758148193359375, "pos_frac": 0.78125, "sample": [0.9970703125, 15.31894302368164, 5.134986877441406, 7.579933166503906, -0.19910430908203125, 1.5976543426513672, -1.551483154296875, -1.3838024139404297, 6.14764404296875, 6.172767639160156, 6.140617370605469, -3.654399871826172, 10.789474487304688, -2.4178733825683594, 8.86590576171875, 12.954221725463867, 0.3875617980957031, 3.02825927734375, 0.1522674560546875, 2.095325469970703, 8.26898193359375, -3.924957275390625, 0.29715728759765625, 14.926551818847656, 12.97198486328125, 0.36016082763671875, 8.706634521484375, 6.382179260253906, -6.894712448120117, 7.683013916015625, -1.9390792846679688, 10.454116821289062, 1.7365856170654297, 3.188232421875, -1.6600990295410156, 3.93914794921875, 6.7649383544921875, 9.642715454101562, 4.50634765625, 5.153778076171875, -0.33127784729003906, 0.3992328643798828, -0.1365814208984375, 4.965734481811523, 1.9188785552978516, 0.2180633544921875, -1.1636505126953125, -0.4414939880371094, 6.724897384643555, 12.636425018310547, 19.758148193359375, 3.805917739868164, 5.250553131103516, 0.7665538787841797, 5.291814804077148, 4.719844818115234, -4.229515075683594, 14.241905212402344, 2.4127025604248047, 15.152938842773438, 4.5526580810546875, 7.3899383544921875, 10.311763763427734, 6.399772644042969], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000467.npy"}
{"epoch": 0.7059712773998488, "step": 468, "batch_size": 64, "mean": 5.825956344604492, "std": 6.266327857971191, "min": -5.227512359619141, "p10": -1.3483221054077146, "median": 4.914005279541016, "p90": 14.658386993408207, "max": 22.64224624633789, "pos_frac": 0.828125, "sample": [6.537408828735352, 0.21155929565429688, 6.595624923706055, 16.127609252929688, 5.9624481201171875, -1.1713581085205078, 0.3392524719238281, 9.99615478515625, 5.054290771484375, 4.0911407470703125, 8.84058952331543, 16.196632385253906, 10.098922729492188, -3.3238067626953125, 4.773719787597656, 8.85455322265625, -0.3751220703125, -3.970947265625, -1.424163818359375, 7.168096542358398, 13.831703186035156, -0.2523040771484375, 9.097618103027344, 15.012680053710938, 1.5913581848144531, 2.300191879272461, 16.751129150390625, 2.7959117889404297, -3.8739242553710938, -5.226844787597656, 13.46750259399414, 0.6353759765625, -0.7646484375, -5.227512359619141, 3.2831039428710938, -2.4564208984375, 7.859649658203125, 7.695411682128906, 0.1243896484375, 11.643768310546875, 2.654998779296875, 2.54388427734375, 9.49765396118164, 13.7989501953125, 7.428199768066406, 3.505718231201172, 3.289213180541992, 7.0654449462890625, 2.945768356323242, 3.3534183502197266, 15.403949737548828, 11.056259155273438, 8.411235809326172, 3.3445281982421875, 19.072601318359375, 2.9182281494140625, 0.6707229614257812, 22.64224624633789, 8.965110778808594, 10.61594009399414, 12.832321166992188, 9.113945007324219, 1.9235610961914062, 0.9365463256835938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000468.npy"}
{"epoch": 0.7074829931972789, "step": 469, "batch_size": 64, "mean": 4.6940531730651855, "std": 5.954168319702148, "min": -6.957178115844727, "p10": -1.4564138412475582, "median": 3.445831298828125, "p90": 12.084004211425782, "max": 20.546688079833984, "pos_frac": 0.75, "sample": [3.1956634521484375, 9.090171813964844, 7.5551910400390625, 5.700347900390625, 2.3013763427734375, 1.7797164916992188, 2.329803466796875, 3.554811477661133, 8.813556671142578, -3.2196578979492188, 11.928604125976562, 8.000083923339844, 7.3732452392578125, -3.2786178588867188, 0.7437591552734375, 10.474493026733398, 3.425048828125, 2.2796707153320312, 7.991674423217773, 13.058082580566406, 12.662773132324219, 2.6769561767578125, 1.2282943725585938, -1.137664794921875, 8.117630004882812, -3.8326644897460938, 2.9696273803710938, 1.5085601806640625, -1.5578250885009766, 2.7203636169433594, -3.3324508666992188, -0.04450416564941406, 9.830078125, 20.546688079833984, 11.661310195922852, -1.21978759765625, -0.030426025390625, -0.6635932922363281, 11.670602798461914, 17.897085189819336, 10.763885498046875, 6.645149230957031, 3.851259231567383, 5.366752624511719, 0.05987548828125, -0.8840789794921875, 3.974456787109375, -0.4838066101074219, 19.38115119934082, 4.426094055175781, 2.0584030151367188, 0.3179054260253906, 18.068416595458984, 0.32315826416015625, 12.150604248046875, -3.1404190063476562, 6.553274154663086, 3.46661376953125, 6.139789581298828, -6.957178115844727, 6.226753234863281, -0.3431110382080078, -0.5955657958984375, 8.281929016113281], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000469.npy"}
{"epoch": 0.708994708994709, "step": 470, "batch_size": 64, "mean": 4.441651344299316, "std": 5.567893981933594, "min": -14.990276336669922, "p10": -1.7034170150756835, "median": 4.409173965454102, "p90": 11.343154907226564, "max": 15.551006317138672, "pos_frac": 0.78125, "sample": [10.175392150878906, 6.6824951171875, 15.551006317138672, -14.990276336669922, 2.5729522705078125, 11.525390625, 9.982284545898438, 4.663978576660156, 10.341171264648438, 8.276382446289062, 8.106163024902344, -4.773258209228516, 11.809173583984375, 4.342048645019531, 9.030403137207031, 3.0258026123046875, 10.917938232421875, 0.06799507141113281, -0.40142059326171875, 5.931404113769531, -0.2750358581542969, 6.9582366943359375, 2.7949981689453125, -4.507965087890625, 9.585594177246094, 3.202730178833008, 14.203033447265625, 5.7706146240234375, 1.6878890991210938, -1.0243110656738281, 6.520978927612305, 0.34908103942871094, 8.261268615722656, 1.6766204833984375, -1.6356143951416016, 0.1475677490234375, 2.991504669189453, 0.6581649780273438, 5.514122009277344, -4.0352783203125, 4.1839599609375, 9.000885009765625, 10.609725952148438, 1.6031265258789062, -1.7324752807617188, -2.6144638061523438, 4.944169998168945, 7.9503021240234375, 7.88427734375, 12.832374572753906, 0.69317626953125, 8.935028076171875, -2.7656707763671875, 4.476299285888672, 1.7087478637695312, 7.510429382324219, 2.1568984985351562, 12.567794799804688, -0.45354461669921875, 3.839437484741211, 13.769519805908203, -0.41475486755371094, 6.371952056884766, -0.4727611541748047], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000470.npy"}
{"epoch": 0.7105064247921391, "step": 471, "batch_size": 64, "mean": 5.427962303161621, "std": 6.322539806365967, "min": -14.964004516601562, "p10": -1.9397811889648433, "median": 5.571990966796875, "p90": 14.860739517211915, "max": 16.388351440429688, "pos_frac": 0.828125, "sample": [0.9228515625, 11.986968994140625, 10.229387283325195, 8.344396591186523, 2.186737060546875, -1.5168609619140625, 9.608680725097656, 6.29449462890625, 15.231986999511719, 12.094749450683594, 7.955467224121094, 2.8527679443359375, 15.3902587890625, 0.11837005615234375, -5.674457550048828, -0.4329833984375, 4.266130447387695, 15.021476745605469, 14.721294403076172, -2.12103271484375, 2.5156383514404297, 8.654277801513672, 8.083198547363281, 6.7230987548828125, 9.209869384765625, -14.964004516601562, 7.203069686889648, 4.87481689453125, 16.15896224975586, 0.4406852722167969, 7.862934112548828, 3.905078887939453, 0.735748291015625, 2.5739288330078125, 2.9565696716308594, 4.559455871582031, 12.000892639160156, -0.011505126953125, 2.5324745178222656, 15.029708862304688, 6.2691650390625, -2.6771240234375, 14.153177261352539, 11.339950561523438, 0.8455524444580078, 0.14580535888671875, 9.23486328125, 8.130104064941406, -5.903923034667969, 9.388076782226562, -4.1613616943359375, 11.882667541503906, 1.7828598022460938, 1.6749496459960938, 6.933647155761719, -0.07177352905273438, 6.929840087890625, 7.021760940551758, 2.3152923583984375, 16.388351440429688, 2.9999313354492188, 1.7053985595703125, -2.3836898803710938, 14.920501708984375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000471.npy"}
{"epoch": 0.7120181405895691, "step": 472, "batch_size": 64, "mean": 4.831416606903076, "std": 6.580316543579102, "min": -10.081779479980469, "p10": -3.9118667602539063, "median": 4.539205551147461, "p90": 14.015734291076662, "max": 19.370758056640625, "pos_frac": 0.78125, "sample": [3.1604461669921875, 4.147773742675781, 3.09625244140625, 5.97900390625, 12.244010925292969, 8.010982513427734, 0.8732757568359375, 10.991195678710938, 11.304367065429688, 14.241226196289062, 4.036701202392578, 19.370758056640625, 5.08057975769043, -3.1957244873046875, 1.5007953643798828, 14.193105697631836, 2.5341930389404297, 6.467041015625, 19.057926177978516, 3.228973388671875, -5.32720947265625, 5.203590393066406, -3.8208580017089844, 9.440185546875, -1.716400146484375, 6.440010070800781, 2.3062896728515625, 14.56094741821289, 16.31639862060547, -2.622617721557617, 5.233489990234375, 6.899162292480469, 2.463043212890625, -7.241462707519531, -3.9508705139160156, 8.214736938476562, 9.249374389648438, 11.640838623046875, 1.3678398132324219, 3.1207427978515625, 9.493814468383789, 16.15245819091797, -10.081779479980469, 13.60186767578125, -7.3687286376953125, -6.130977630615234, 7.897258758544922, 1.688608169555664, -5.3863677978515625, 6.543853759765625, 2.7749156951904297, 2.224264144897461, 5.6280517578125, 0.7890090942382812, 3.601043701171875, -0.28199005126953125, 6.337371826171875, 5.256885528564453, 12.31373405456543, -0.5720291137695312, 4.930637359619141, 3.7071762084960938, -0.15406036376953125, 12.145519256591797], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000472.npy"}
{"epoch": 0.7135298563869993, "step": 473, "batch_size": 64, "mean": 4.822985649108887, "std": 7.162439346313477, "min": -8.889141082763672, "p10": -4.530198097229004, "median": 3.662750244140625, "p90": 14.938375854492188, "max": 22.10455322265625, "pos_frac": 0.75, "sample": [14.819580078125, 13.834953308105469, 0.7900676727294922, -5.100141525268555, -4.967809677124023, 17.142425537109375, 2.4061737060546875, 6.86883544921875, 2.389036178588867, 13.166954040527344, 14.989288330078125, 0.446380615234375, -1.4650115966796875, 4.7413177490234375, 2.8204498291015625, -5.22442626953125, 17.749771118164062, 13.32147216796875, 5.794410705566406, 11.59844970703125, -8.889141082763672, -2.7825469970703125, 8.3121337890625, -3.9480361938476562, -3.2081298828125, 8.776893615722656, 6.446941375732422, 7.21893310546875, 2.593912124633789, 1.0393905639648438, 3.783966064453125, -0.0003509521484375, -6.449899673461914, -3.7596282958984375, -4.327169418334961, 11.314641952514648, 22.10455322265625, 16.538108825683594, 3.541534423828125, 0.7607383728027344, 11.98199462890625, 6.044803619384766, 9.22357177734375, 15.064449310302734, 2.581218719482422, 1.6250419616699219, 2.130565643310547, 0.47188568115234375, 5.627647399902344, 9.612092971801758, 6.433101654052734, 1.9325332641601562, 10.354410171508789, 4.207611083984375, -0.6305255889892578, 6.197826385498047, 14.389442443847656, -4.617210388183594, -0.5160751342773438, 2.0591354370117188, 1.79241943359375, 16.883228302001953, 8.241336822509766, -7.608440399169922], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000473.npy"}
{"epoch": 0.7150415721844293, "step": 474, "batch_size": 64, "mean": 4.1439433097839355, "std": 6.445921421051025, "min": -8.026054382324219, "p10": -4.326510047912596, "median": 3.056820869445801, "p90": 13.268001556396488, "max": 16.506927490234375, "pos_frac": 0.703125, "sample": [14.251617431640625, 7.3963470458984375, 7.244945526123047, -5.171257019042969, -2.3818931579589844, -1.2883262634277344, 10.452560424804688, 15.288009643554688, 7.4601898193359375, -2.5358963012695312, 0.56207275390625, 6.6449127197265625, -7.737361907958984, 9.460441589355469, 5.388069152832031, 13.693225860595703, 2.637481689453125, 1.2528095245361328, -3.0822906494140625, 6.717315673828125, -4.859746932983398, -2.0584468841552734, 11.516033172607422, 6.002847671508789, 9.967300415039062, 1.4766559600830078, -0.42811012268066406, -6.5230865478515625, 1.0625686645507812, -0.9038848876953125, 11.043148040771484, 3.628101348876953, -6.125804901123047, 1.7219314575195312, -2.524852752685547, 12.299274444580078, 0.42504119873046875, 12.390945434570312, 9.255989074707031, -1.519378662109375, 1.9172344207763672, 2.8649749755859375, 11.483383178710938, 6.493309020996094, -1.0950202941894531, 16.506927490234375, 10.09124755859375, 2.2453155517578125, 11.450469970703125, -1.5007190704345703, 13.800031661987305, 13.643882751464844, 1.9357070922851562, 4.4347076416015625, 3.4722137451171875, -5.243961334228516, 13.856277465820312, -2.996307373046875, 3.248666763305664, 8.247833251953125, 10.952823638916016, -8.026054382324219, 2.5071945190429688, 2.822742462158203], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000474.npy"}
{"epoch": 0.7165532879818595, "step": 475, "batch_size": 64, "mean": 6.32227897644043, "std": 6.047003746032715, "min": -4.057441711425781, "p10": -1.1419540405273436, "median": 5.780312538146973, "p90": 15.54647789001465, "max": 19.65393829345703, "pos_frac": 0.84375, "sample": [0.37847900390625, 4.550159454345703, 7.290809631347656, 3.9081573486328125, 9.779153823852539, 5.076530456542969, 11.629915237426758, 15.647525787353516, 4.748987197875977, -1.2126922607421875, 5.914360046386719, 0.281005859375, 13.484146118164062, 16.025970458984375, 3.7709827423095703, 6.150909423828125, 8.582494735717773, 6.302150726318359, 3.764516830444336, 9.557785034179688, 2.8033523559570312, 2.4061279296875, 7.387613296508789, 2.442779541015625, 5.444004058837891, 2.5133514404296875, 1.0779037475585938, 13.88214111328125, 5.705787658691406, -4.057441711425781, 17.81647491455078, 15.949420928955078, 13.804229736328125, 19.65393829345703, 4.184814453125, -0.679290771484375, 6.138757705688477, -0.3380546569824219, 5.8838958740234375, -2.1117095947265625, 13.819244384765625, 9.94171142578125, 1.1453609466552734, 2.0850372314453125, 6.270448684692383, -3.532562255859375, -1.948232650756836, 5.444841384887695, 15.310699462890625, 1.0765457153320312, 17.547420501708984, 19.446136474609375, 9.6568603515625, 8.582469940185547, 8.910642623901367, -0.976898193359375, -3.8413238525390625, 9.622772216796875, 10.008026123046875, 10.308708190917969, 3.917285919189453, 4.043891906738281, -3.6074981689453125, 5.854837417602539], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000475.npy"}
{"epoch": 0.7180650037792895, "step": 476, "batch_size": 64, "mean": 3.802440643310547, "std": 6.6882710456848145, "min": -8.420951843261719, "p10": -3.7603057861328124, "median": 3.5019378662109375, "p90": 12.58026180267334, "max": 20.37548828125, "pos_frac": 0.6875, "sample": [12.56500244140625, 7.834465026855469, -3.615957260131836, 5.447959899902344, -6.00604248046875, 5.6396331787109375, -2.8700103759765625, 14.158975601196289, 20.37548828125, 2.5655364990234375, 13.890594482421875, -0.2207050323486328, 18.21971321105957, 8.805953979492188, 9.600330352783203, 3.7554931640625, -1.20758056640625, 2.6601181030273438, 5.716850280761719, 4.405670166015625, -3.367767333984375, 0.330078125, 7.95527458190918, 7.400932312011719, 5.0371856689453125, -8.140768051147461, -2.4363784790039062, 2.168987274169922, 0.5966567993164062, 1.7355079650878906, 17.985794067382812, -8.420951843261719, 5.1757965087890625, -0.8161659240722656, 6.049190521240234, -5.339790344238281, -3.7016448974609375, -8.121307373046875, 8.436614990234375, 1.0886497497558594, -3.7854461669921875, 5.80206298828125, 3.827880859375, 1.0010986328125, 7.851036071777344, 3.204498291015625, 3.248382568359375, 0.6936454772949219, -1.4446563720703125, 11.518547058105469, 4.27685546875, -7.138580322265625, 9.20306396484375, -0.6521453857421875, 8.848922729492188, 16.468994140625, -0.9777641296386719, 2.2758407592773438, 9.964862823486328, 6.99786376953125, -2.6544017791748047, 12.586801528930664, 8.714630126953125, -1.813161849975586], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000476.npy"}
{"epoch": 0.7195767195767195, "step": 477, "batch_size": 64, "mean": 3.9657044410705566, "std": 7.173821449279785, "min": -12.37750244140625, "p10": -6.444756698608399, "median": 4.354369163513184, "p90": 12.353472137451172, "max": 18.940040588378906, "pos_frac": 0.75, "sample": [-6.7884368896484375, -3.3530120849609375, 9.700000762939453, 1.4771957397460938, 10.279067993164062, 6.936946868896484, 1.342559814453125, 1.0528812408447266, 0.127166748046875, -1.5389137268066406, 12.258308410644531, 5.584239959716797, -10.970426559448242, 2.446033477783203, 5.410055160522461, 1.813009262084961, -2.2453231811523438, 13.612701416015625, 6.4118804931640625, -2.142507553100586, 8.67999267578125, 13.596799850463867, 5.5986480712890625, 6.623603820800781, 10.55626106262207, -1.2541046142578125, -6.481239318847656, 2.8214035034179688, 10.705352783203125, 2.5846500396728516, -8.418228149414062, 1.0977401733398438, 5.108577728271484, 0.18854522705078125, -11.88461685180664, -1.2494697570800781, -8.079727172851562, 2.8625640869140625, 4.775299072265625, 3.9562606811523438, 16.35498809814453, 0.13018417358398438, 10.922752380371094, 18.940040588378906, 4.588468551635742, 14.952781677246094, -6.359630584716797, 8.697357177734375, 9.935165405273438, -4.4772796630859375, 10.954864501953125, 2.8477420806884766, 5.550004959106445, 9.811851501464844, 15.120941162109375, 8.887847900390625, -12.37750244140625, 4.120269775390625, 9.291786193847656, 12.394256591796875, 0.9353828430175781, 10.899011611938477, -2.2736053466796875, 10.755672454833984], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000477.npy"}
{"epoch": 0.7210884353741497, "step": 478, "batch_size": 64, "mean": 4.023830890655518, "std": 6.533648490905762, "min": -7.245323181152344, "p10": -3.4096769332885737, "median": 3.354389190673828, "p90": 14.40032024383545, "max": 18.649656295776367, "pos_frac": 0.703125, "sample": [-0.26556968688964844, 18.649656295776367, 1.1756134033203125, 9.095882415771484, 5.88365364074707, -1.5395889282226562, 15.140159606933594, 8.631729125976562, -0.9417953491210938, 8.787185668945312, -1.418548583984375, 0.44976043701171875, 5.773124694824219, 15.544761657714844, -3.6326828002929688, 2.2927284240722656, 0.1248321533203125, 3.34637451171875, -1.93511962890625, 6.952781677246094, -2.347665786743164, -2.8893299102783203, 7.006229400634766, 5.871040344238281, 9.620128631591797, -6.146186828613281, 5.5211944580078125, 8.231893539428711, 9.250984191894531, -6.6043853759765625, 7.650249481201172, 0.27105140686035156, -7.245323181152344, -7.242729187011719, 0.5767021179199219, 14.323959350585938, 12.926109313964844, -0.959503173828125, 8.748634338378906, 3.3624038696289062, 14.433046340942383, -0.8473434448242188, -6.457054138183594, 3.9109363555908203, 17.691162109375, 3.9002456665039062, 0.4531097412109375, 13.341751098632812, 3.0677566528320312, -1.4784164428710938, 5.095115661621094, 9.694555282592773, -1.452728271484375, 6.74125862121582, 0.9330520629882812, 14.8670654296875, 14.57205581665039, -1.99676513671875, 2.1679153442382812, 5.0582733154296875, 0.470123291015625, -5.734708786010742, 3.9945449829101562, 3.0598297119140625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000478.npy"}
{"epoch": 0.7226001511715797, "step": 479, "batch_size": 64, "mean": 3.858086585998535, "std": 6.76672887802124, "min": -18.79351806640625, "p10": -3.26920051574707, "median": 3.819939613342285, "p90": 11.956349182128907, "max": 18.998493194580078, "pos_frac": 0.71875, "sample": [6.024204254150391, 12.763980865478516, -1.029510498046875, -2.8951759338378906, -1.2795696258544922, 3.685628890991211, 18.998493194580078, 4.722646713256836, 11.441898345947266, 0.466705322265625, 5.641651153564453, -5.167285919189453, -0.82281494140625, -0.9745597839355469, 0.36383819580078125, 7.18212890625, 12.081314086914062, 10.554786682128906, -0.0388946533203125, 1.5002307891845703, -1.6480712890625, 3.130901336669922, 11.664764404296875, 9.379497528076172, 3.3953189849853516, -7.212490081787109, 4.243595123291016, 1.1783294677734375, -5.3772125244140625, 1.73870849609375, 7.198600769042969, 7.193098068237305, 0.9905281066894531, 14.512115478515625, 10.562667846679688, 0.8000698089599609, -9.774099349975586, -2.3150405883789062, 4.311725616455078, 8.34310531616211, 18.021087646484375, -18.79351806640625, -3.4294967651367188, 2.643270492553711, 9.11435317993164, 18.03146743774414, 6.259834289550781, -6.51690673828125, 2.6415939331054688, 3.374359130859375, 5.107666015625, 9.713508605957031, -1.0529556274414062, 12.569488525390625, 1.220041275024414, 5.3990478515625, 8.382007598876953, 4.33241081237793, 3.9542503356933594, 7.205028533935547, -1.282012939453125, -1.5185546875, 7.068058013916016, 8.937704086303711], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000479.npy"}
{"epoch": 0.7241118669690099, "step": 480, "batch_size": 64, "mean": 4.419934272766113, "std": 7.053965091705322, "min": -17.13833999633789, "p10": -4.562220764160156, "median": 4.918939590454102, "p90": 12.883065414428712, "max": 19.808486938476562, "pos_frac": 0.765625, "sample": [4.980892181396484, -5.74322509765625, 8.900238037109375, -4.353126525878906, 6.545566558837891, 6.225744247436523, 7.559144973754883, -17.13833999633789, 2.5824241638183594, 19.808486938476562, 0.86041259765625, 3.3592185974121094, 9.644706726074219, -2.2686996459960938, 5.612892150878906, -1.5343475341796875, -0.457672119140625, 5.322395324707031, -3.313995361328125, 2.2281761169433594, -7.517696380615234, 12.356010437011719, 5.9920501708984375, -5.514152526855469, 12.473365783691406, 13.163948059082031, 16.421890258789062, 7.33367919921875, 17.930313110351562, 4.3410186767578125, 7.336688995361328, 0.5067253112792969, 3.6851806640625, 4.856986999511719, 7.068878173828125, 4.996675491333008, 10.190010070800781, 5.9850311279296875, -14.132743835449219, 5.024871826171875, 16.090866088867188, 8.380607604980469, 6.059795379638672, 4.581001281738281, 0.496337890625, 13.058650970458984, 4.047100067138672, -3.419708251953125, 4.837306976318359, 4.79243278503418, 4.0515899658203125, -5.2754669189453125, 12.127235412597656, 9.394729614257812, 4.638153076171875, 8.82588005065918, 11.072088241577148, 7.661676406860352, 1.2062797546386719, -2.0637168884277344, 1.633575439453125, 15.351028442382812, -1.3394546508789062, -4.651832580566406], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000480.npy"}
{"epoch": 0.7256235827664399, "step": 481, "batch_size": 64, "mean": 5.092473983764648, "std": 7.002762317657471, "min": -8.56515884399414, "p10": -3.5603345870971674, "median": 3.757512092590332, "p90": 14.0602653503418, "max": 24.09796905517578, "pos_frac": 0.78125, "sample": [5.5089874267578125, 1.8616561889648438, 19.683292388916016, 1.9186019897460938, -4.292743682861328, 6.858312606811523, 13.386817932128906, 6.210458755493164, -4.634281158447266, -3.7283859252929688, 13.652175903320312, 3.24365234375, 5.902412414550781, 9.640731811523438, -1.9370574951171875, 13.285171508789062, 7.766410827636719, 5.478641510009766, 7.497230529785156, 6.740058898925781, -8.56515884399414, -0.04418373107910156, 14.235160827636719, 3.6758480072021484, 0.053867340087890625, -2.305143356323242, 0.6142730712890625, 10.985036849975586, 1.6899566650390625, 8.141700744628906, -0.4358081817626953, 12.00381851196289, 1.2960052490234375, 2.29254150390625, 2.662212371826172, 4.37457275390625, 9.460601806640625, 3.9675559997558594, 2.7805557250976562, 2.88421630859375, -2.857330322265625, 3.0396652221679688, 11.20079231262207, 20.75906753540039, -0.1755504608154297, 15.945659637451172, 2.031749725341797, 2.1402359008789062, 24.09796905517578, 8.045015335083008, 9.698532104492188, 3.5754928588867188, 0.876922607421875, -5.2274169921875, -3.168214797973633, 14.509429931640625, -8.392547607421875, 8.629737854003906, 10.869817733764648, 18.66357421875, 9.056045532226562, 3.8391761779785156, -5.2967529296875, 0.2474822998046875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000481.npy"}
{"epoch": 0.72713529856387, "step": 482, "batch_size": 64, "mean": 4.016700267791748, "std": 7.319019794464111, "min": -11.058124542236328, "p10": -4.8783424377441404, "median": 4.096733093261719, "p90": 14.63852310180664, "max": 22.15960693359375, "pos_frac": 0.75, "sample": [-0.6545867919921875, -7.56060791015625, -0.09032440185546875, 5.409252166748047, 2.0093460083007812, 1.1806716918945312, 6.759925842285156, -10.67572021484375, 0.10199356079101562, 17.11090087890625, 5.225360870361328, 14.673484802246094, 7.1568603515625, 3.9641647338867188, 6.283306121826172, 8.402084350585938, 7.283203125, 4.72467041015625, -11.058124542236328, 7.5078887939453125, 14.98065185546875, 1.1465301513671875, 22.15960693359375, -3.1511993408203125, -0.9286994934082031, 7.955167770385742, 16.418670654296875, 8.112659454345703, -2.0723800659179688, 1.3238201141357422, -4.903022766113281, 6.237926483154297, -4.573873519897461, 3.156177520751953, 16.584388732910156, 8.213912963867188, 10.865936279296875, 0.6624183654785156, 2.1858062744140625, -3.942718505859375, 14.55694580078125, 4.02618408203125, 5.4440460205078125, 1.7706451416015625, 3.6995983123779297, 8.721179962158203, 6.948387145996094, 4.2438507080078125, 19.953506469726562, -9.712310791015625, 7.295928955078125, 1.231903076171875, 7.751773834228516, 4.1672821044921875, 10.106754302978516, 0.6020145416259766, -9.320785522460938, 13.036514282226562, -4.8207550048828125, -5.075008392333984, 7.876304626464844, 0.5593757629394531, -4.73974609375, 0.5597114562988281], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000482.npy"}
{"epoch": 0.7286470143613001, "step": 483, "batch_size": 64, "mean": 5.271034240722656, "std": 6.892204761505127, "min": -15.110847473144531, "p10": -1.7741102218627929, "median": 4.442093849182129, "p90": 14.153607177734377, "max": 20.565460205078125, "pos_frac": 0.734375, "sample": [-0.8205413818359375, 14.407867431640625, -1.1532211303710938, 1.8329124450683594, 10.806488037109375, 19.1412353515625, 9.343048095703125, 1.35809326171875, -0.7935791015625, -2.414886474609375, 4.076509475708008, 6.369144439697266, 10.558761596679688, 17.440101623535156, -1.2024192810058594, 9.700906753540039, 2.526752471923828, -3.2710952758789062, -0.27063560485839844, 12.871971130371094, 10.533096313476562, 14.857513427734375, 13.560333251953125, 1.9809112548828125, 13.296958923339844, 5.839778900146484, 9.836639404296875, -1.415924072265625, 1.5233688354492188, 4.0475921630859375, 6.706275939941406, -10.25372314453125, 17.738372802734375, 0.6808357238769531, 2.780181884765625, 11.722930908203125, 5.018592834472656, -15.110847473144531, 7.669763565063477, -1.8623580932617188, 2.8941574096679688, 4.148488998413086, 9.844001770019531, 0.8185615539550781, -2.337860107421875, 7.846096038818359, 20.565460205078125, 10.415092468261719, 6.352272033691406, 10.888154983520508, 2.732666015625, 6.5831756591796875, 1.9711074829101562, 15.11246109008789, -1.6530780792236328, 0.2836132049560547, -0.6185379028320312, 4.735698699951172, 10.160934448242188, 8.47823715209961, -0.269500732421875, -1.8259811401367188, -0.9543724060058594, 11.517631530761719], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000483.npy"}
{"epoch": 0.7301587301587301, "step": 484, "batch_size": 64, "mean": 4.021857261657715, "std": 5.366608619689941, "min": -7.216712951660156, "p10": -2.3216228485107417, "median": 4.017633438110352, "p90": 10.510314178466798, "max": 18.555068969726562, "pos_frac": 0.734375, "sample": [3.14666748046875, 4.517478942871094, -7.216712951660156, 4.259223937988281, 18.555068969726562, 1.7458629608154297, 1.265106201171875, -4.602069854736328, 12.107444763183594, -2.6646804809570312, 3.4505691528320312, -2.515960693359375, 3.853597640991211, 0.9440040588378906, 5.024999618530273, 7.423961639404297, -1.8681678771972656, -1.5381965637207031, 3.4858016967773438, 10.062522888183594, -1.0478153228759766, 7.545661926269531, 9.388450622558594, 5.87846565246582, 4.972286224365234, 7.497568130493164, 10.702224731445312, 1.370361328125, 6.518932342529297, 1.417154312133789, -0.09046554565429688, 9.181221008300781, 0.729248046875, -3.6269798278808594, 15.785270690917969, -0.4959373474121094, 4.353059768676758, 4.181669235229492, 5.2368927001953125, 0.47544097900390625, 3.8531837463378906, -1.5548019409179688, 17.868610382080078, -0.5829010009765625, 12.054533004760742, 3.0745716094970703, 5.747154235839844, 1.6863288879394531, 5.1689910888671875, 5.369110107421875, 15.122940063476562, 5.378381729125977, 7.499656677246094, -0.2156524658203125, -1.4518508911132812, -2.894073486328125, 8.986370086669922, -0.9827880859375, 4.365383148193359, 5.638969421386719, 3.8147125244140625, 8.109573364257812, 6.631298065185547, -4.6980438232421875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000484.npy"}
{"epoch": 0.7316704459561603, "step": 485, "batch_size": 64, "mean": 4.771087646484375, "std": 6.470158100128174, "min": -12.600788116455078, "p10": -1.1687685012817381, "median": 4.093477249145508, "p90": 15.295933532714844, "max": 18.677780151367188, "pos_frac": 0.78125, "sample": [5.728599548339844, 4.999212265014648, 7.942333221435547, 8.668403625488281, 4.0106353759765625, 5.0196685791015625, 4.270076751708984, 0.1470813751220703, 6.8524017333984375, -1.5286808013916016, -12.600788116455078, 3.2222747802734375, 17.484342575073242, 9.174232482910156, -0.0772552490234375, 5.7984466552734375, 2.963163375854492, 15.088512420654297, 18.677780151367188, 3.3080368041992188, 1.8640727996826172, 5.212310791015625, 0.4287109375, 16.3660888671875, -0.2988929748535156, 0.10182952880859375, -9.696849822998047, 4.464012145996094, 3.6018753051757812, 5.406017303466797, -3.7083969116210938, -0.7173576354980469, 9.21173095703125, -0.2655200958251953, 9.058128356933594, -3.7369384765625, 15.38446044921875, 16.287620544433594, 0.8751983642578125, 8.311542510986328, 3.817485809326172, -3.163015365600586, 4.98187255859375, 1.2527084350585938, 3.182331085205078, -1.0150222778320312, -1.1869068145751953, 12.3685302734375, 15.089370727539062, 2.80340576171875, 0.98895263671875, 4.596406936645508, 6.677398681640625, 17.1809024810791, 5.0320892333984375, 14.205013275146484, 4.144924163818359, 17.859580993652344, -0.9823760986328125, -1.1264457702636719, 0.6477813720703125, 2.3175048828125, 4.042030334472656, 4.3369598388671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000485.npy"}
{"epoch": 0.7331821617535903, "step": 486, "batch_size": 64, "mean": 4.345630168914795, "std": 7.320948600769043, "min": -11.068582534790039, "p10": -4.449695777893065, "median": 2.9212732315063477, "p90": 14.840796661376952, "max": 27.298065185546875, "pos_frac": 0.765625, "sample": [2.4989013671875, 1.9856185913085938, 10.5029296875, 4.785638809204102, 0.698089599609375, -1.0152931213378906, 6.622230529785156, 27.298065185546875, 6.821571350097656, -1.415557861328125, -5.194868087768555, 9.380851745605469, 4.9779510498046875, 3.026012420654297, 9.426177978515625, 3.8918533325195312, 1.7129592895507812, 15.947065353393555, -0.6434326171875, 6.02557373046875, 5.873014450073242, 2.7462158203125, -5.182914733886719, 1.7757110595703125, 18.987442016601562, -6.172615051269531, 3.65948486328125, -5.688568115234375, 8.87451171875, -5.961250305175781, 12.198699951171875, 0.384002685546875, 0.622894287109375, -10.291007995605469, 6.364707946777344, 1.0441741943359375, 3.3958702087402344, 11.082889556884766, -2.575479507446289, 2.279582977294922, 0.6533012390136719, 14.854808807373047, 6.0623016357421875, 20.073654174804688, 14.808101654052734, 0.1126556396484375, 1.8757476806640625, 7.636293411254883, 6.021026611328125, 18.56073570251465, 2.112213134765625, 3.3475875854492188, 3.3929290771484375, 11.451568603515625, -2.738851547241211, -0.1482696533203125, 2.461162567138672, 2.8165340423583984, 18.474258422851562, -1.192300796508789, 7.7450714111328125, -11.068582534790039, 2.017120361328125, -1.9584426879882812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000486.npy"}
{"epoch": 0.7346938775510204, "step": 487, "batch_size": 64, "mean": 4.4489240646362305, "std": 6.0998854637146, "min": -8.319232940673828, "p10": -2.587565231323242, "median": 3.5198745727539062, "p90": 11.853128051757814, "max": 22.4462890625, "pos_frac": 0.78125, "sample": [9.557281494140625, -4.329254150390625, 0.17082977294921875, 6.501800537109375, 9.083381652832031, 2.4902000427246094, 0.7515411376953125, 7.080936431884766, 13.592620849609375, 8.067840576171875, 5.495941162109375, -3.4138107299804688, 2.0236892700195312, -1.9569778442382812, -2.377857208251953, 18.57107925415039, 12.173477172851562, 9.708892822265625, 12.8270263671875, 8.554130554199219, -4.555091857910156, 2.38427734375, 2.7622947692871094, 11.239130020141602, 5.262201309204102, -0.7417182922363281, -0.15752410888671875, -5.14483642578125, 0.20790863037109375, 0.0032806396484375, 17.807632446289062, 5.888671875, 6.819454193115234, -2.520404815673828, 11.909286499023438, 0.7282257080078125, 3.1971797943115234, 0.797454833984375, 3.086254119873047, 4.242555618286133, 9.86846923828125, 1.6100749969482422, -1.907623291015625, 6.303806304931641, -3.5198020935058594, 4.600986480712891, 4.870883941650391, 0.4734077453613281, -0.8625926971435547, -8.319232940673828, 0.5694370269775391, 10.11372184753418, 5.1175537109375, 1.4092483520507812, 3.377899169921875, 11.355506896972656, 3.491729736328125, 3.5480194091796875, -2.6163482666015625, 11.485687255859375, 11.722091674804688, 8.023542404174805, 22.4462890625, 3.7794132232666016], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000487.npy"}
{"epoch": 0.7362055933484505, "step": 488, "batch_size": 64, "mean": 5.484686374664307, "std": 6.317531108856201, "min": -7.3466644287109375, "p10": -1.8196119308471677, "median": 4.838794708251953, "p90": 13.749971008300783, "max": 23.882530212402344, "pos_frac": 0.796875, "sample": [0.6954803466796875, -1.583770751953125, -0.7494869232177734, -3.8546791076660156, 5.235649108886719, -1.9206867218017578, -4.41729736328125, 8.459890365600586, 12.230850219726562, 8.903846740722656, 3.7672481536865234, 4.8194122314453125, 16.112930297851562, 2.9173202514648438, 3.4668426513671875, 23.882530212402344, 5.4776458740234375, 13.864974975585938, 0.013135910034179688, 0.2647743225097656, 7.08984375, 5.694236755371094, 1.6604537963867188, -0.053131103515625, 3.3671646118164062, 8.43896484375, -5.044921875, 14.227645874023438, 0.6508407592773438, 12.349414825439453, 1.3076553344726562, -0.6208724975585938, -0.4939117431640625, 16.387496948242188, 9.249649047851562, 3.8488807678222656, 12.416391372680664, -0.3482246398925781, 13.48162841796875, 11.739303588867188, 10.296730041503906, 5.44744873046875, 10.876762390136719, 5.410186767578125, 10.572380065917969, 3.4418563842773438, 0.04184722900390625, 4.858177185058594, -7.3466644287109375, 4.651283264160156, 3.5545501708984375, 5.736413955688477, -6.651914596557617, 8.161773681640625, 4.42352294921875, 11.132301330566406, 10.89239501953125, 11.110393524169922, 14.193790435791016, 8.344013214111328, 3.9710845947265625, 1.907958984375, -3.0113601684570312, 16.069869995117188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000488.npy"}
{"epoch": 0.7377173091458806, "step": 489, "batch_size": 64, "mean": 2.744333267211914, "std": 6.065982818603516, "min": -9.134029388427734, "p10": -5.396685791015624, "median": 1.4847230911254883, "p90": 10.026060104370119, "max": 16.049938201904297, "pos_frac": 0.703125, "sample": [-9.134029388427734, 7.469871520996094, 4.706302642822266, 8.163780212402344, 9.716480255126953, 11.382949829101562, 0.19904327392578125, 5.833595275878906, 7.975772857666016, 14.818710327148438, 8.17380142211914, 0.9737701416015625, 8.312469482421875, -6.876621246337891, -1.8306159973144531, 1.775360107421875, -5.016532897949219, 0.36994171142578125, 10.158737182617188, 4.7984619140625, 16.049938201904297, 9.540901184082031, 1.3333263397216797, 0.051036834716796875, 7.0347442626953125, 11.487983703613281, -3.2591018676757812, -2.6861038208007812, 9.26898193359375, 1.6361198425292969, -5.559608459472656, -6.969333648681641, 13.641250610351562, 3.00994873046875, 2.1529998779296875, -1.8743000030517578, -0.5278549194335938, 0.9893531799316406, 9.11590576171875, 1.7730712890625, 2.457479476928711, 1.2263946533203125, -0.42041015625, 9.560028076171875, 3.30218505859375, -6.290187835693359, 7.590080261230469, 1.2020034790039062, 6.011604309082031, 5.59375, -8.055870056152344, 1.075225830078125, 0.5323066711425781, 0.4883289337158203, -2.469928741455078, -8.779525756835938, 0.89508056640625, 0.7178878784179688, -3.2604827880859375, -0.853912353515625, -2.1900997161865234, 16.011268615722656, -0.9354648590087891, 4.0490875244140625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000489.npy"}
{"epoch": 0.7392290249433107, "step": 490, "batch_size": 64, "mean": 3.642141342163086, "std": 7.590019226074219, "min": -12.215156555175781, "p10": -5.255521011352538, "median": 3.489826202392578, "p90": 13.400612640380864, "max": 19.86141014099121, "pos_frac": 0.6875, "sample": [0.4034309387207031, -4.4952545166015625, -12.203399658203125, 8.00164794921875, -2.2369461059570312, 4.634456634521484, -3.6918258666992188, 11.040267944335938, 18.770599365234375, 12.384552001953125, -12.215156555175781, 13.836067199707031, -11.996124267578125, 3.239168167114258, 8.350265502929688, 9.551517486572266, 12.196022033691406, 3.9556655883789062, 14.22528076171875, 0.9038925170898438, 1.6722183227539062, 6.3348846435546875, 8.973941802978516, 10.806171417236328, -4.9693756103515625, 3.00677490234375, 19.86141014099121, 0.7849502563476562, 0.8426094055175781, 3.8758544921875, -10.314620971679688, 6.851173400878906, 4.0005645751953125, 8.875423431396484, -1.1034908294677734, -2.73382568359375, -0.40373992919921875, 16.89594841003418, -5.98231315612793, -0.3948097229003906, 5.698585510253906, -0.84539794921875, 2.0896453857421875, 10.619905471801758, -3.2112789154052734, 6.222980499267578, -0.7299880981445312, -1.3435707092285156, -7.166053771972656, 0.52508544921875, 0.423828125, 3.7404842376708984, 0.17639541625976562, 5.873203277587891, 19.35760498046875, 2.2939109802246094, 11.21406364440918, 5.708381652832031, 4.489143371582031, 15.200733184814453, -5.378154754638672, -3.796661376953125, 11.803146362304688, 8.597183227539062], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000490.npy"}
{"epoch": 0.7407407407407407, "step": 491, "batch_size": 64, "mean": 4.3256707191467285, "std": 5.720901012420654, "min": -9.795005798339844, "p10": -2.721922302246093, "median": 3.827852249145508, "p90": 12.296775054931645, "max": 14.451026916503906, "pos_frac": 0.734375, "sample": [12.860429763793945, -2.2855224609375, 13.024175643920898, 6.272590637207031, 8.768089294433594, 6.626228332519531, -0.030149459838867188, 9.029296875, 10.072883605957031, -0.07985687255859375, -0.1700439453125, -4.975982666015625, 7.265529632568359, 0.3431110382080078, 14.319528579711914, -1.2397384643554688, 1.9238395690917969, 9.366058349609375, 3.0184326171875, 7.139801025390625, -0.079071044921875, 4.482452392578125, 10.59074592590332, 4.184055328369141, 2.375070571899414, 12.780059814453125, 3.0905075073242188, 1.328369140625, 1.0463485717773438, 2.556232452392578, 13.546279907226562, 8.713546752929688, -3.8350906372070312, 4.116096496582031, 14.451026916503906, 4.900341033935547, -0.8349380493164062, 8.209396362304688, -0.44306182861328125, -3.6394996643066406, 12.696189880371094, -0.230682373046875, 1.4247474670410156, 3.0181884765625, 2.9420433044433594, 10.3255615234375, -2.9089508056640625, -8.030120849609375, 11.36480712890625, 9.468574523925781, 4.051643371582031, 8.603740692138672, 11.196815490722656, 10.599140167236328, 3.6040611267089844, 0.8390655517578125, 4.654541015625, 1.625213623046875, 8.696067810058594, -9.795005798339844, -5.14508056640625, 2.7405643463134766, -2.265697479248047, 8.579925537109375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000491.npy"}
{"epoch": 0.7422524565381708, "step": 492, "batch_size": 64, "mean": 3.610086441040039, "std": 6.82371187210083, "min": -14.090641021728516, "p10": -4.807935333251953, "median": 3.2816944122314453, "p90": 11.7689603805542, "max": 18.759857177734375, "pos_frac": 0.671875, "sample": [16.196746826171875, 3.156005859375, -4.183338165283203, 1.980682373046875, 11.600692749023438, -3.8986988067626953, 11.390274047851562, 2.902494430541992, 5.1755523681640625, -1.0807075500488281, 9.614608764648438, 7.726432800292969, 4.305694580078125, 0.34023284912109375, 6.1182861328125, 3.189056396484375, -9.962547302246094, 4.859199523925781, 1.1424217224121094, 9.980056762695312, 7.588935852050781, -0.3355560302734375, 1.9510269165039062, -1.63739013671875, 4.6014862060546875, 3.9352645874023438, 3.5014801025390625, 18.759857177734375, -0.03936767578125, -6.202880859375, 12.968231201171875, 7.86955451965332, 0.0218353271484375, 5.288948059082031, -1.2360191345214844, -0.902374267578125, -4.8870849609375, 17.40302276611328, -5.552764892578125, -2.1659317016601562, 10.769538879394531, 6.370613098144531, 9.197929382324219, -2.8191394805908203, -2.2093048095703125, 7.575538635253906, 11.691387176513672, 3.3743324279785156, -0.08605194091796875, 11.802206039428711, -14.090641021728516, -6.0450897216796875, 0.48944091796875, 11.478302001953125, 8.333770751953125, 1.9692497253417969, 13.73764419555664, 4.5773162841796875, 9.497676849365234, -2.2502288818359375, 2.1790542602539062, -6.369182586669922, -4.623252868652344, 15.011001586914062], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000492.npy"}
{"epoch": 0.7437641723356009, "step": 493, "batch_size": 64, "mean": 5.1600661277771, "std": 6.396934986114502, "min": -13.283458709716797, "p10": -2.135987854003906, "median": 4.8587799072265625, "p90": 12.608269500732423, "max": 18.167922973632812, "pos_frac": 0.8125, "sample": [3.2739601135253906, 3.5095176696777344, -1.9790496826171875, 9.332054138183594, 4.9854888916015625, -0.1144256591796875, -11.260345458984375, 5.072872161865234, 2.0019378662109375, 4.463909149169922, 4.141288757324219, 10.690338134765625, 12.49273681640625, 15.083637237548828, 0.8911590576171875, -2.5125808715820312, 11.6239013671875, 1.2653484344482422, 12.10634994506836, 12.657783508300781, 1.5342159271240234, 3.0064544677734375, -1.2612018585205078, 6.242164611816406, 2.5069656372070312, -4.540657043457031, 12.215171813964844, 2.9148826599121094, 18.011669158935547, 5.515295028686523, 3.0541152954101562, 9.370536804199219, -2.82598876953125, 5.189731597900391, -2.2032470703125, 4.916023254394531, 17.133800506591797, -0.8659210205078125, 13.584930419921875, 1.1060333251953125, 11.024101257324219, 7.840087890625, 6.079723358154297, -2.76910400390625, 2.3820037841796875, 18.167922973632812, -0.0217132568359375, 10.590682983398438, 10.568357467651367, 1.03900146484375, 0.34638214111328125, 9.830810546875, 0.9095230102539062, 2.0400161743164062, 17.87392807006836, 8.099403381347656, 5.992706298828125, 4.987632751464844, 8.763153076171875, 9.895179748535156, 4.801536560058594, 3.3810691833496094, 9.37445068359375, -13.283458709716797], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000493.npy"}
{"epoch": 0.745275888133031, "step": 494, "batch_size": 64, "mean": 3.572193145751953, "std": 7.092184066772461, "min": -14.651573181152344, "p10": -3.168009567260742, "median": 3.0169076919555664, "p90": 13.068380546569825, "max": 19.40729331970215, "pos_frac": 0.671875, "sample": [12.623092651367188, 0.6795730590820312, 1.5628509521484375, 4.08026123046875, 7.344692230224609, -0.5402755737304688, -1.2469329833984375, 12.551858901977539, 4.688093185424805, -0.6190338134765625, 0.19225502014160156, -2.3645782470703125, 2.0951385498046875, 7.925289154052734, 2.1150264739990234, 5.235565185546875, 7.457853317260742, 1.1540679931640625, 7.377681732177734, -2.08148193359375, 1.9744529724121094, 14.81256103515625, 8.553897857666016, 3.2564868927001953, 5.984230041503906, 19.40729331970215, 6.563682556152344, -0.778289794921875, 6.15032958984375, 12.396429061889648, 5.9827423095703125, -13.920013427734375, -11.90838623046875, 13.259218215942383, -1.8603572845458984, -4.9034423828125, -8.824134826660156, 7.432914733886719, 4.199287414550781, 4.870685577392578, 2.7555389404296875, -0.4686565399169922, -14.651573181152344, -0.7849540710449219, 7.052104949951172, 2.7773284912109375, 13.779876708984375, 2.4827651977539062, 1.9062881469726562, 3.27227783203125, 17.315216064453125, 5.21234130859375, 7.467132568359375, 15.395401000976562, -0.9272079467773438, -0.529144287109375, -2.392730712890625, -4.0069122314453125, 7.041038513183594, -3.17633056640625, 11.663619995117188, -1.171234130859375, -3.1485939025878906, 18.876174926757812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000494.npy"}
{"epoch": 0.7467876039304611, "step": 495, "batch_size": 64, "mean": 3.4341232776641846, "std": 6.0034685134887695, "min": -11.194564819335938, "p10": -2.8360029220581056, "median": 3.0884323120117188, "p90": 12.078001785278321, "max": 19.248685836791992, "pos_frac": 0.671875, "sample": [-4.1949920654296875, -1.1168365478515625, -2.9613800048828125, -11.194564819335938, 11.847286224365234, -2.630788803100586, -3.1172027587890625, 3.7201080322265625, 5.538356781005859, 0.7797088623046875, -2.755708694458008, -8.448829650878906, -0.10146713256835938, 3.0427169799804688, 4.946662902832031, 16.040321350097656, -3.0659408569335938, 7.260307312011719, 6.564029693603516, 0.6350440979003906, 3.811826705932617, 5.0205841064453125, 7.297050476074219, -0.7938213348388672, 9.733604431152344, 12.1768798828125, -1.6250076293945312, 6.257720947265625, 0.3761882781982422, 1.7434310913085938, 16.1378173828125, 6.195436477661133, 1.5932693481445312, -1.1431522369384766, 7.34559440612793, 0.691802978515625, 0.18539810180664062, 4.723396301269531, 12.530975341796875, 9.280654907226562, -2.8704147338867188, 1.971160888671875, 8.408851623535156, 5.065399169921875, 15.93093490600586, 3.1341476440429688, 0.9676380157470703, -0.8944435119628906, -0.3170318603515625, 19.248685836791992, -1.2930488586425781, 10.943889617919922, 3.8949851989746094, -2.119720458984375, -1.09271240234375, 11.515151977539062, 3.21185302734375, 0.45116424560546875, 3.2883224487304688, -0.9061431884765625, 4.093288421630859, 3.370086669921875, 12.485183715820312, -1.0298271179199219], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000495.npy"}
{"epoch": 0.7482993197278912, "step": 496, "batch_size": 64, "mean": 4.473696708679199, "std": 7.3889479637146, "min": -10.036956787109375, "p10": -3.43405532836914, "median": 2.855672836303711, "p90": 13.504339218139654, "max": 26.7064208984375, "pos_frac": 0.6875, "sample": [6.334648132324219, 6.16522216796875, 21.062942504882812, 6.836097717285156, -2.1922531127929688, 4.406002044677734, 7.054618835449219, 8.60589599609375, 10.934240341186523, 13.980167388916016, -2.0670547485351562, -0.7015609741210938, 9.696954727172852, 2.771984100341797, 0.33310890197753906, -0.3466033935546875, -0.10731315612792969, 5.1770782470703125, 7.262300491333008, 17.8634033203125, -8.989021301269531, 26.7064208984375, -0.07317543029785156, 12.394073486328125, 2.5814590454101562, -10.036956787109375, 17.812713623046875, 9.020034790039062, -0.16913986206054688, 12.318145751953125, -5.4757232666015625, 2.136791229248047, 17.061214447021484, 11.8658447265625, -2.888622283935547, 1.2081317901611328, -5.373064041137695, -1.8070926666259766, 1.5740966796875, 2.6613311767578125, 11.207778930664062, 2.939361572265625, 8.78338623046875, 8.578533172607422, 6.573974609375, 4.304311752319336, -2.57354736328125, 4.101308822631836, -2.7955875396728516, 1.6774330139160156, 19.916732788085938, -6.376682281494141, -4.279060363769531, 0.1653289794921875, 8.468780517578125, 7.522315979003906, 2.465015411376953, 10.003829956054688, -2.3859100341796875, 2.3656692504882812, -1.6306991577148438, 3.688741683959961, -3.6678123474121094, 1.6660614013671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000496.npy"}
{"epoch": 0.7498110355253212, "step": 497, "batch_size": 64, "mean": 5.949126243591309, "std": 7.076747894287109, "min": -10.875251770019531, "p10": -1.7276742935180662, "median": 6.191214561462402, "p90": 14.738764190673832, "max": 24.105972290039062, "pos_frac": 0.765625, "sample": [21.461599349975586, 3.7121200561523438, 24.105972290039062, 10.993022918701172, 5.506378173828125, -1.6579570770263672, 6.20867919921875, 1.427490234375, 3.276548385620117, 8.949432373046875, 8.165437698364258, 2.7833023071289062, 11.654359817504883, 4.322731018066406, 3.5307464599609375, -0.125457763671875, 9.893020629882812, -10.875251770019531, 15.10415267944336, 7.251823425292969, -0.0755157470703125, 7.269420623779297, -1.3123092651367188, -0.4242095947265625, 2.6425209045410156, -1.7575531005859375, -1.2563018798828125, 7.378856658935547, 9.617324829101562, 13.978752136230469, 0.2106781005859375, 4.183929443359375, 6.3502197265625, 9.295356750488281, -6.944465637207031, 9.101835250854492, 13.658798217773438, 9.166793823242188, 12.97439193725586, -3.303163528442383, 15.064483642578125, 4.57929801940918, 0.16402816772460938, 6.398017883300781, 11.60400390625, -3.5915908813476562, 19.612342834472656, 19.72405242919922, -0.07992935180664062, 2.3394851684570312, 6.173749923706055, 7.020782470703125, 8.691473007202148, 7.026157379150391, 3.5079803466796875, 22.109371185302734, -5.243621826171875, 1.7535152435302734, -0.9016151428222656, 9.203460693359375, 9.264373779296875, 2.4475173950195312, -3.0897369384765625, 10.522979736328125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000497.npy"}
{"epoch": 0.7513227513227513, "step": 498, "batch_size": 64, "mean": 5.149151802062988, "std": 6.371250629425049, "min": -6.763885498046875, "p10": -1.607603073120117, "median": 3.769436836242676, "p90": 14.512225914001467, "max": 20.74117088317871, "pos_frac": 0.828125, "sample": [-3.2349472045898438, 4.909526824951172, 12.404983520507812, 5.226856231689453, -4.199554443359375, -3.6117019653320312, 8.685028076171875, 13.336517333984375, -1.6878547668457031, -1.33526611328125, 6.831642150878906, 3.808666229248047, 2.8220291137695312, 3.3987579345703125, 17.11492156982422, 3.3332366943359375, 1.4189910888671875, 1.4494247436523438, 0.9493789672851562, 8.01605224609375, 14.98577880859375, 3.6260948181152344, 10.662721633911133, 0.5364818572998047, 4.7283782958984375, 2.1865081787109375, -1.42034912109375, 12.819849014282227, 13.216854095458984, 0.6231498718261719, 9.67474365234375, 15.696052551269531, 1.435068130493164, 3.9225616455078125, 2.9316177368164062, -1.9283447265625, -0.9103851318359375, 6.459192276000977, 0.059444427490234375, 0.16250038146972656, 12.688106536865234, 14.683862686157227, -6.763885498046875, 7.476753234863281, 3.7345829010009766, 1.488372802734375, 0.16060256958007812, 3.804290771484375, 4.6867523193359375, 4.791067123413086, 13.846122741699219, 20.74117088317871, 8.213676452636719, 0.8086090087890625, 14.111740112304688, 0.06624412536621094, 16.413238525390625, 4.930877685546875, 0.2778797149658203, 0.221923828125, -2.5709075927734375, -0.26602935791015625, 6.4253387451171875, 20.470733642578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000498.npy"}
{"epoch": 0.7528344671201814, "step": 499, "batch_size": 64, "mean": 4.367107391357422, "std": 7.682387828826904, "min": -10.73785400390625, "p10": -6.044127273559568, "median": 3.40185546875, "p90": 14.766441154479983, "max": 22.265167236328125, "pos_frac": 0.703125, "sample": [11.950973510742188, 15.04022216796875, -10.73785400390625, 12.092376708984375, 6.268791198730469, 1.6434822082519531, 1.5672779083251953, -9.464256286621094, 2.519998550415039, 3.8208465576171875, 7.136882781982422, 6.5814666748046875, 19.795806884765625, -4.245845794677734, 8.661483764648438, 0.00719451904296875, 22.265167236328125, -9.054847717285156, 17.64348602294922, 14.127618789672852, 9.738388061523438, -1.424163818359375, -3.441396713256836, -3.9096412658691406, -6.875736236572266, 8.828384399414062, 2.9047698974609375, 8.364059448242188, -3.0526123046875, -4.150764465332031, -0.8131027221679688, -1.3900184631347656, 5.281293869018555, 3.3619232177734375, 6.470294952392578, 0.3346099853515625, 3.188243865966797, 6.979209899902344, -1.028411865234375, 2.0179100036621094, 13.546836853027344, -0.26341819763183594, 12.713462829589844, 3.463663101196289, -6.8148193359375, 3.4417877197265625, 3.2653427124023438, 8.05068588256836, 10.7601318359375, 19.13446044921875, 7.636878967285156, 1.2722511291503906, 8.746545791625977, -7.3300018310546875, -8.076034545898438, 12.246490478515625, 9.256324768066406, 8.207054138183594, -2.1130523681640625, 15.075576782226562, 1.0552902221679688, 16.28925323486328, 2.9783554077148438, -2.05169677734375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000499.npy"}
{"epoch": 0.7543461829176115, "step": 500, "batch_size": 64, "mean": 4.159477233886719, "std": 6.94029426574707, "min": -11.3907470703125, "p10": -4.782805252075195, "median": 3.5405235290527344, "p90": 13.95846290588379, "max": 19.996692657470703, "pos_frac": 0.75, "sample": [-4.390724182128906, 3.8841629028320312, 1.8439407348632812, 1.3165435791015625, 5.3116607666015625, 5.71380615234375, 5.987144470214844, 15.294570922851562, -0.57928466796875, 14.016185760498047, 4.87139892578125, 15.443183898925781, 11.244613647460938, 0.02816009521484375, 13.49505615234375, 9.828189849853516, 19.996692657470703, -5.5834197998046875, 3.2282638549804688, -1.8820877075195312, 10.526153564453125, 7.493583679199219, 8.482772827148438, 9.273757934570312, -0.8403396606445312, 17.08935546875, 7.8841705322265625, -0.0558013916015625, -2.218374252319336, -8.348541259765625, -4.950839996337891, 0.8308296203613281, -0.42954254150390625, 2.967926025390625, -7.44066047668457, -2.3203678131103516, 3.0181427001953125, 2.7458343505859375, 9.645782470703125, 0.07392120361328125, 6.897609710693359, 0.2398700714111328, 5.867805480957031, 1.3047370910644531, -3.8509559631347656, 11.751871109008789, 2.0118751525878906, -11.3907470703125, 10.414604187011719, -10.05108642578125, 15.280990600585938, 3.852783203125, 0.6350154876708984, 4.580348968505859, 10.66754150390625, 2.9827938079833984, 0.764617919921875, 5.2224273681640625, 8.211458206176758, 0.49437713623046875, -6.428459167480469, 5.597757339477539, 14.829730987548828, 13.823776245117188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000500.npy"}
{"epoch": 0.7558578987150416, "step": 501, "batch_size": 64, "mean": 4.054576873779297, "std": 5.173795223236084, "min": -6.5204010009765625, "p10": -1.6113819122314452, "median": 2.950821876525879, "p90": 11.61905670166016, "max": 19.469940185546875, "pos_frac": 0.78125, "sample": [2.9964218139648438, 10.815673828125, 3.19122314453125, 8.147628784179688, 7.078495025634766, 8.066726684570312, 3.220977783203125, 4.0430450439453125, 5.537517547607422, 13.618904113769531, 13.458053588867188, 1.7665977478027344, 7.10943603515625, 1.525390625, -1.9715194702148438, -0.5833759307861328, 8.022377014160156, 12.791831970214844, -3.8008346557617188, -1.7146034240722656, 0.6566848754882812, -0.9774303436279297, -0.24832916259765625, 8.363006591796875, 19.469940185546875, 12.281822204589844, 3.5965194702148438, 17.08245849609375, 1.621063232421875, -0.7638397216796875, -1.48907470703125, 2.456939697265625, -1.6637992858886719, 1.1867179870605469, 6.8282012939453125, 1.35260009765625, 6.57172966003418, -2.602567672729492, 2.952787399291992, 0.8083744049072266, 2.394552230834961, 11.963363647460938, 0.4425048828125, 2.9488563537597656, 1.5977706909179688, -6.5204010009765625, 1.1263656616210938, 1.9273529052734375, -3.9622650146484375, 1.7530593872070312, 3.9973831176757812, 7.000343322753906, 2.5901565551757812, -0.33234405517578125, 4.6429595947265625, -0.2568683624267578, 3.1121063232421875, 2.4254379272460938, 9.034172058105469, 10.31500244140625, 10.621631622314453, 5.170551300048828, 4.567880630493164, 2.1295623779296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000501.npy"}
{"epoch": 0.7573696145124716, "step": 502, "batch_size": 64, "mean": 3.9702377319335938, "std": 5.517757892608643, "min": -6.828948974609375, "p10": -2.4325424194335934, "median": 3.43377685546875, "p90": 9.82161560058594, "max": 23.11041259765625, "pos_frac": 0.828125, "sample": [-0.8323211669921875, 0.3532257080078125, 3.2416305541992188, -2.565673828125, 9.127532958984375, 6.042839050292969, 2.488250732421875, 3.0263671875, -6.183418273925781, 5.1847381591796875, 5.224155426025391, 11.808181762695312, 1.2800922393798828, -6.643348693847656, 4.069732666015625, 17.993820190429688, 3.971466064453125, -2.5779571533203125, 3.7004928588867188, 1.9133758544921875, 4.968162536621094, 0.11208724975585938, 5.588062286376953, 6.377937316894531, 3.4710235595703125, 1.9598121643066406, 8.578193664550781, -3.7102088928222656, 0.6526966094970703, 7.422065734863281, 0.6978759765625, 0.7072277069091797, 6.190204620361328, 1.5255584716796875, 4.247627258300781, 3.3965301513671875, 16.15178680419922, 23.11041259765625, -6.828948974609375, 0.8465156555175781, -1.7454299926757812, 1.76861572265625, 5.981218338012695, -0.02214813232421875, 2.771047592163086, 0.6950836181640625, 2.505126953125, 7.177192687988281, 8.78619384765625, -2.1219024658203125, 6.418840408325195, 4.79217529296875, 5.8224029541015625, 1.050537109375, 9.21929931640625, -4.5098876953125, 8.479446411132812, 10.068954467773438, 0.28070831298828125, 10.688568115234375, 1.3777427673339844, 9.244491577148438, 13.0103759765625, 6.268743515014648], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000502.npy"}
{"epoch": 0.7588813303099018, "step": 503, "batch_size": 64, "mean": 4.167027473449707, "std": 6.335958003997803, "min": -11.334884643554688, "p10": -2.4649351119995115, "median": 2.7288665771484375, "p90": 12.486080360412599, "max": 16.009838104248047, "pos_frac": 0.828125, "sample": [5.597381591796875, -2.5710887908935547, -0.9178619384765625, 0.7946701049804688, 10.410614013671875, 3.1615371704101562, 10.818031311035156, -10.862030029296875, 1.3576431274414062, 7.978208541870117, 8.988449096679688, 1.8916778564453125, 2.6551742553710938, 15.810840606689453, 1.8579177856445312, 8.31583023071289, -2.077909469604492, 1.02301025390625, 13.629745483398438, -3.8125534057617188, 2.0536422729492188, 1.8283424377441406, 16.009838104248047, 2.9548492431640625, 2.8025588989257812, 2.9273529052734375, 7.435966491699219, 14.534957885742188, 2.055408477783203, 7.775493621826172, -2.217243194580078, 2.0782814025878906, 15.945228576660156, 8.746818542480469, 13.685455322265625, 0.31818389892578125, 7.773536682128906, -0.40363311767578125, -11.334884643554688, 1.6871414184570312, 2.5164222717285156, 2.3862152099609375, 0.8241748809814453, 7.629606246948242, 1.0619831085205078, 2.2867202758789062, 1.59576416015625, 12.066307067871094, 12.665983200073242, 4.658180236816406, 10.492008209228516, 11.11699104309082, -10.152034759521484, 0.9464035034179688, 10.464988708496094, 10.653247833251953, 1.4198970794677734, -5.132024765014648, 3.8908615112304688, 6.932273864746094, 1.855264663696289, 3.57025146484375, 10.723623275756836, -8.489927291870117], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000503.npy"}
{"epoch": 0.7603930461073318, "step": 504, "batch_size": 64, "mean": 5.8000006675720215, "std": 5.878600120544434, "min": -3.9700088500976562, "p10": -0.5937572479248044, "median": 4.870877265930176, "p90": 14.10512638092041, "max": 20.208499908447266, "pos_frac": 0.828125, "sample": [-1.743194580078125, 13.988855361938477, -0.294097900390625, 9.892183303833008, 6.5471038818359375, 0.01715850830078125, 7.622100830078125, 5.694936752319336, 20.208499908447266, -0.6885967254638672, 1.3585281372070312, 6.688957214355469, 2.3024215698242188, 14.154956817626953, 2.582416534423828, 12.780496597290039, 10.684646606445312, -1.7150344848632812, 7.72772216796875, 17.33112335205078, 6.019741058349609, 3.0775146484375, 1.433279037475586, 11.977609634399414, 14.712295532226562, 0.5259780883789062, 2.632627487182617, 15.921764373779297, 0.7213191986083984, 20.162452697753906, 8.86737060546875, 10.16900634765625, -0.3724651336669922, 8.630502700805664, 9.378707885742188, -0.0775146484375, 3.5017929077148438, -3.9700088500976562, 9.823074340820312, 4.221099853515625, 10.342613220214844, 1.2368621826171875, 1.8621292114257812, 7.0001983642578125, -0.741485595703125, 7.016448974609375, 6.422306060791016, 3.959035873413086, 8.047088623046875, 1.6745414733886719, 13.706188201904297, 1.245676040649414, -3.6799259185791016, -2.41357421875, 4.084339141845703, 3.9964599609375, -0.2138233184814453, 5.2739410400390625, 7.43988037109375, 0.4286003112792969, 4.467813491821289, 18.406890869140625, 5.85400390625, 3.286489486694336], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000504.npy"}
{"epoch": 0.7619047619047619, "step": 505, "batch_size": 64, "mean": 3.942927598953247, "std": 7.071103096008301, "min": -10.886310577392578, "p10": -2.34366397857666, "median": 2.6914405822753906, "p90": 13.81332244873047, "max": 22.36743927001953, "pos_frac": 0.703125, "sample": [4.501157760620117, 0.32834434509277344, 0.54656982421875, 2.0909080505371094, -0.8269157409667969, 12.77667236328125, 1.0001373291015625, 21.128067016601562, 8.574003219604492, 10.298988342285156, 14.45733642578125, -2.4606094360351562, 1.3208389282226562, -0.012348175048828125, 4.598472595214844, -1.17694091796875, 2.0654144287109375, 0.2411651611328125, 7.231172561645508, 4.205005645751953, -1.5760498046875, 6.686435699462891, 9.811386108398438, 4.741970062255859, 10.504371643066406, 22.36743927001953, 5.9349517822265625, -0.322601318359375, 4.932716369628906, 0.015115737915039062, 4.365211486816406, -5.91302490234375, 13.998916625976562, -8.826065063476562, 16.895401000976562, 10.301727294921875, 4.221076965332031, 10.354669570922852, -1.523550033569336, -1.7323226928710938, 4.779975891113281, 2.8964767456054688, -1.8445968627929688, -10.319450378417969, 1.0995025634765625, 1.1849384307861328, 5.433086395263672, -2.070791244506836, 15.745979309082031, -10.886310577392578, -1.4861831665039062, 10.423837661743164, 13.48735237121582, 3.79632568359375, 12.470878601074219, 2.4864044189453125, 6.781196594238281, -1.6947097778320312, 2.2738113403320312, 13.953023910522461, 2.168210983276367, -0.30204010009765625, -4.548820495605469, -9.605941772460938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000505.npy"}
{"epoch": 0.763416477702192, "step": 506, "batch_size": 64, "mean": 3.8978564739227295, "std": 6.797451496124268, "min": -12.362930297851562, "p10": -4.326982116699218, "median": 3.9053192138671875, "p90": 11.200627708435059, "max": 22.858238220214844, "pos_frac": 0.765625, "sample": [5.825857162475586, 8.244651794433594, -1.266744613647461, 0.3079681396484375, 4.321380615234375, 3.450775146484375, 6.0880889892578125, -4.719036102294922, 4.443883895874023, -0.27899169921875, 1.2040901184082031, 9.082733154296875, 6.9646759033203125, 3.2028732299804688, 1.470672607421875, 1.9000396728515625, -8.656768798828125, 18.977317810058594, 22.858238220214844, 1.8697586059570312, 9.559276580810547, -8.3616943359375, 3.1383743286132812, -12.362930297851562, -6.36322021484375, -11.867439270019531, 1.9047622680664062, 18.267486572265625, 12.05047607421875, 10.69997787475586, 16.42923355102539, -4.6270599365234375, 10.997550964355469, -3.626800537109375, 11.287660598754883, 2.5359878540039062, 8.836421966552734, 1.10943603515625, 14.049827575683594, -1.8704681396484375, -1.441741943359375, 0.247894287109375, 1.2485923767089844, 6.285057067871094, 6.939849853515625, 3.3351173400878906, -0.7961578369140625, 3.4892578125, 4.947929382324219, 6.266548156738281, 8.186502456665039, 6.0181121826171875, 1.007415771484375, 6.922248840332031, 9.10699462890625, 5.795112609863281, 0.9421920776367188, 4.953086853027344, -3.3802871704101562, -3.4836883544921875, 5.4095306396484375, 6.8307037353515625, 7.144584655761719, 6.409637451171875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000506.npy"}
{"epoch": 0.764928193499622, "step": 507, "batch_size": 64, "mean": 4.713381767272949, "std": 7.17141056060791, "min": -8.262199401855469, "p10": -3.577173042297363, "median": 4.573822021484375, "p90": 14.522031402587896, "max": 25.337921142578125, "pos_frac": 0.734375, "sample": [9.612894058227539, 4.6351318359375, -2.8101348876953125, 10.7872314453125, 25.337921142578125, 2.5453720092773438, 13.344696044921875, -6.162841796875, -5.794654846191406, 3.7654495239257812, 0.6770477294921875, 3.978118896484375, -2.3773269653320312, 15.026603698730469, -2.3097877502441406, 8.239883422851562, 2.071014404296875, -1.8192825317382812, 8.717700958251953, 1.5996131896972656, 1.9957122802734375, 7.400634765625, 6.607147216796875, -3.257009506225586, 16.092628479003906, 1.0777168273925781, 2.588489532470703, 5.4376983642578125, 5.363687515258789, 4.761444091796875, 4.51251220703125, 0.162261962890625, 21.551345825195312, -3.714385986328125, 5.961009979248047, 6.5379638671875, -1.864990234375, 10.319389343261719, -6.041994094848633, -1.8670787811279297, -1.150054931640625, 8.15557861328125, 0.8704319000244141, 11.690240859985352, 5.169530868530273, 1.8620758056640625, 7.653068542480469, 2.9029388427734375, 16.2777099609375, 19.448471069335938, -8.262199401855469, 11.188934326171875, -0.08941650390625, 0.8295135498046875, 9.522933959960938, -1.2040634155273438, 12.696590423583984, -5.257789611816406, 6.4072723388671875, -8.030689239501953, 5.785491943359375, 12.552299499511719, 4.893917083740234, 15.05483627319336], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000507.npy"}
{"epoch": 0.7664399092970522, "step": 508, "batch_size": 64, "mean": 4.83477783203125, "std": 6.542921543121338, "min": -7.68426513671875, "p10": -2.812068176269531, "median": 4.902353286743164, "p90": 14.352578735351562, "max": 20.273101806640625, "pos_frac": 0.765625, "sample": [8.20791244506836, 2.1360855102539062, 11.382858276367188, 5.770362854003906, 1.1739349365234375, -5.1820831298828125, 20.233680725097656, -1.4661178588867188, 8.114456176757812, 14.365753173828125, 20.273101806640625, 6.855560302734375, 0.9916496276855469, 2.9526443481445312, -1.650421142578125, 7.695350646972656, -6.940637588500977, 3.6489715576171875, 4.879261016845703, -5.050699234008789, 4.925445556640625, -2.76239013671875, 7.780221939086914, 0.5506782531738281, 0.18209075927734375, -7.68426513671875, 5.746091842651367, 1.8927783966064453, 7.05181884765625, 6.975536346435547, 2.46600341796875, 11.186996459960938, 4.5259246826171875, 3.3673324584960938, 14.899276733398438, 7.3670806884765625, 1.8047142028808594, 5.149444580078125, 17.368377685546875, -0.35503387451171875, -0.6780033111572266, -2.4473800659179688, 12.89422607421875, 11.60611343383789, -0.7577247619628906, -1.6964874267578125, 6.000984191894531, 14.32183837890625, 15.602264404296875, 2.014333724975586, 7.359516143798828, 1.5445995330810547, 3.1133766174316406, -2.8753833770751953, 0.7464828491210938, -7.249080657958984, -2.8333587646484375, 17.97039031982422, 7.003929138183594, 6.46441650390625, 7.765340805053711, 6.7829132080078125, 5.895133972167969, 10.047611236572266], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000508.npy"}
{"epoch": 0.7679516250944822, "step": 509, "batch_size": 64, "mean": 4.454644680023193, "std": 6.249189853668213, "min": -11.8660888671875, "p10": -3.826739501953125, "median": 3.5270586013793945, "p90": 11.536935424804687, "max": 18.930641174316406, "pos_frac": 0.734375, "sample": [6.969547271728516, -1.7279434204101562, 0.3311767578125, 8.16317367553711, 2.429534912109375, -0.1392498016357422, 2.287281036376953, -2.482715606689453, -7.6036224365234375, -0.44094085693359375, 5.312110900878906, 11.286422729492188, 4.303253173828125, 3.6334171295166016, 10.720680236816406, -11.8660888671875, 18.77398681640625, 8.296089172363281, -0.7424163818359375, 1.9538116455078125, 9.010078430175781, -3.84228515625, 7.382049560546875, -5.199378967285156, 11.727195739746094, 3.2789840698242188, -1.663421630859375, 1.7315864562988281, 11.62112045288086, 2.06988525390625, 10.083709716796875, 10.89168930053711, 6.529121398925781, 6.623619079589844, 1.2903060913085938, -0.2316608428955078, 11.97524642944336, 3.1933212280273438, 10.052352905273438, 11.378190994262695, 3.4207000732421875, 0.13800430297851562, 11.58673095703125, -5.1250762939453125, 10.481033325195312, 8.172752380371094, 3.3628311157226562, 8.834487915039062, 18.930641174316406, 11.029373168945312, 11.420745849609375, -0.7511672973632812, -1.7522735595703125, -5.9568328857421875, 2.3911590576171875, 6.617095947265625, 7.840841293334961, -3.928997039794922, -3.79046630859375, 2.7893218994140625, 6.413877487182617, 3.2984771728515625, 12.90960693359375, 9.405166625976562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000509.npy"}
{"epoch": 0.7694633408919124, "step": 510, "batch_size": 64, "mean": 4.791869163513184, "std": 6.894128322601318, "min": -8.79339599609375, "p10": -3.438182830810546, "median": 4.141348838806152, "p90": 13.767371749877933, "max": 20.865333557128906, "pos_frac": 0.71875, "sample": [1.8426055908203125, 14.034473419189453, 11.949241638183594, 3.2733612060546875, -7.65673828125, -0.39910888671875, 6.412925720214844, 19.30925750732422, 1.739410400390625, 2.4076690673828125, -0.0280914306640625, -0.19766998291015625, 1.3529853820800781, 2.5102767944335938, -5.914430618286133, -0.4596214294433594, -0.23418807983398438, 7.215232849121094, 0.35107421875, 4.262639999389648, 6.339508056640625, 11.802938461303711, 12.516716003417969, -2.3941192626953125, 4.020057678222656, 10.635700225830078, 9.720230102539062, -0.1695556640625, -0.9223556518554688, 11.00650405883789, 8.2781982421875, 6.647430419921875, 16.563430786132812, 6.259632110595703, 3.7585220336914062, 7.6382904052734375, 6.214088439941406, 19.77025604248047, -1.9460601806640625, -3.88037109375, 1.578500747680664, -8.79339599609375, 7.42303466796875, 10.452600479125977, 7.65728759765625, 1.6504364013671875, 6.267646789550781, 20.865333557128906, 2.974761962890625, -2.4064102172851562, 7.222236633300781, 3.4782142639160156, 14.918643951416016, 4.838964462280273, -7.378883361816406, 5.260705947875977, 15.720184326171875, 12.489265441894531, -0.8367748260498047, 7.617715835571289, 13.144134521484375, 2.4422531127929688, -4.754610061645508, -8.7825927734375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000510.npy"}
{"epoch": 0.7709750566893424, "step": 511, "batch_size": 64, "mean": 6.163692951202393, "std": 6.683724403381348, "min": -7.002105712890625, "p10": -2.6585411071777343, "median": 5.650791168212891, "p90": 16.290493392944335, "max": 18.81463623046875, "pos_frac": 0.8125, "sample": [18.81463623046875, 13.609214782714844, 6.30504035949707, 16.498058319091797, 5.366580963134766, 2.93292236328125, 1.4091167449951172, 0.8146705627441406, 14.924720764160156, 13.791492462158203, 6.11163330078125, -3.5788955688476562, 5.747894287109375, 4.8148651123046875, 12.828731536865234, 7.0965118408203125, 8.77427864074707, 1.8944168090820312, 13.372142791748047, 9.896263122558594, 13.85239028930664, -5.773956298828125, 4.017417907714844, -7.002105712890625, -2.5621490478515625, -0.854278564453125, 16.243274688720703, 9.141830444335938, 5.582923889160156, 3.72869873046875, -0.3515434265136719, 4.4993896484375, 5.718658447265625, 2.4721221923828125, 6.873382568359375, 8.70831298828125, 11.802837371826172, 2.722522735595703, -2.6998519897460938, 2.3724803924560547, 0.967742919921875, 9.676021575927734, 12.893524169921875, 18.21540069580078, 16.8681640625, 9.53057861328125, -4.891117095947266, 1.3866043090820312, -0.09775924682617188, 17.561410903930664, 3.0988616943359375, 2.909738540649414, 7.1963043212890625, 8.09572982788086, 1.1561222076416016, 2.201711654663086, -3.7385387420654297, 17.95037841796875, 6.921783447265625, 12.863384246826172, 16.31072998046875, -0.67962646484375, -3.5653533935546875, 1.7279090881347656], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000511.npy"}
{"epoch": 0.7724867724867724, "step": 512, "batch_size": 64, "mean": 5.151974201202393, "std": 12.586156845092773, "min": -10.4595947265625, "p10": -4.040453338623047, "median": 3.410646438598633, "p90": 13.904624938964846, "max": 89.63299560546875, "pos_frac": 0.734375, "sample": [10.72091293334961, 5.71484375, 9.493614196777344, -7.974578857421875, -4.0666351318359375, 17.03351593017578, 0.2294483184814453, 1.3891410827636719, 1.4806365966796875, 4.469337463378906, 7.89088249206543, 11.343421936035156, -10.4595947265625, -1.3945999145507812, 5.431297302246094, -0.22278594970703125, 1.3392410278320312, -1.1375160217285156, 13.085725784301758, 0.31117820739746094, 11.296554565429688, 8.842182159423828, 1.3823318481445312, 4.8760528564453125, 3.3370819091796875, 7.840606689453125, -8.556076049804688, 4.80560302734375, 4.992271423339844, 15.045059204101562, -1.1392402648925781, 6.595611572265625, 14.298736572265625, 0.4434242248535156, 6.34326171875, 0.834228515625, -0.17147445678710938, 7.466499328613281, 0.6857948303222656, 14.133598327636719, 2.0686569213867188, -6.802453994750977, -0.17098045349121094, -3.9793624877929688, 4.35955810546875, 12.359603881835938, 0.42069244384765625, 3.484210968017578, 2.885711669921875, -1.1702003479003906, 13.370353698730469, -1.6752700805664062, 0.3173370361328125, -7.986503601074219, 2.5305328369140625, 3.5101547241210938, 89.63299560546875, 15.206356048583984, 20.968780517578125, 12.939865112304688, 4.5400390625, -7.053291320800781, 4.941440582275391, -3.0014724731445312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000512.npy"}
{"epoch": 0.7739984882842026, "step": 513, "batch_size": 64, "mean": 5.091045379638672, "std": 5.972817897796631, "min": -5.494392395019531, "p10": -2.0115798950195307, "median": 3.998615264892578, "p90": 15.569582366943363, "max": 21.804508209228516, "pos_frac": 0.828125, "sample": [17.246009826660156, 8.924697875976562, 18.129844665527344, 7.121856689453125, 4.435905456542969, 3.779560089111328, 4.357215881347656, 5.191799163818359, 2.080564498901367, -0.8758316040039062, 3.3203811645507812, -4.271411895751953, 5.664226531982422, 1.95062255859375, 2.402547836303711, 5.051874160766602, -1.6236457824707031, 3.979034423828125, 21.804508209228516, 7.8934326171875, 8.138294219970703, 17.31240463256836, 5.88043212890625, 3.1237335205078125, 14.809009552001953, 0.36235809326171875, 5.4658050537109375, 8.136741638183594, 2.004711151123047, 6.02197265625, 4.018196105957031, 0.3711204528808594, 8.917152404785156, -5.494392395019531, 3.8742446899414062, 3.132913589477539, -2.177837371826172, 4.257171630859375, 17.61083984375, 1.1545143127441406, -2.615938186645508, -3.256956100463867, 16.319808959960938, -0.42420196533203125, 10.76904296875, 12.35689926147461, 15.89554214477539, 3.3662261962890625, 8.743911743164062, 3.481342315673828, 4.297088623046875, 4.309234619140625, 9.840911865234375, 2.9520263671875, 3.4326019287109375, -4.385295867919922, 2.4219284057617188, 1.5250511169433594, 0.9718093872070312, 2.1066131591796875, 8.287078857421875, -2.2087173461914062, -1.2366943359375, 5.395017623901367], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000513.npy"}
{"epoch": 0.7755102040816326, "step": 514, "batch_size": 64, "mean": 5.028005599975586, "std": 6.5118536949157715, "min": -13.691490173339844, "p10": -3.3434486389160156, "median": 5.615470886230469, "p90": 13.786643981933596, "max": 17.61316680908203, "pos_frac": 0.796875, "sample": [0.2252826690673828, 3.6006507873535156, 7.318378448486328, 9.926097869873047, 5.739410400390625, 4.512483596801758, 13.097824096679688, -1.9521865844726562, 5.5371856689453125, 0.9649391174316406, 4.858333587646484, 11.375736236572266, 6.818763732910156, 8.06439208984375, -1.9259452819824219, 1.0398368835449219, -7.1223297119140625, 6.543525695800781, -0.3113288879394531, 6.168418884277344, -13.691490173339844, 1.6081466674804688, 7.91783332824707, 8.548173904418945, 17.50322723388672, -3.389129638671875, 0.4182891845703125, 6.628364562988281, 8.592826843261719, 9.621994018554688, -1.8817291259765625, -4.994283676147461, 10.074813842773438, 6.392921447753906, 9.887046813964844, 8.423530578613281, 3.761016845703125, 14.024421691894531, 2.1378631591796875, 4.092607498168945, 0.9349517822265625, -3.3840179443359375, -2.58074951171875, 3.3676605224609375, 9.528564453125, 9.086387634277344, 5.693756103515625, 1.2364273071289062, 4.60991096496582, 9.254974365234375, -3.2487869262695312, 0.12619400024414062, 3.7566280364990234, 17.61316680908203, -5.918670654296875, 13.401748657226562, 13.95159912109375, 6.940330505371094, 14.723464965820312, 0.09885787963867188, 17.52301025390625, 15.931739807128906, -3.3982696533203125, 12.387557983398438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000514.npy"}
{"epoch": 0.7770219198790628, "step": 515, "batch_size": 64, "mean": 5.077951431274414, "std": 6.363601207733154, "min": -8.42364501953125, "p10": -2.631349182128906, "median": 5.548102378845215, "p90": 14.014167404174808, "max": 18.596078872680664, "pos_frac": 0.796875, "sample": [1.9124336242675781, 5.712186813354492, 1.52825927734375, 6.919242858886719, 17.517257690429688, 13.277046203613281, 7.890285491943359, 11.596488952636719, 1.8198165893554688, 9.928085327148438, 8.599464416503906, 7.122642517089844, 11.040594100952148, -2.2541866302490234, 5.581533432006836, 6.48297119140625, -2.3117904663085938, 11.377227783203125, -4.258049011230469, 0.277679443359375, 5.450080871582031, 0.8871994018554688, 0.779876708984375, 5.772674560546875, -8.42364501953125, 4.518119812011719, 18.596078872680664, 5.962799072265625, 5.589149475097656, 0.01654815673828125, 4.894676208496094, 7.044410705566406, -1.8129768371582031, -1.544973373413086, 6.7897491455078125, 14.53897476196289, -7.233436584472656, 7.72308349609375, 0.3815174102783203, 3.3280487060546875, -4.2054595947265625, 3.7256622314453125, 14.3128662109375, -4.516139984130859, 4.776519775390625, 16.976425170898438, 5.913885116577148, 0.33379554748535156, 10.981430053710938, 12.445837020874023, 5.764122009277344, 5.514671325683594, 1.9429931640625, 16.735549926757812, 14.752822875976562, -4.841709136962891, 8.754858016967773, 0.6536293029785156, 13.317203521728516, -2.7683029174804688, 3.0560073852539062, 11.184417724609375, -1.2826957702636719, -1.556640625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000515.npy"}
{"epoch": 0.7785336356764928, "step": 516, "batch_size": 64, "mean": 4.166782379150391, "std": 6.105935573577881, "min": -13.665775299072266, "p10": -1.2441034317016597, "median": 4.5631818771362305, "p90": 11.332461166381837, "max": 19.127609252929688, "pos_frac": 0.8125, "sample": [15.0845947265625, 19.127609252929688, 10.113937377929688, -3.927154541015625, 0.5549545288085938, 11.45322036743164, -4.037391662597656, 4.490850448608398, 3.5335521697998047, 6.666965484619141, 0.01507568359375, 6.586185455322266, 9.6630859375, 16.578018188476562, 2.7440357208251953, 0.5332660675048828, 6.672966003417969, 8.866872787475586, 11.050689697265625, 7.006256103515625, 0.2255706787109375, -13.665775299072266, 4.640800476074219, 4.6355133056640625, 16.745277404785156, 7.3710784912109375, 5.1959075927734375, 0.014583587646484375, 11.510234832763672, 6.619384765625, -7.70050048828125, -0.34496307373046875, 0.2402973175048828, -0.15740203857421875, 3.6772232055664062, 5.9429779052734375, 7.634494781494141, 0.12188720703125, 1.5197677612304688, -0.11480712890625, 4.873863220214844, -12.043327331542969, 4.8291778564453125, 7.085521697998047, 6.300910949707031, 2.70343017578125, 10.524177551269531, 4.998638153076172, 2.2060165405273438, 4.4899444580078125, 1.3938140869140625, 4.9152069091796875, 0.5692977905273438, 0.49222755432128906, -2.252105712890625, 9.305557250976562, 0.3277091979980469, 6.171895980834961, -0.7400665283203125, -1.4601192474365234, 9.037132263183594, 1.8239421844482422, -0.46904754638671875, 14.701126098632812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000516.npy"}
{"epoch": 0.780045351473923, "step": 517, "batch_size": 64, "mean": 4.333193778991699, "std": 6.655924320220947, "min": -8.479166030883789, "p10": -4.014149284362793, "median": 3.280215263366699, "p90": 11.849555969238283, "max": 24.391998291015625, "pos_frac": 0.734375, "sample": [6.1005706787109375, -1.966705322265625, -1.8733596801757812, 6.364105224609375, -4.7834320068359375, 2.668424606323242, 2.7707901000976562, 3.0872020721435547, 15.576324462890625, 5.056018829345703, 7.394927978515625, -5.008066177368164, -1.87066650390625, 2.8634796142578125, -0.4525146484375, -4.2217559814453125, 0.8655490875244141, 6.9122467041015625, 8.245702743530273, -0.577545166015625, 8.499488830566406, -4.368976593017578, 9.79815673828125, 17.812286376953125, -2.184173583984375, 1.4911441802978516, 2.1175193786621094, 3.6331863403320312, 6.886199951171875, 8.015453338623047, 7.916038513183594, 12.038314819335938, 7.855291366577148, 2.503814697265625, -1.341552734375, -8.479166030883789, 3.4732284545898438, 11.40911865234375, -1.7444343566894531, -4.6037139892578125, 8.550178527832031, 11.367086410522461, 20.455787658691406, 1.7511062622070312, 4.1331329345703125, 15.94529914855957, 4.317646026611328, 6.689085006713867, -2.7869110107421875, 3.7222824096679688, 1.7943840026855469, 6.872062683105469, 2.2437896728515625, 24.391998291015625, 19.615676879882812, 10.069074630737305, 0.8614349365234375, -3.529733657836914, 3.083486557006836, 2.937816619873047, 4.98295783996582, -6.230480194091797, 6.105937957763672, 2.1027908325195312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000517.npy"}
{"epoch": 0.781557067271353, "step": 518, "batch_size": 64, "mean": 3.5995752811431885, "std": 6.458415985107422, "min": -7.649871826171875, "p10": -3.3374111175537107, "median": 3.046706199645996, "p90": 12.576962280273438, "max": 22.085716247558594, "pos_frac": 0.671875, "sample": [3.6565628051757812, -1.3057479858398438, 0.47112274169921875, 7.6543731689453125, -0.9300537109375, -2.1554031372070312, 1.1836395263671875, 8.826351165771484, -6.745784759521484, 2.597278594970703, 0.9329948425292969, -1.9309806823730469, 3.21728515625, 9.01171875, 5.8971099853515625, -6.1985626220703125, 3.3363418579101562, 14.787042617797852, -6.204261779785156, 5.2818450927734375, 12.649169921875, -7.649871826171875, -0.17096710205078125, 5.073543548583984, 7.9438629150390625, 6.521244049072266, -2.7732696533203125, 4.821014404296875, -7.320610046386719, 13.770011901855469, -1.1815261840820312, -3.4470558166503906, -0.5736217498779297, 9.311227798461914, 7.63470458984375, -1.6279220581054688, 5.082851409912109, -3.081573486328125, 5.504508972167969, 22.085716247558594, 7.9216766357421875, 0.7384033203125, 1.7063980102539062, 20.16997528076172, 9.01776123046875, 12.408477783203125, -0.7382965087890625, 14.560636520385742, 4.86761474609375, -4.815704345703125, -1.1925544738769531, -1.9851016998291016, 0.965789794921875, 0.08161163330078125, 2.5729827880859375, 7.782249450683594, 8.028182983398438, 5.3776702880859375, -0.07538986206054688, 2.876127243041992, 3.821502685546875, 0.06239128112792969, 17.383880615234375, 4.882232666015625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000518.npy"}
{"epoch": 0.783068783068783, "step": 519, "batch_size": 64, "mean": 2.83676815032959, "std": 7.709024429321289, "min": -17.863189697265625, "p10": -5.459035491943359, "median": 1.8335418701171875, "p90": 12.514951515197756, "max": 18.552642822265625, "pos_frac": 0.625, "sample": [-10.426939010620117, 8.568901062011719, -1.0694580078125, -14.7742919921875, -1.7435302734375, 7.298362731933594, -1.2909088134765625, 16.9835205078125, 5.144279479980469, 4.5113525390625, -0.061920166015625, 4.656095504760742, 10.678680419921875, -0.9580764770507812, 1.83685302734375, -0.9397106170654297, 8.545982360839844, 1.5254688262939453, -4.950290679931641, -8.497039794921875, 12.697591781616211, 16.56866455078125, -3.379486083984375, 4.744651794433594, 8.5648193359375, 16.19806671142578, -4.705589294433594, 4.749324798583984, 8.811332702636719, 1.1705474853515625, -3.768890380859375, 10.911872863769531, 0.547576904296875, -3.1396331787109375, 5.621173858642578, -4.241325378417969, 1.830230712890625, 2.5119285583496094, 11.965240478515625, -4.8360137939453125, 1.6116600036621094, 3.410186767578125, 10.734344482421875, 3.0009288787841797, 3.680694580078125, -17.863189697265625, 1.273223876953125, -2.524871826171875, 9.449081420898438, -5.570806503295898, 15.454872131347656, -3.1892929077148438, -5.663814544677734, 18.552642822265625, 1.68804931640625, 4.923755645751953, 11.281814575195312, 12.088790893554688, 6.690921783447266, -5.198236465454102, 1.5965557098388672, -1.4340267181396484, 16.33275604248047, -6.632307052612305], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000519.npy"}
{"epoch": 0.7845804988662132, "step": 520, "batch_size": 64, "mean": 4.272979736328125, "std": 5.490602970123291, "min": -5.949005126953125, "p10": -2.971097183227539, "median": 3.9857025146484375, "p90": 13.6473991394043, "max": 15.766891479492188, "pos_frac": 0.796875, "sample": [5.673501968383789, -1.6165847778320312, -0.5878200531005859, 4.506229400634766, 9.288375854492188, 10.458183288574219, 5.703762054443359, 0.1658477783203125, 1.7989273071289062, 7.023223876953125, 6.820770263671875, 3.2637157440185547, 2.0305633544921875, 4.052509307861328, 8.111541748046875, 2.4146347045898438, 0.296630859375, -2.7545928955078125, 0.9356632232666016, 13.903968811035156, 11.811813354492188, 3.5321731567382812, 5.331981658935547, 13.048736572265625, 4.051971435546875, 7.621833801269531, 14.842117309570312, -4.882865905761719, -3.0651512145996094, 14.102100372314453, 7.828575134277344, 3.157756805419922, -0.2755603790283203, 5.1995391845703125, 2.2095565795898438, 0.6386947631835938, 14.059038162231445, 3.91943359375, -3.063884735107422, -5.154701232910156, -5.137594223022461, 3.1184539794921875, 1.4995193481445312, -5.949005126953125, 1.3677215576171875, 0.35516357421875, 5.506797790527344, 5.872798919677734, 4.594747543334961, 1.86297607421875, 1.5764007568359375, 14.906082153320312, 14.409355163574219, 5.890771865844727, 15.766891479492188, 8.607275009155273, 7.030548095703125, 5.246402740478516, -0.28170204162597656, -1.564234733581543, 3.230550765991211, 6.844738006591797, 7.712913513183594, -5.3990631103515625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000520.npy"}
{"epoch": 0.7860922146636432, "step": 521, "batch_size": 64, "mean": 4.257848262786865, "std": 6.160005569458008, "min": -13.508209228515625, "p10": -2.1524103164672845, "median": 3.466907501220703, "p90": 12.100122833251953, "max": 19.453140258789062, "pos_frac": 0.828125, "sample": [8.111392974853516, 2.073211669921875, 10.005691528320312, 2.095428466796875, 12.18157958984375, 2.864288330078125, 11.910057067871094, 0.0289459228515625, -3.10479736328125, 8.40155029296875, 6.079654693603516, -0.24532699584960938, 7.423126220703125, 2.7688827514648438, 10.139839172363281, 6.0828857421875, 6.366598129272461, 3.9697341918945312, 3.351409912109375, 17.122940063476562, 2.078826904296875, 10.508415222167969, 1.533355712890625, 0.36055755615234375, -0.5394344329833984, 6.987457275390625, -5.227447509765625, 11.398872375488281, 1.022552490234375, 7.0135498046875, 19.453140258789062, 16.100799560546875, -7.922649383544922, 1.4480533599853516, 0.1912689208984375, -3.2243270874023438, -0.48410797119140625, 5.277748107910156, 0.59222412109375, 13.917625427246094, 7.5118560791015625, 0.20085716247558594, -2.4817638397216797, 3.5824050903320312, -1.3839187622070312, 2.3158130645751953, 7.60369873046875, 9.133430480957031, 4.7563018798828125, 12.572746276855469, 3.323711395263672, 4.585441589355469, -13.508209228515625, 14.285186767578125, -9.937301635742188, 1.5105953216552734, 2.2885265350341797, 0.04824066162109375, 9.597511291503906, 0.3292694091796875, 5.99127197265625, 0.9615936279296875, 7.288909912109375, 3.812530517578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000521.npy"}
{"epoch": 0.7876039304610734, "step": 522, "batch_size": 64, "mean": 4.138129234313965, "std": 7.350427627563477, "min": -19.4471435546875, "p10": -4.989515495300292, "median": 4.064275741577148, "p90": 13.348914337158208, "max": 19.791343688964844, "pos_frac": 0.75, "sample": [-1.361785888671875, 6.468601226806641, 1.868189811706543, 4.230503082275391, 5.524997711181641, 13.804100036621094, 5.475547790527344, 12.4166259765625, 9.554084777832031, 4.567342758178711, -6.739223480224609, -5.181371688842773, 4.345794677734375, 19.791343688964844, 1.9747886657714844, 12.338150024414062, -6.240653991699219, 5.851535797119141, 7.820285797119141, 3.5513553619384766, 6.149066925048828, 3.5083961486816406, 2.7363967895507812, -19.4471435546875, 10.52142333984375, -0.47139739990234375, 4.056980133056641, -3.3369827270507812, -3.5933380126953125, 3.953115463256836, -2.150005340576172, 16.355968475341797, 0.921661376953125, 9.301677703857422, 12.279317855834961, 1.2901840209960938, -2.1788330078125, 4.957878112792969, -0.43990325927734375, 1.215362548828125, 4.157590866088867, 4.071571350097656, 3.2820701599121094, 3.8273773193359375, 7.578243255615234, -0.4391288757324219, 10.333412170410156, -9.096752166748047, 19.706192016601562, 0.1434955596923828, 6.679847717285156, -7.800506591796875, 7.202262878417969, 18.359359741210938, 2.1669158935546875, 2.6983718872070312, -9.946372985839844, 14.1029052734375, 9.94051742553711, 13.748466491699219, 1.545389175415039, -4.541851043701172, 10.932655334472656, 10.498222351074219], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000522.npy"}
{"epoch": 0.7891156462585034, "step": 523, "batch_size": 64, "mean": 4.950833320617676, "std": 6.859926223754883, "min": -13.12154769897461, "p10": -5.256808471679687, "median": 6.234169006347656, "p90": 13.047085952758792, "max": 20.041790008544922, "pos_frac": 0.78125, "sample": [-3.018016815185547, 13.70208740234375, 2.6318626403808594, 3.5318984985351562, 15.172225952148438, 9.977621078491211, 1.0180511474609375, 9.137794494628906, 8.176631927490234, 2.7920150756835938, 1.4811992645263672, 10.956855773925781, 15.674400329589844, -1.6954078674316406, 11.1485595703125, 10.593826293945312, 5.9459075927734375, 1.5757503509521484, 7.8626556396484375, -6.717002868652344, 3.7934303283691406, 11.832870483398438, -3.177623748779297, 4.475624084472656, -5.448089599609375, 6.7741546630859375, -5.569793701171875, -2.0028419494628906, 5.221099853515625, 6.9763336181640625, 6.681888580322266, 9.585922241210938, 11.146881103515625, 6.770429611206055, 8.491928100585938, 15.203990936279297, 12.46883773803711, 3.02685546875, 4.948661804199219, -9.7762451171875, 4.584320068359375, -4.81048583984375, 20.041790008544922, 6.522430419921875, 9.432518005371094, 3.8484039306640625, 0.33979034423828125, 2.03411865234375, 13.294906616210938, -7.8702850341796875, 14.659461975097656, 3.8950729370117188, 7.018798828125, -6.490196228027344, 10.835777282714844, 7.612312316894531, 3.8013381958007812, 8.739776611328125, 10.957069396972656, -13.12154769897461, -4.4910430908203125, 7.10174560546875, -0.8873291015625, 8.431358337402344], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000523.npy"}
{"epoch": 0.7906273620559335, "step": 524, "batch_size": 64, "mean": 3.602719783782959, "std": 5.902561664581299, "min": -14.300483703613281, "p10": -2.5902816772460935, "median": 3.598756790161133, "p90": 10.678637695312501, "max": 20.26629638671875, "pos_frac": 0.765625, "sample": [10.303558349609375, 4.139442443847656, 17.368370056152344, 0.912841796875, 2.1448974609375, -2.1500473022460938, -3.5703582763671875, 1.47796630859375, 9.674972534179688, 2.6600914001464844, 9.823528289794922, -2.7789535522460938, -5.985166549682617, 5.5793914794921875, 0.2210845947265625, 8.142326354980469, -6.5570526123046875, -5.406585693359375, 2.8943519592285156, 11.493934631347656, 8.9490966796875, -14.300483703613281, 4.841926574707031, 0.40838623046875, 1.0847396850585938, 3.265094757080078, 20.26629638671875, 3.0178985595703125, 4.912668228149414, -1.6615447998046875, -0.6213150024414062, 5.114448547363281, -6.603874206542969, 4.019828796386719, 7.313232421875, 6.635986328125, 4.657981872558594, 5.413780212402344, 2.4327239990234375, -1.8394050598144531, 1.309112548828125, 0.20604705810546875, 0.9324798583984375, 14.409149169921875, 8.747537612915039, 0.6796188354492188, 12.370027542114258, -1.9800834655761719, 8.015213012695312, 6.90478515625, 5.5158538818359375, 3.9324188232421875, 4.350616455078125, 12.804454803466797, 3.236804962158203, 6.492959976196289, 10.839385986328125, 4.024251937866211, -1.068328857421875, -0.24554061889648438, 7.15771484375, -1.4424514770507812, 4.691902160644531, 0.9940643310546875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000524.npy"}
{"epoch": 0.7921390778533636, "step": 525, "batch_size": 64, "mean": 3.2947843074798584, "std": 5.359790325164795, "min": -7.247016906738281, "p10": -2.609260559082031, "median": 3.1743392944335938, "p90": 10.64388771057129, "max": 16.334014892578125, "pos_frac": 0.75, "sample": [5.760810852050781, -6.973541259765625, -3.6892013549804688, -1.8245429992675781, -5.935375213623047, 3.3577728271484375, 3.8134422302246094, 1.1370849609375, 9.087539672851562, -1.3768844604492188, 2.7949180603027344, 1.104461669921875, 3.2898635864257812, 3.3139190673828125, 2.3583297729492188, 12.124664306640625, -0.08175468444824219, -7.247016906738281, 4.6317596435546875, 4.098762512207031, 6.793205261230469, -0.9062995910644531, 12.297065734863281, 12.917144775390625, 0.8679046630859375, 4.696685791015625, 2.2651290893554688, 0.9759941101074219, 3.34869384765625, 6.575321197509766, -3.5674896240234375, 9.697784423828125, 2.0579662322998047, 6.617210388183594, 9.010677337646484, -2.06878662109375, 2.938814163208008, 3.0588150024414062, 16.334014892578125, -1.4887847900390625, 0.05342864990234375, 3.718547821044922, 4.299074172973633, -2.0041122436523438, 2.836517333984375, 14.331893920898438, 1.1646614074707031, 1.9627914428710938, 3.3989639282226562, 10.518360137939453, -2.3186798095703125, 10.697685241699219, 5.658576965332031, 0.0885009765625, 5.1235504150390625, 9.351531982421875, -1.285247802734375, 5.351051330566406, 3.673492431640625, 5.8291015625, -2.733795166015625, 16.100421905517578, 0.0049381256103515625, -7.121131896972656], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000525.npy"}
{"epoch": 0.7936507936507936, "step": 526, "batch_size": 64, "mean": 3.5220370292663574, "std": 5.295561790466309, "min": -9.45220947265625, "p10": -2.4648870468139643, "median": 3.634793281555176, "p90": 10.948938369750984, "max": 16.145401000976562, "pos_frac": 0.765625, "sample": [13.309982299804688, 5.820274353027344, -3.7686023712158203, 1.6371307373046875, -2.044649124145508, 5.404451370239258, 2.7328968048095703, 12.443618774414062, -5.8972625732421875, -2.644989013671875, 3.91741943359375, 5.431529998779297, -5.770637512207031, 2.8734283447265625, 6.13299560546875, -1.5568313598632812, 5.7665863037109375, 16.145401000976562, 0.24918365478515625, 9.118824005126953, 4.38616943359375, 7.008674621582031, 2.6423797607421875, 12.71234130859375, 9.143356323242188, 1.7270469665527344, 3.7164535522460938, 4.303627014160156, 3.553133010864258, 2.662933349609375, 1.162435531616211, 0.4718589782714844, 9.047554016113281, -1.6966304779052734, 8.67352294921875, -0.04481315612792969, 6.769279479980469, -0.7660903930664062, 7.419586181640625, 0.0919342041015625, 11.722759246826172, 2.516521453857422, 4.5376739501953125, 3.459890365600586, 4.891471862792969, -8.821794509887695, -1.8256263732910156, -0.08397293090820312, 7.0462646484375, 1.1971588134765625, 5.6508331298828125, -1.2961769104003906, 14.228408813476562, 4.201747894287109, 13.349929809570312, 0.6210479736328125, -3.0112762451171875, 5.881244659423828, 4.533592224121094, 2.817535400390625, 3.7191390991210938, 0.9055099487304688, 6.335205078125, -9.45220947265625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000526.npy"}
{"epoch": 0.7951625094482238, "step": 527, "batch_size": 64, "mean": 2.9924192428588867, "std": 6.507940769195557, "min": -13.210220336914062, "p10": -3.9132905960083, "median": 2.41729736328125, "p90": 11.69839477539063, "max": 20.631183624267578, "pos_frac": 0.65625, "sample": [-3.0883216857910156, 4.962890625, 2.9535064697265625, 8.079757690429688, 2.5416717529296875, 1.9313735961914062, 17.155487060546875, 2.6585655212402344, 10.566696166992188, 9.748321533203125, 13.40070915222168, -4.939697265625, 4.690341949462891, -2.4941024780273438, 5.8424072265625, -3.2084274291992188, 13.734565734863281, 1.0562553405761719, 3.2940635681152344, -4.209772109985352, -0.9899406433105469, -4.713401794433594, -5.7854461669921875, -1.1612625122070312, 4.265869140625, 2.927490234375, 14.891578674316406, -1.2501754760742188, 3.336505889892578, -0.7104339599609375, 5.1312103271484375, 2.2929229736328125, -1.8058700561523438, 5.8630523681640625, 5.910404205322266, 20.631183624267578, 8.7606201171875, 1.582763671875, 0.3246784210205078, 9.022441864013672, 6.762397766113281, -0.6933479309082031, 10.574737548828125, -0.8144302368164062, 1.047607421875, -2.048635482788086, -3.2215003967285156, 13.475217819213867, 4.203338623046875, -2.4277400970458984, -6.6846466064453125, 1.3394336700439453, 1.5511474609375, 7.841022491455078, -1.1377944946289062, -2.675954818725586, 1.4892120361328125, 6.1654205322265625, 9.313629150390625, 12.179962158203125, -12.636043548583984, -13.210220336914062, 6.490222930908203, 1.4313125610351562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000527.npy"}
{"epoch": 0.7966742252456538, "step": 528, "batch_size": 64, "mean": 3.0980749130249023, "std": 6.387207984924316, "min": -11.553802490234375, "p10": -3.012824249267578, "median": 3.3555707931518555, "p90": 11.63730163574219, "max": 18.336105346679688, "pos_frac": 0.6875, "sample": [11.913909912109375, -11.553802490234375, 14.959701538085938, -1.6719474792480469, 7.76495361328125, 3.5272445678710938, 5.7385101318359375, 0.15582275390625, 1.5848941802978516, 3.2985916137695312, -1.1881046295166016, -3.176055908203125, 7.253273010253906, -10.50341796875, -2.5851058959960938, -2.2952957153320312, 12.508174896240234, 2.015829086303711, 2.1401500701904297, -0.2962799072265625, 0.8500175476074219, 13.085525512695312, 0.6840648651123047, -11.432937622070312, -1.2115020751953125, 5.217205047607422, -0.5506591796875, -2.6319503784179688, 3.4125499725341797, 8.715431213378906, 6.474662780761719, 1.8296432495117188, 5.092090606689453, 16.191009521484375, 3.4202041625976562, -0.6669082641601562, 18.336105346679688, 6.98077392578125, 7.4451904296875, 2.796255111694336, 4.8849029541015625, 9.2216796875, -1.1380481719970703, 4.008821487426758, -4.814958572387695, 6.6462860107421875, 3.7142066955566406, -0.46363067626953125, 10.99188232421875, 2.4257869720458984, 6.3881378173828125, -11.497106552124023, -7.788505554199219, 9.979461669921875, 4.688720703125, 5.728450775146484, -2.0981407165527344, 2.6240158081054688, 12.258563995361328, -2.25103759765625, 5.906192779541016, 4.6568603515625, 9.207962036132812, 1.36846923828125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000528.npy"}
{"epoch": 0.7981859410430839, "step": 529, "batch_size": 64, "mean": 6.816671848297119, "std": 6.964150428771973, "min": -11.98385238647461, "p10": -1.056206130981445, "median": 5.589122772216797, "p90": 14.501219940185548, "max": 21.159576416015625, "pos_frac": 0.796875, "sample": [9.678565979003906, -1.1890983581542969, 8.75665283203125, 3.7283859252929688, 12.652149200439453, 2.2747421264648438, 20.989883422851562, 15.927534103393555, -2.394329071044922, 0.8629379272460938, -0.5852794647216797, -2.351869583129883, 5.19873046875, 5.109283447265625, 5.0914459228515625, 0.12831878662109375, -2.1954193115234375, 12.119646072387695, -0.54833984375, 9.465225219726562, 14.529548645019531, 0.3763885498046875, -4.3517608642578125, 4.467948913574219, 9.277297973632812, 14.244583129882812, -5.240379333496094, -0.3334808349609375, 4.213165283203125, 4.255390167236328, 12.373115539550781, 7.47882080078125, 15.08416748046875, 18.76338005065918, 5.675666809082031, 13.370155334472656, 13.161722183227539, 12.748052597045898, 0.3101043701171875, 14.21148681640625, 7.0186767578125, 11.595458984375, 19.9635009765625, 3.71990966796875, 5.093009948730469, 13.41050910949707, 8.793075561523438, -0.4399528503417969, 10.812698364257812, 10.916793823242188, 3.543590545654297, 11.171943664550781, 2.5318145751953125, 13.517509460449219, -0.746124267578125, 21.159576416015625, 10.742706298828125, 12.335685729980469, 3.4027786254882812, 5.5025787353515625, -11.98385238647461, 2.804962158203125, -0.3695220947265625, 14.43511962890625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000529.npy"}
{"epoch": 0.799697656840514, "step": 530, "batch_size": 64, "mean": 4.35256290435791, "std": 5.577881813049316, "min": -5.877189636230469, "p10": -2.441181945800781, "median": 3.6200428009033203, "p90": 12.18091220855713, "max": 18.041168212890625, "pos_frac": 0.71875, "sample": [14.021484375, 8.05837631225586, 4.330532073974609, -2.738964080810547, 6.402870178222656, -0.1306915283203125, 18.041168212890625, -2.3995628356933594, 10.283355712890625, 7.8478851318359375, 2.146198272705078, 9.160560607910156, -1.8216419219970703, 5.316484451293945, 9.938228607177734, 5.774133682250977, 3.6502723693847656, 3.589813232421875, 13.249126434326172, -4.154022216796875, 0.07030296325683594, -5.877189636230469, 1.2589263916015625, 7.5928497314453125, 8.655677795410156, -1.0555343627929688, 9.5108642578125, 6.565944671630859, 3.1464385986328125, 14.86642074584961, 4.925079345703125, 0.9327316284179688, 13.060020446777344, 15.406768798828125, -0.45848846435546875, 0.7726955413818359, 8.0885009765625, 12.04762077331543, -2.4590187072753906, 1.5015640258789062, 0.2885284423828125, 6.4298553466796875, -0.1348419189453125, 5.745204925537109, -2.18768310546875, 1.4641342163085938, 10.30975341796875, -0.18769073486328125, -4.197063446044922, 7.8090667724609375, -3.1185779571533203, -2.9766273498535156, 7.454641342163086, -0.6384849548339844, 10.655815124511719, 2.1856460571289062, 8.488048553466797, -0.6440925598144531, 3.0051498413085938, -0.3527069091796875, 12.238037109375, 0.6021575927734375, 2.8544540405273438, 4.3535308837890625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000530.npy"}
{"epoch": 0.8012093726379441, "step": 531, "batch_size": 64, "mean": 3.44205379486084, "std": 5.910897731781006, "min": -12.035270690917969, "p10": -3.244404602050781, "median": 2.0517377853393555, "p90": 11.650598335266114, "max": 17.89630889892578, "pos_frac": 0.734375, "sample": [-5.814323425292969, 10.919864654541016, 7.588287353515625, 3.32257080078125, 2.2127304077148438, -0.59075927734375, 0.26934814453125, 4.0572509765625, 3.3569183349609375, 0.3700294494628906, 2.0960350036621094, 0.8498897552490234, 8.722274780273438, -2.5938491821289062, 11.334680557250977, 9.874427795410156, -2.9409103393554688, -3.3744735717773438, 1.4462890625, 17.48984146118164, 5.3990631103515625, 12.033905029296875, -3.3906402587890625, -3.4240341186523438, -1.3265151977539062, 4.089086532592773, 3.0817947387695312, 1.972076416015625, 4.2797698974609375, -0.127410888671875, 4.074241638183594, -0.6361618041992188, -0.3699512481689453, 1.26007080078125, -12.035270690917969, -0.03834342956542969, 0.018341064453125, 6.075736999511719, 7.279155731201172, -7.485626220703125, 5.9775390625, 2.863616943359375, 12.486557006835938, 10.881681442260742, -1.2391204833984375, -3.5970916748046875, 15.834606170654297, 11.688591003417969, 4.5800018310546875, 0.8687667846679688, 0.5536537170410156, 2.0074405670166016, 6.038612365722656, 8.97222900390625, 0.087982177734375, 1.0096702575683594, 6.076446533203125, 0.4430389404296875, 11.561948776245117, 17.89630889892578, 1.5159072875976562, 1.0467987060546875, 13.491806030273438, -0.080963134765625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000531.npy"}
{"epoch": 0.8027210884353742, "step": 532, "batch_size": 64, "mean": 4.7797698974609375, "std": 6.671884536743164, "min": -8.26336669921875, "p10": -4.076055717468262, "median": 3.7323780059814453, "p90": 13.876411819458008, "max": 20.358657836914062, "pos_frac": 0.78125, "sample": [10.592445373535156, 9.842910766601562, 12.208168029785156, 13.725021362304688, -4.275421142578125, -0.4183197021484375, 3.1823959350585938, 19.801029205322266, 1.8745536804199219, 2.583465576171875, -4.505699157714844, 7.48773193359375, 13.79266357421875, 1.9721298217773438, 13.332019805908203, 2.3179244995117188, 4.881145477294922, 6.718038558959961, 1.1648921966552734, -7.2716522216796875, 0.1756877899169922, 2.382701873779297, 9.909542083740234, 3.2033042907714844, 5.1745758056640625, 6.2854461669921875, 3.629650115966797, 14.583084106445312, 8.239219665527344, 11.087074279785156, 9.38824462890625, 0.4520835876464844, 2.032379150390625, 20.358657836914062, 1.1557769775390625, 6.6609039306640625, 6.367103576660156, -4.166191101074219, 0.703155517578125, 1.6370220184326172, -2.2523727416992188, -6.520530700683594, 2.1702957153320312, -0.9342041015625, 16.003761291503906, 3.8351058959960938, 13.912303924560547, -1.7108840942382812, 5.7055511474609375, 7.813751220703125, 4.577289581298828, 4.8011627197265625, -1.5746212005615234, 15.261363983154297, -4.6385345458984375, 10.810840606689453, -3.2461280822753906, 3.04412841796875, 16.577255249023438, -8.26336669921875, 0.10995674133300781, -3.8657398223876953, 10.009109497070312, 6.014926910400391], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000532.npy"}
{"epoch": 0.8042328042328042, "step": 533, "batch_size": 64, "mean": 4.442439079284668, "std": 6.4162492752075195, "min": -14.001239776611328, "p10": -2.6765560150146483, "median": 3.5894107818603516, "p90": 13.23162078857422, "max": 19.31397247314453, "pos_frac": 0.78125, "sample": [6.096046447753906, 10.825433731079102, -3.6603012084960938, 7.922828674316406, 5.935905456542969, 7.936164855957031, 0.4006805419921875, -0.7404003143310547, 8.261104583740234, 9.937837600708008, 0.6804656982421875, 1.2909221649169922, 4.12384033203125, -1.237945556640625, 10.12812614440918, 9.336578369140625, 1.9665145874023438, 8.623634338378906, 3.388031005859375, 0.03234100341796875, 1.5432395935058594, 2.1997928619384766, 4.3975830078125, 2.12054443359375, -2.7802200317382812, 4.957601547241211, 8.92943000793457, -1.9944305419921875, 3.3536529541015625, -9.29293441772461, 1.3512649536132812, 1.0371513366699219, 15.489555358886719, 15.902824401855469, -3.2843399047851562, 12.777938842773438, -4.647186279296875, 17.64208984375, 3.790790557861328, -0.706695556640625, 2.616790771484375, 6.946083068847656, 4.337757110595703, 6.8024139404296875, 13.684410095214844, 19.31397247314453, 3.2632598876953125, -2.434673309326172, 2.9193267822265625, -1.7129554748535156, -1.1927223205566406, 12.484317779541016, 6.551090240478516, 1.1246337890625, 1.2090225219726562, -4.538932800292969, 2.8877525329589844, 7.245880126953125, 4.621185302734375, 16.798805236816406, 13.426055908203125, 10.63995361328125, -14.001239776611328, 7.288463592529297], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000533.npy"}
{"epoch": 0.8057445200302343, "step": 534, "batch_size": 64, "mean": 4.141200065612793, "std": 6.216010093688965, "min": -10.716217041015625, "p10": -3.8919551849365233, "median": 3.9351024627685547, "p90": 11.758412361145021, "max": 19.78265380859375, "pos_frac": 0.75, "sample": [3.0948410034179688, -4.384490966796875, 6.4202880859375, 3.7919998168945312, 1.01312255859375, 8.944313049316406, 1.4184722900390625, -7.7856903076171875, 3.7019386291503906, -6.547813415527344, 0.013614654541015625, 4.44964599609375, 7.716438293457031, 8.089576721191406, -3.3542709350585938, 19.78265380859375, 5.6531829833984375, 2.6972904205322266, 9.341712951660156, -5.90631103515625, 10.947700500488281, 11.898086547851562, 2.1749839782714844, 5.3089447021484375, -1.3028907775878906, 2.447662353515625, -10.716217041015625, 3.707643508911133, 13.473701477050781, 5.405977249145508, 5.0866851806640625, -3.926013946533203, 15.033378601074219, 14.772171020507812, 11.875007629394531, 9.319931030273438, 0.8970260620117188, 6.098110198974609, 17.727325439453125, 5.190576553344727, 8.543701171875, 10.280548095703125, -5.0316925048828125, -0.4453277587890625, 8.515203475952148, 5.602256774902344, -2.9261398315429688, 3.8356094360351562, 2.1300277709960938, -0.76690673828125, 2.8297157287597656, 10.241985321044922, 4.034595489501953, -2.0210399627685547, 2.9122314453125, 2.9553756713867188, 8.991153717041016, -3.8124847412109375, 5.385732650756836, -0.12127685546875, 8.298759460449219, 4.196174621582031, 11.486356735229492, -3.6480445861816406], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000534.npy"}
{"epoch": 0.8072562358276644, "step": 535, "batch_size": 64, "mean": 4.314825057983398, "std": 7.057832717895508, "min": -15.174036026000977, "p10": -3.4271066665649412, "median": 4.24924373626709, "p90": 14.424268341064455, "max": 22.724754333496094, "pos_frac": 0.75, "sample": [1.1843452453613281, -4.824310302734375, 14.277023315429688, 6.703067779541016, 3.2957229614257812, 10.034534454345703, -8.671867370605469, -3.4201412200927734, -3.4300918579101562, 7.157417297363281, 7.01141357421875, -1.239786148071289, 4.492738723754883, 5.905906677246094, -1.2361831665039062, 0.49688720703125, -3.2878036499023438, 5.45355224609375, 0.5443191528320312, -2.5142059326171875, 4.631446838378906, 6.702751159667969, 8.579185485839844, 1.919097900390625, 8.410175323486328, -1.7860870361328125, 16.18254852294922, -3.2016868591308594, -7.735099792480469, 1.3105411529541016, 15.168685913085938, 19.7923583984375, 8.641170501708984, -3.0712814331054688, 3.812664031982422, 6.291624069213867, 1.6592864990234375, 16.78887939453125, -4.837348937988281, 5.099077224731445, 6.016670227050781, 1.1729278564453125, 22.724754333496094, 6.915489196777344, 1.009552001953125, -15.174036026000977, -0.81890869140625, 8.229209899902344, 1.3017845153808594, 8.722328186035156, 7.943092346191406, -3.926067352294922, 2.2067222595214844, 14.487373352050781, 8.032609939575195, 5.721378326416016, 4.005748748779297, 9.557540893554688, 11.770145416259766, 3.5896682739257812, 1.4245567321777344, 0.7805557250976562, 16.97967529296875, 11.18548583984375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000535.npy"}
{"epoch": 0.8087679516250945, "step": 536, "batch_size": 64, "mean": 5.180801868438721, "std": 5.562654495239258, "min": -3.0279083251953125, "p10": -1.9052715301513663, "median": 4.082893371582031, "p90": 13.06547317504883, "max": 20.130020141601562, "pos_frac": 0.8125, "sample": [5.621089935302734, 2.10394287109375, -3.0279083251953125, 17.088134765625, 12.84100341796875, 1.861114501953125, 3.0014114379882812, -2.9487152099609375, 3.7119216918945312, 1.7270870208740234, -2.494873046875, 4.255401611328125, 10.268142700195312, 9.435195922851562, 2.594125747680664, 4.696830749511719, 5.387542724609375, 13.969038009643555, 20.130020141601562, 6.277971267700195, 4.510906219482422, -2.291379928588867, 10.892797470092773, 8.171646118164062, -0.02649688720703125, 16.808448791503906, 7.5091552734375, 3.7988758087158203, 3.5010833740234375, 4.852519989013672, 11.471328735351562, -2.572734832763672, 2.4946250915527344, 6.191791534423828, -1.0282745361328125, 0.42528533935546875, 11.779521942138672, 0.423370361328125, 6.73736572265625, 0.7674217224121094, 13.255455017089844, 11.879095077514648, 5.950336456298828, -2.2524280548095703, 0.6528472900390625, 3.3713607788085938, 1.8740234375, 8.020242691040039, 3.559720993041992, 8.929229736328125, -0.451324462890625, 11.842918395996094, 1.6470718383789062, 13.161674499511719, 9.986373901367188, 4.445350646972656, 1.4778079986572266, 15.657394409179688, -1.0952396392822266, -2.91949462890625, 3.1376724243164062, 5.010189056396484, -0.39508056640625, 3.9103851318359375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000536.npy"}
{"epoch": 0.8102796674225246, "step": 537, "batch_size": 64, "mean": 4.485739707946777, "std": 6.749697208404541, "min": -12.291282653808594, "p10": -4.36237449645996, "median": 4.854131698608398, "p90": 12.967603111267092, "max": 18.611984252929688, "pos_frac": 0.75, "sample": [5.085090637207031, 9.959953308105469, 3.9518280029296875, 9.366729736328125, 11.368408203125, 10.255361557006836, 10.454265594482422, -0.40334129333496094, 6.5711517333984375, 7.028329849243164, 1.2440910339355469, 3.9127960205078125, 13.731315612792969, 8.912612915039062, 12.518585205078125, 10.046661376953125, 12.539140701293945, 4.623172760009766, 0.4138660430908203, -3.720653533935547, -2.138591766357422, 3.650909423828125, 5.4113006591796875, 2.079813003540039, -1.9545478820800781, 13.151229858398438, 6.266357421875, 1.1875686645507812, -12.291282653808594, 0.47632408142089844, 5.417396545410156, 1.7963752746582031, 3.209989547729492, -2.3897781372070312, -5.461723327636719, 0.12558746337890625, 15.52679443359375, 6.273929595947266, 8.715240478515625, -1.9128456115722656, 11.809646606445312, 11.353370666503906, 2.171356201171875, 3.6441421508789062, -2.738435745239258, -0.7257804870605469, 5.55291748046875, -6.1342926025390625, 3.676788330078125, 10.300796508789062, 4.191677093505859, 9.611112594604492, -8.022356033325195, 15.56103515625, 13.858283996582031, 18.611984252929688, -2.875823974609375, 6.3610687255859375, -5.999755859375, 6.5148162841796875, 14.106391906738281, 6.647010803222656, -10.75064468383789, -4.637397766113281], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000537.npy"}
{"epoch": 0.8117913832199547, "step": 538, "batch_size": 64, "mean": 5.358842372894287, "std": 6.162752151489258, "min": -10.576187133789062, "p10": -1.9277565002441401, "median": 4.849864959716797, "p90": 13.121083068847659, "max": 22.667892456054688, "pos_frac": 0.796875, "sample": [-1.4823150634765625, 8.293088912963867, 0.5993003845214844, 2.719470977783203, 6.058261871337891, -10.576187133789062, 6.801906585693359, -0.95819091796875, 8.078025817871094, 9.47601318359375, 4.560543060302734, 12.32670783996582, -0.19482421875, 10.631637573242188, 7.236902236938477, 6.305809020996094, 12.434135437011719, 15.687713623046875, 6.0194244384765625, 6.192350387573242, 3.4524765014648438, -0.8687572479248047, 13.415489196777344, 4.748199462890625, 3.7484169006347656, -3.509796142578125, 4.829414367675781, 3.0129165649414062, 20.332992553710938, 5.636322021484375, -2.5559616088867188, 8.381752014160156, -4.0708770751953125, -0.72265625, -2.1186599731445312, 1.3227081298828125, 4.6475982666015625, 6.392023086547852, 1.7938079833984375, 3.69671630859375, 11.831153869628906, -3.8159027099609375, 17.034072875976562, 6.6264190673828125, -1.380228042602539, 7.289943695068359, -2.7194595336914062, 13.580453872680664, 6.560356140136719, 22.667892456054688, 6.565753936767578, 9.3248291015625, 4.8703155517578125, 1.4311065673828125, 3.982177734375, 2.1972808837890625, 19.33725357055664, 1.6653022766113281, 8.80301284790039, 2.4101409912109375, 3.0346755981445312, 9.020151138305664, 7.514190673828125, 3.3611183166503906], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000538.npy"}
{"epoch": 0.8133030990173847, "step": 539, "batch_size": 64, "mean": 4.667385578155518, "std": 5.940952777862549, "min": -6.428680419921875, "p10": -1.7981674194335935, "median": 3.1856632232666016, "p90": 13.787363243103028, "max": 19.833709716796875, "pos_frac": 0.828125, "sample": [4.389610290527344, 10.569694519042969, 0.43766212463378906, 16.551414489746094, 7.935834884643555, 2.090362548828125, 11.402122497558594, 7.30865478515625, 11.671981811523438, 0.71435546875, 14.086296081542969, 3.35284423828125, 0.8436126708984375, 3.7264251708984375, -1.2916412353515625, -0.3329277038574219, 7.970922470092773, -2.735382080078125, 15.616477966308594, 4.1927337646484375, 3.0409603118896484, -1.62939453125, 6.4285430908203125, -6.428680419921875, 1.5191535949707031, -1.8704986572265625, 0.49870872497558594, 3.713817596435547, 19.833709716796875, 2.5822830200195312, 3.0127334594726562, 2.86798095703125, 0.5865211486816406, 0.8776397705078125, 13.196914672851562, 4.093442916870117, 2.0255088806152344, 1.459014892578125, 11.703025817871094, 2.12213134765625, -2.2773971557617188, 3.16241455078125, 3.3274593353271484, 1.9374160766601562, 8.364822387695312, 4.0718841552734375, -3.540027618408203, 15.046913146972656, 3.53546142578125, -2.8356475830078125, 9.923381805419922, 12.261398315429688, 1.8068733215332031, 0.42427825927734375, 3.208911895751953, 9.242477416992188, -1.149505615234375, 1.9003791809082031, -6.374298095703125, 13.938547134399414, 3.8615989685058594, 13.434600830078125, 16.44482421875, 0.861358642578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000539.npy"}
{"epoch": 0.8148148148148148, "step": 540, "batch_size": 64, "mean": 5.670567035675049, "std": 6.785914897918701, "min": -9.738351821899414, "p10": -2.231296157836914, "median": 4.929080963134766, "p90": 15.630107879638674, "max": 18.874446868896484, "pos_frac": 0.8125, "sample": [-6.85205078125, 2.9873008728027344, 18.545166015625, 9.866870880126953, 2.178577423095703, -9.738351821899414, 8.654014587402344, 2.9601898193359375, 1.7836990356445312, 8.492259979248047, 1.2180595397949219, 9.818313598632812, 13.66474723815918, 4.75469970703125, -3.504924774169922, 5.39356803894043, 16.98147201538086, 5.741218566894531, -2.119152069091797, 7.275993347167969, 8.566787719726562, -2.27935791015625, 9.184539794921875, 2.906553268432617, 15.73272705078125, 15.100021362304688, -2.410276412963867, 6.89013671875, -1.4281463623046875, 4.9127197265625, -5.48846435546875, 13.905258178710938, 16.264358520507812, 5.117828369140625, 14.84588623046875, 0.6496353149414062, 0.02679443359375, 9.844234466552734, -4.47137451171875, 1.0617408752441406, 0.15477371215820312, 10.587905883789062, 7.4983062744140625, 18.874446868896484, 17.330726623535156, 3.2932052612304688, 15.390663146972656, 4.945442199707031, 1.37274169921875, -1.343719482421875, 4.842781066894531, 10.101547241210938, -0.3449554443359375, -0.9742622375488281, 1.8264083862304688, 7.6607513427734375, 0.20026397705078125, 17.843612670898438, 2.1781158447265625, 9.112777709960938, 7.4342041015625, 1.4856071472167969, 2.9707069396972656, 13.440961837768555], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000540.npy"}
{"epoch": 0.8163265306122449, "step": 541, "batch_size": 64, "mean": 5.446465969085693, "std": 7.108374118804932, "min": -11.671371459960938, "p10": -3.4923274993896483, "median": 5.083250045776367, "p90": 14.933421897888186, "max": 18.623714447021484, "pos_frac": 0.734375, "sample": [18.623714447021484, 10.537696838378906, 9.80434799194336, 16.183149337768555, -0.809356689453125, 16.080772399902344, -0.9963531494140625, 13.097904205322266, 14.21873664855957, -1.7966957092285156, -1.004150390625, 11.679290771484375, 4.6304168701171875, 5.751716613769531, 2.645801544189453, -8.895004272460938, 5.250354766845703, 15.87335205078125, 2.5767822265625, 12.544239044189453, -3.2197418212890625, 6.475519180297852, 17.84451675415039, 13.049140930175781, 12.501899719238281, 6.661184310913086, 9.849250793457031, -0.025308609008789062, -8.669326782226562, -1.3914318084716797, 5.332378387451172, 4.088750839233398, 13.753984451293945, 11.373992919921875, -0.05742645263671875, 9.670206069946289, 3.3669185638427734, -6.3187255859375, 10.603824615478516, -0.05702400207519531, 0.9381504058837891, -3.609149932861328, 15.239715576171875, 11.294075012207031, 2.1822681427001953, 6.999752044677734, 11.062564849853516, 4.7344970703125, -11.671371459960938, 2.0074691772460938, 0.5913105010986328, 11.218259811401367, 4.940998077392578, 3.1487808227539062, -0.00278472900390625, -3.9643707275390625, 5.157161712646484, 5.00933837890625, 1.1351490020751953, 6.836250305175781, 1.357320785522461, -4.246065139770508, 18.031641006469727, 9.353553771972656], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000541.npy"}
{"epoch": 0.817838246409675, "step": 542, "batch_size": 64, "mean": 5.641759872436523, "std": 6.989621639251709, "min": -7.2925262451171875, "p10": -3.230631256103515, "median": 4.7062225341796875, "p90": 15.824228668212896, "max": 22.221012115478516, "pos_frac": 0.78125, "sample": [10.049266815185547, 11.46346664428711, 1.357635498046875, 12.981475830078125, 3.60595703125, 5.0111236572265625, 5.381622314453125, 7.7069244384765625, 4.939537048339844, 4.2799072265625, 5.843616485595703, -0.3004150390625, -3.4102554321289062, 6.5919189453125, 2.0135250091552734, 8.29515266418457, -1.4885673522949219, 13.546600341796875, -5.6872711181640625, 12.617851257324219, 16.4468994140625, 13.533073425292969, 1.4690265655517578, -1.0076751708984375, -6.276805877685547, -7.2925262451171875, 5.1484375, 0.98785400390625, 2.2611541748046875, -3.7546615600585938, 16.669368743896484, 14.04207992553711, 1.1934013366699219, 12.350799560546875, -2.0727500915527344, -2.1923866271972656, -4.710601806640625, 7.4475555419921875, -2.8115081787109375, 10.806861877441406, 6.616180419921875, 2.2743682861328125, 16.666271209716797, -0.5059623718261719, 7.073396682739258, 2.4876785278320312, 11.133411407470703, 3.1756858825683594, 0.8326950073242188, 16.77947998046875, 14.371330261230469, 19.708751678466797, -6.296003341674805, 6.281288146972656, 3.85003662109375, 4.297504425048828, 0.3293304443359375, 22.221012115478516, 10.92330551147461, 4.472908020019531, 4.135440826416016, 12.867439270019531, 3.6033401489257812, 16.737079620361328], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000542.npy"}
{"epoch": 0.8193499622071051, "step": 543, "batch_size": 64, "mean": 4.7779693603515625, "std": 6.421280860900879, "min": -17.89264678955078, "p10": -1.179373168945312, "median": 4.221202850341797, "p90": 13.403584289550782, "max": 19.14166831970215, "pos_frac": 0.828125, "sample": [0.6606903076171875, 2.521238327026367, -0.010746002197265625, -0.4271697998046875, 7.6205902099609375, 2.2018661499023438, 10.728851318359375, 13.412391662597656, -10.726123809814453, 9.41162109375, 0.3507537841796875, 7.488868713378906, 6.424472808837891, 10.30698013305664, 14.833633422851562, 6.1459197998046875, 19.14166831970215, -6.784511566162109, 2.914825439453125, 8.502574920654297, 2.0603790283203125, 3.5295639038085938, 0.6827430725097656, 1.6205978393554688, 2.843708038330078, 11.970930099487305, -17.89264678955078, 0.2944812774658203, 0.07670211791992188, -1.5965995788574219, 6.0389251708984375, 13.289192199707031, 15.110946655273438, 5.826099395751953, 9.569637298583984, 3.8321380615234375, 16.09841537475586, -0.2840404510498047, -3.4701919555664062, -0.6086311340332031, 9.445289611816406, 4.999303817749023, 7.13641357421875, 9.293548583984375, 7.4562225341796875, 5.10980224609375, 15.116767883300781, 13.383033752441406, 0.11083602905273438, 1.9914093017578125, 2.251300811767578, 2.0626049041748047, 1.1466903686523438, 4.610267639160156, 7.968353271484375, 2.0708465576171875, 7.7381591796875, 3.2960033416748047, 3.3336944580078125, -1.585226058959961, 5.4493560791015625, 15.348075866699219, -1.4239768981933594, 5.770524978637695], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000543.npy"}
{"epoch": 0.8208616780045351, "step": 544, "batch_size": 64, "mean": 6.10788631439209, "std": 7.671019554138184, "min": -9.863861083984375, "p10": -4.8623615264892575, "median": 5.1722002029418945, "p90": 17.24345302581787, "max": 22.258407592773438, "pos_frac": 0.796875, "sample": [9.060516357421875, -6.658363342285156, 5.211212158203125, 3.4346179962158203, 9.520210266113281, 8.769966125488281, -5.641754150390625, 3.204435348510742, 9.003738403320312, 1.7784042358398438, 1.839447021484375, 2.4312992095947266, 5.084808349609375, 16.15850067138672, 4.428108215332031, -0.17040061950683594, -9.863861083984375, 6.3921051025390625, 13.189186096191406, 7.946044921875, 4.936737060546875, 3.143707275390625, 7.59246826171875, 13.028827667236328, 11.266983032226562, 3.3102684020996094, 3.5230331420898438, -4.050750732421875, -0.08737754821777344, -5.064117431640625, 21.708709716796875, -4.391597747802734, 6.852710723876953, -0.6683502197265625, 17.282114028930664, 6.009246826171875, 14.438217163085938, -7.295341491699219, 5.133188247680664, 7.177696228027344, 6.435462951660156, 2.24810791015625, 17.333877563476562, 1.5438594818115234, 2.5065460205078125, 3.8685531616210938, 11.986709594726562, 4.48350715637207, 19.17443084716797, -7.457550048828125, 0.44244384765625, 19.093841552734375, 15.845169067382812, 22.258407592773438, -5.895656585693359, 6.636138916015625, 14.98321533203125, 2.0503997802734375, 18.767162322998047, 17.153244018554688, -2.0020370483398438, 9.703521728515625, 10.48724365234375, 10.293548583984375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000544.npy"}
{"epoch": 0.8223733938019653, "step": 545, "batch_size": 64, "mean": 4.466965198516846, "std": 6.806182384490967, "min": -11.879613876342773, "p10": -3.879708862304687, "median": 4.751241683959961, "p90": 13.967898559570315, "max": 20.35308837890625, "pos_frac": 0.71875, "sample": [10.142358779907227, 6.659950256347656, -3.2414016723632812, 16.29427719116211, 5.245037078857422, 3.304546356201172, -3.4982223510742188, -1.7049522399902344, -1.183126449584961, 4.517024993896484, 5.96905517578125, -2.7795562744140625, 16.896324157714844, 3.2289905548095703, -0.025499343872070312, 8.146171569824219, 5.666923522949219, 5.6796417236328125, -4.560031890869141, 3.8210983276367188, 5.479118347167969, 14.209625244140625, -1.1697311401367188, 9.658920288085938, 11.51409912109375, 8.170913696289062, 5.060874938964844, 0.8309726715087891, 5.9328765869140625, 1.4515056610107422, 0.5166797637939453, -8.712970733642578, -4.410491943359375, 4.773265838623047, -3.57745361328125, -1.551828384399414, 15.384193420410156, 17.519451141357422, 13.40386962890625, -1.5991783142089844, -0.13641357421875, 9.606277465820312, 8.050945281982422, 5.435583114624023, 1.440338134765625, 8.927864074707031, -4.278358459472656, 4.729217529296875, 3.627960205078125, -11.879613876342773, 1.591888427734375, 8.846099853515625, 7.895538330078125, 20.35308837890625, 11.097724914550781, -4.009246826171875, 0.9546852111816406, 5.817930221557617, -6.6026611328125, 0.5408668518066406, 10.269569396972656, 16.144920349121094, 13.292121887207031, 2.706127166748047], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000545.npy"}
{"epoch": 0.8238851095993953, "step": 546, "batch_size": 64, "mean": 4.460666656494141, "std": 6.315736293792725, "min": -10.125518798828125, "p10": -4.145571899414062, "median": 4.733244895935059, "p90": 11.72166213989258, "max": 17.04584503173828, "pos_frac": 0.765625, "sample": [13.244464874267578, -3.0941162109375, 11.780097961425781, 4.194694519042969, 6.125862121582031, 11.311037063598633, 1.64617919921875, -3.73834228515625, -0.7659378051757812, -3.1295948028564453, -10.125518798828125, 8.11203384399414, 3.8539772033691406, 2.9904632568359375, -9.618267059326172, 10.85845947265625, 1.4505653381347656, 2.081207275390625, -4.917263031005859, 2.9409255981445312, -4.320098876953125, 6.372018814086914, 7.599090576171875, 5.966224670410156, 6.118869781494141, 4.22125244140625, 8.573518753051758, -2.140625, 14.06009292602539, 17.04584503173828, 4.214588165283203, 5.8768310546875, 12.141357421875, 2.154327392578125, 7.688390731811523, -6.3621826171875, 3.3359375, 10.51055908203125, 3.0960216522216797, 4.8811492919921875, 4.958612442016602, 4.785955429077148, -5.651411056518555, 4.7091827392578125, 7.093183517456055, 9.503334045410156, -2.5843372344970703, 4.043872833251953, 7.820474624633789, 9.689922332763672, 11.132926940917969, 1.9747314453125, 15.608848571777344, -0.722747802734375, -2.1474990844726562, 2.5054473876953125, 10.77928352355957, 8.452033996582031, 16.70937728881836, 4.757307052612305, 11.585311889648438, -8.192863464355469, 11.019454956054688, 1.4181880950927734], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000546.npy"}
{"epoch": 0.8253968253968254, "step": 547, "batch_size": 64, "mean": 5.522754669189453, "std": 6.349582672119141, "min": -12.361183166503906, "p10": -1.4636524200439451, "median": 5.712203025817871, "p90": 13.342832946777348, "max": 21.515926361083984, "pos_frac": 0.84375, "sample": [1.0126876831054688, 1.0647430419921875, 5.575925827026367, 4.351814270019531, 14.23248291015625, 2.6297473907470703, -3.5195980072021484, 21.515926361083984, 10.01382064819336, 10.046470642089844, 6.8124847412109375, 8.341476440429688, 4.855979919433594, 4.061651229858398, 1.8872051239013672, 0.19683074951171875, 9.170333862304688, 17.76551055908203, 0.7116470336914062, 5.986480712890625, 11.597991943359375, 8.7447509765625, 17.338951110839844, 1.344818115234375, 13.790191650390625, 5.848480224609375, 2.4974517822265625, 8.65789794921875, 11.801742553710938, 1.272857666015625, 6.088005065917969, -1.2043647766113281, -12.361183166503906, 5.44580078125, -4.267791748046875, 7.782905578613281, 12.298995971679688, 1.7494583129882812, 8.647659301757812, 6.906452178955078, 5.9864349365234375, 14.455955505371094, 7.74151611328125, 8.601795196533203, -7.001350402832031, 8.075521469116211, 2.6719436645507812, -10.370223999023438, -1.5747756958007812, 5.455604553222656, 11.162757873535156, 6.133638381958008, 16.236114501953125, -2.1493911743164062, 3.6050949096679688, -1.0446014404296875, 4.878448486328125, 11.74752426147461, 1.7858104705810547, 5.217811584472656, 8.098581314086914, 3.1900863647460938, 10.449577331542969, -0.5922870635986328], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000547.npy"}
{"epoch": 0.8269085411942555, "step": 548, "batch_size": 64, "mean": 3.9804792404174805, "std": 6.2858500480651855, "min": -7.309085845947266, "p10": -3.8727935791015624, "median": 3.023122787475586, "p90": 12.299864578247075, "max": 21.476802825927734, "pos_frac": 0.71875, "sample": [2.8751983642578125, 8.334861755371094, 3.7586517333984375, -5.2638397216796875, -1.7235126495361328, 8.535903930664062, -3.960521697998047, 19.85995101928711, 10.609701156616211, 4.6341094970703125, -2.544586181640625, 15.861274719238281, 0.39070701599121094, 1.0677413940429688, -0.3674774169921875, 3.0799522399902344, -4.1580047607421875, 2.57476806640625, 2.9662933349609375, 5.091644287109375, 11.40325927734375, 4.656593322753906, -5.590854644775391, 3.5206451416015625, 5.386749267578125, 2.1710968017578125, 16.60501480102539, 9.99325180053711, 9.252029418945312, 12.684123992919922, 0.8826332092285156, 3.276966094970703, 0.3084373474121094, 6.666778564453125, -0.07293701171875, 0.8394622802734375, 14.784149169921875, 3.897367477416992, -3.8965606689453125, 5.098907470703125, 7.592582702636719, 2.9421749114990234, 7.215812683105469, -0.9949188232421875, 16.51458740234375, -0.24048233032226562, -0.8914299011230469, 7.2743377685546875, -3.1886348724365234, 4.854240417480469, 2.292266845703125, -7.309085845947266, 4.018100738525391, 8.873519897460938, 1.9975814819335938, -0.318817138671875, 9.502706527709961, -3.8173370361328125, 2.2598037719726562, -4.6696014404296875, 21.476802825927734, 7.025379180908203, -3.0684757232666016, 1.9196319580078125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000548.npy"}
{"epoch": 0.8284202569916855, "step": 549, "batch_size": 64, "mean": 4.094517707824707, "std": 6.49368143081665, "min": -13.219451904296875, "p10": -3.446379661560058, "median": 3.5756759643554688, "p90": 12.682061004638673, "max": 16.81237030029297, "pos_frac": 0.734375, "sample": [0.154144287109375, 9.167411804199219, -1.5537490844726562, 3.0082664489746094, 0.5348358154296875, 1.00018310546875, 6.7846527099609375, 1.3744983673095703, 4.008827209472656, 14.983707427978516, 14.458602905273438, 2.66839599609375, 4.066493988037109, 13.098100662231445, 0.020599365234375, 1.6896591186523438, -5.622161865234375, 6.511920928955078, 6.8140459060668945, 9.587833404541016, 11.80621337890625, 10.095771789550781, 5.1365966796875, -4.353202819824219, 6.9113311767578125, -0.6215934753417969, 1.5782356262207031, 12.475631713867188, 2.97308349609375, -8.881477355957031, 11.436046600341797, 16.422805786132812, 7.79827880859375, -0.5690765380859375, 6.039693832397461, 12.770530700683594, -0.9245662689208984, -7.00201416015625, -2.16058349609375, 6.570274353027344, -8.145767211914062, 3.8705596923828125, 1.4470558166503906, 2.631101608276367, 7.3344879150390625, 8.85000991821289, -2.8814468383789062, 7.145896911621094, 9.578720092773438, -0.966339111328125, -1.3782100677490234, 10.886953353881836, 11.455902099609375, 5.476898193359375, 3.280792236328125, -0.44814300537109375, 4.97364616394043, 2.5651321411132812, 0.5824012756347656, 16.81237030029297, 16.041976928710938, -13.219451904296875, -3.6884937286376953, -0.4151763916015625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000549.npy"}
{"epoch": 0.8299319727891157, "step": 550, "batch_size": 64, "mean": 3.692516803741455, "std": 6.072179317474365, "min": -10.628280639648438, "p10": -3.3583858489990233, "median": 2.348674774169922, "p90": 10.514363861083984, "max": 18.749950408935547, "pos_frac": 0.796875, "sample": [2.9420242309570312, 18.749950408935547, 17.198040008544922, 4.8521728515625, -7.05940055847168, 1.1510467529296875, 0.017095565795898438, -0.08864593505859375, 0.10636138916015625, 2.5237388610839844, 8.989774703979492, 4.8260345458984375, 6.365724563598633, 3.1111278533935547, 0.1559772491455078, 14.886375427246094, 17.91442108154297, 8.6285400390625, 5.741050720214844, 4.924781799316406, 10.569618225097656, 0.9914512634277344, 6.184732437133789, 9.433921813964844, 9.868804931640625, -3.5044479370117188, 9.615081787109375, 0.6597194671630859, 1.0771217346191406, -4.835666656494141, -5.3556060791015625, 0.9611663818359375, 1.69207763671875, 1.2108001708984375, 1.3880615234375, 2.3334884643554688, 2.8466567993164062, 0.3858795166015625, -1.0257720947265625, 1.3067359924316406, -3.0175743103027344, 7.433784484863281, 4.824802398681641, 2.26953125, 4.783042907714844, 5.091716766357422, 10.38543701171875, 5.807592391967773, 2.363861083984375, 9.392658233642578, -3.7750625610351562, 8.123208999633789, 6.4688262939453125, 1.2126178741455078, 15.010692596435547, 1.2073249816894531, -10.628280639648438, -0.17418861389160156, 15.008041381835938, -8.30518913269043, 2.2320213317871094, 1.7736434936523438, -0.9459953308105469, -1.9614448547363281], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000550.npy"}
{"epoch": 0.8314436885865457, "step": 551, "batch_size": 64, "mean": 4.01011848449707, "std": 5.6849365234375, "min": -8.922500610351562, "p10": -2.281438064575195, "median": 3.048065185546875, "p90": 11.576520919799805, "max": 18.446929931640625, "pos_frac": 0.765625, "sample": [4.292774200439453, 4.7578277587890625, 7.754192352294922, 4.1843109130859375, 9.398334503173828, 8.921113967895508, 18.446929931640625, 0.19929885864257812, 1.174264907836914, 9.180992126464844, -0.6512069702148438, 1.9004402160644531, 14.461257934570312, 1.5073204040527344, 10.139533996582031, 2.694507598876953, 0.7506103515625, 4.61767578125, -8.922500610351562, -1.3008384704589844, 3.9036827087402344, 3.8969573974609375, 8.048057556152344, 0.62640380859375, 3.401622772216797, 16.47215461730957, 1.5329742431640625, 6.199302673339844, 8.0361328125, 6.194431304931641, 4.277626037597656, 0.4729499816894531, 0.8691120147705078, 3.70843505859375, -3.04400634765625, 11.78985595703125, -3.0188674926757812, 15.415031433105469, -0.28106117248535156, 12.87725830078125, -1.4919204711914062, 1.2922592163085938, 2.2455520629882812, 0.7262077331542969, 11.531673431396484, 9.2396240234375, 10.152801513671875, 6.546123504638672, -1.451395034790039, -2.6307449340820312, 1.3765335083007812, 7.42974853515625, -0.8536529541015625, -5.714607238769531, -2.4136734008789062, 1.867279052734375, -1.9728889465332031, 11.23480224609375, -1.19842529296875, 1.1328010559082031, -5.1999664306640625, 0.3779277801513672, 11.595741271972656, 7.94091796875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000551.npy"}
{"epoch": 0.8329554043839759, "step": 552, "batch_size": 64, "mean": 4.800152778625488, "std": 6.1779279708862305, "min": -11.23128890991211, "p10": -1.5665300369262691, "median": 4.689667701721191, "p90": 11.777988052368165, "max": 20.50661849975586, "pos_frac": 0.84375, "sample": [-2.235078811645508, 3.6835861206054688, 2.3307418823242188, 0.8100662231445312, 11.303190231323242, 2.674060821533203, 0.9348373413085938, 1.3522491455078125, 3.698211669921875, 11.453022003173828, 9.754730224609375, 17.417984008789062, 8.549224853515625, 7.2445068359375, 1.8797073364257812, -1.7520771026611328, -1.1335868835449219, 11.917259216308594, 4.082550048828125, -0.7776641845703125, 5.82208251953125, 8.220878601074219, 3.4669189453125, 5.713476181030273, 10.923149108886719, 1.4178428649902344, 6.107154846191406, 12.28639030456543, 7.266685485839844, -10.146255493164062, 4.490594863891602, 7.757585525512695, 10.6962890625, -11.02157974243164, 7.836444854736328, -0.5177993774414062, 13.699066162109375, 9.090896606445312, 1.7309226989746094, 4.024818420410156, 8.760772705078125, 4.888740539550781, -4.965354919433594, 13.781387329101562, 4.4607696533203125, 7.590965270996094, 9.87130355834961, 3.127391815185547, -10.850265502929688, 1.416259765625, 20.50661849975586, 5.486808776855469, 4.031269073486328, -11.23128890991211, 7.958513259887695, 3.3614883422851562, 2.9088363647460938, 5.124223709106445, 7.9364166259765625, 5.6725006103515625, 12.865646362304688, 5.8645172119140625, 4.0545806884765625, 2.534566879272461], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000552.npy"}
{"epoch": 0.8344671201814059, "step": 553, "batch_size": 64, "mean": 4.543527603149414, "std": 8.09799575805664, "min": -16.278488159179688, "p10": -4.159294891357422, "median": 2.5341758728027344, "p90": 16.364956665039067, "max": 22.918212890625, "pos_frac": 0.671875, "sample": [18.83666229248047, -4.067420959472656, -0.31108856201171875, 9.896259307861328, 1.262298583984375, -0.6784553527832031, 13.932500839233398, 4.99029541015625, -5.9539947509765625, -0.34418487548828125, 1.36767578125, 0.608306884765625, 10.215991973876953, 11.923995971679688, -0.45931243896484375, 12.823661804199219, -2.794279098510742, 9.415489196777344, 2.0096893310546875, -16.278488159179688, -2.8411178588867188, 13.931137084960938, 1.8779144287109375, 8.902778625488281, 16.830841064453125, 11.853080749511719, 0.7650985717773438, 0.2946434020996094, -4.062896728515625, -5.925071716308594, 21.284332275390625, 14.702840805053711, 9.176544189453125, 22.918212890625, 4.110786437988281, 9.140401840209961, 17.515827178955078, 5.0941009521484375, 4.055816650390625, 17.62924575805664, 3.6592559814453125, 3.5255775451660156, -0.8203525543212891, 1.442962646484375, -4.19866943359375, 2.9902267456054688, -3.1219215393066406, 21.724700927734375, -9.163787841796875, -5.4947357177734375, 1.5630950927734375, 6.608001708984375, 9.613983154296875, 2.078125, -0.4762115478515625, 8.351448059082031, 0.141845703125, -4.728759765625, 7.291282653808594, 5.660331726074219, -1.0341548919677734, -2.138254165649414, 15.27789306640625, -1.6162567138671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000553.npy"}
{"epoch": 0.8359788359788359, "step": 554, "batch_size": 64, "mean": 4.5531392097473145, "std": 7.1147260665893555, "min": -14.569442749023438, "p10": -2.5097110748291014, "median": 3.1303977966308594, "p90": 15.379276657104498, "max": 21.968721389770508, "pos_frac": 0.765625, "sample": [1.3933792114257812, 15.952434539794922, 19.362760543823242, 12.7755126953125, 2.1251678466796875, 3.8574886322021484, 3.155181884765625, 2.375690460205078, 4.155906677246094, 7.502998352050781, -1.2932891845703125, 0.0095672607421875, -1.5017623901367188, 10.492424011230469, -10.749673843383789, 3.56536865234375, 6.405763626098633, -4.6119537353515625, 2.949798583984375, 2.1790409088134766, 0.8962497711181641, 6.00738525390625, 0.44205665588378906, -3.41937255859375, 5.7934112548828125, 8.417533874511719, 17.544342041015625, 1.9518318176269531, -2.657501220703125, 6.450897216796875, 21.968721389770508, 0.1796722412109375, 5.428678512573242, -0.3360023498535156, -0.45232391357421875, -2.787752151489258, 11.50649642944336, 1.486948013305664, 4.848228454589844, -1.28045654296875, -14.569442749023438, 13.183326721191406, -0.33809661865234375, 4.136669158935547, 5.0853271484375, 1.3773994445800781, 1.0532512664794922, 3.0213184356689453, 1.7826061248779297, -0.8941497802734375, 8.622444152832031, 3.6534500122070312, -2.164867401123047, 14.041908264160156, 20.59112548828125, 1.8375625610351562, 6.3849334716796875, 6.818855285644531, 3.1056137084960938, 13.614486694335938, 17.644210815429688, -2.9035720825195312, 18.974430084228516, 5.251251220703125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000554.npy"}
{"epoch": 0.8374905517762661, "step": 555, "batch_size": 64, "mean": 4.179170608520508, "std": 6.640335559844971, "min": -16.14605712890625, "p10": -3.8929016113281247, "median": 4.594480514526367, "p90": 12.515676116943359, "max": 16.892078399658203, "pos_frac": 0.765625, "sample": [0.3998146057128906, -14.183235168457031, 2.5361595153808594, 2.25, 8.346138000488281, 1.6780776977539062, -5.7565460205078125, -0.37357330322265625, -2.0437278747558594, -2.0647125244140625, 9.086380004882812, -3.9215850830078125, 6.899871826171875, 2.2469444274902344, 12.105022430419922, 13.668594360351562, 3.1210403442382812, -0.5226936340332031, 2.4894790649414062, 10.289794921875, 4.975738525390625, -0.0522003173828125, 16.552047729492188, 1.998199462890625, 10.298364639282227, 3.5759811401367188, 8.838546752929688, 13.923168182373047, 3.2465972900390625, 11.666145324707031, 8.239187240600586, 5.854419708251953, 7.8837127685546875, 12.550926208496094, -6.905342102050781, 8.954944610595703, -16.14605712890625, 5.384075164794922, 1.4194717407226562, 4.481178283691406, 6.3253021240234375, 6.5599212646484375, 1.8658676147460938, 2.6770782470703125, -3.8259735107421875, 2.4039478302001953, 13.143043518066406, 4.955902099609375, 13.26125717163086, -3.3306884765625, 10.94091796875, -0.8001251220703125, -6.7944183349609375, 1.9463615417480469, 7.112903594970703, 5.550178527832031, 9.776792526245117, 4.707782745361328, 6.165395736694336, -4.382244110107422, 16.892078399658203, 6.307107925415039, 12.433425903320312, 0.5847530364990234], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000555.npy"}
{"epoch": 0.8390022675736961, "step": 556, "batch_size": 64, "mean": 4.431397438049316, "std": 7.127571105957031, "min": -8.61948013305664, "p10": -3.394469451904296, "median": 2.403196334838867, "p90": 14.039229583740235, "max": 25.20928955078125, "pos_frac": 0.6875, "sample": [7.9755706787109375, 2.6965255737304688, 2.9423370361328125, -0.8240470886230469, -2.6614913940429688, 12.490272521972656, 16.75991439819336, 3.1240673065185547, -0.7937526702880859, -5.6409912109375, -0.3629741668701172, 1.1478500366210938, 11.465484619140625, 12.306121826171875, 1.1158943176269531, -1.15167236328125, 5.3673248291015625, 0.135528564453125, 7.093280792236328, 12.474372863769531, -0.9682083129882812, 11.332511901855469, 12.066045761108398, 1.3205986022949219, -0.6196651458740234, 2.4726943969726562, -8.268070220947266, 6.169221878051758, 9.25777816772461, -1.1082305908203125, 0.7814617156982422, -6.7860260009765625, 13.901725769042969, -0.534637451171875, -1.6641159057617188, 2.333698272705078, 7.613670349121094, 14.452949523925781, 1.6147232055664062, 1.433420181274414, 13.058832168579102, 25.20928955078125, 1.5138797760009766, 8.200027465820312, 11.642410278320312, 11.833656311035156, -0.33380889892578125, 3.9931640625, -8.61948013305664, -1.550933837890625, 15.590274810791016, -5.620307922363281, -0.9969940185546875, 14.098159790039062, 0.3730354309082031, -3.7086029052734375, 1.923736572265625, -6.7546844482421875, 5.488395690917969, 12.781492233276367, 14.343238830566406, 14.443475723266602, 5.834026336669922, 0.406005859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000556.npy"}
{"epoch": 0.8405139833711263, "step": 557, "batch_size": 64, "mean": 5.766865253448486, "std": 6.723106861114502, "min": -7.155109405517578, "p10": -2.240785980224609, "median": 5.178096771240234, "p90": 15.56998977661133, "max": 21.767860412597656, "pos_frac": 0.78125, "sample": [14.358770370483398, 6.700885772705078, 7.374544143676758, 0.7512092590332031, 0.8509025573730469, -3.720043182373047, 20.42630958557129, 1.71759033203125, 9.08243179321289, 4.9171905517578125, 1.063140869140625, -7.155109405517578, -0.4056243896484375, 8.064262390136719, 2.7435951232910156, 1.4996280670166016, 8.805381774902344, 5.161643981933594, 19.740617752075195, 8.691070556640625, -0.8123512268066406, -0.6252822875976562, 15.221084594726562, -2.738006591796875, -1.3478546142578125, -3.237091064453125, 15.719520568847656, 19.050811767578125, 2.5311527252197266, -0.5279464721679688, 5.194549560546875, 5.845739364624023, 8.478309631347656, -6.231996536254883, 2.9460296630859375, 17.121549606323242, 4.529167175292969, 12.156295776367188, 13.558914184570312, 21.767860412597656, 1.399078369140625, 1.9647693634033203, -2.5497055053710938, -0.06258773803710938, 0.8242855072021484, 2.8289794921875, 7.6038818359375, 9.505889892578125, 9.66385269165039, 4.549253463745117, 0.6855850219726562, 3.902097702026367, 6.615119934082031, 7.860231399536133, 8.978830337524414, 6.11602783203125, 13.228271484375, 7.150119781494141, 9.473270416259766, 8.176658630371094, -1.5199737548828125, -3.4861679077148438, 17.097259521484375, 9.805500030517578], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000557.npy"}
{"epoch": 0.8420256991685563, "step": 558, "batch_size": 64, "mean": 5.353769302368164, "std": 6.680166244506836, "min": -10.603363037109375, "p10": -1.7974830627441405, "median": 3.365589141845703, "p90": 14.812136840820314, "max": 21.966812133789062, "pos_frac": 0.78125, "sample": [4.126922607421875, 13.191566467285156, 14.84503173828125, -1.8698348999023438, 14.735382080078125, 1.549560546875, 14.278583526611328, 0.5126438140869141, 0.7233695983886719, 2.0673980712890625, 1.1961784362792969, 9.647567749023438, 3.7756500244140625, 3.2776336669921875, -0.6686248779296875, 8.332300186157227, 9.365751266479492, 3.4535446166992188, 7.459644317626953, 5.075370788574219, 15.426300048828125, 11.7960205078125, 4.123619079589844, 13.228975296020508, 6.011016845703125, -2.6649322509765625, 17.674530029296875, 1.798980712890625, 8.777816772460938, 3.1504058837890625, 13.613033294677734, 1.936004638671875, 2.46575927734375, 21.966812133789062, -1.3743133544921875, -1.4355144500732422, 3.0450000762939453, 0.4090118408203125, 4.1371612548828125, 18.720169067382812, 10.166362762451172, -1.924072265625, 14.857879638671875, -1.628662109375, 13.578413009643555, 16.944019317626953, -1.9764785766601562, -1.2388992309570312, 11.371246337890625, 2.510040283203125, 3.2268142700195312, -10.603363037109375, 4.777130126953125, -2.9062652587890625, 0.73321533203125, -1.0076675415039062, 8.429824829101562, -1.0752105712890625, -3.5789947509765625, 0.609375, 10.647613525390625, 2.0822391510009766, 10.034599304199219, 0.730560302734375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000558.npy"}
{"epoch": 0.8435374149659864, "step": 559, "batch_size": 64, "mean": 4.971553802490234, "std": 7.429248809814453, "min": -9.888555526733398, "p10": -4.141560363769531, "median": 3.4996871948242188, "p90": 14.591592597961426, "max": 21.535736083984375, "pos_frac": 0.71875, "sample": [-0.3335113525390625, -4.126617431640625, 2.1333656311035156, -1.313690185546875, 9.300819396972656, 21.535736083984375, 13.230491638183594, 13.040191650390625, 2.1002044677734375, 1.4686698913574219, -8.8072509765625, 13.6533203125, -0.5102386474609375, 8.870986938476562, 0.10127067565917969, 16.184837341308594, 10.019966125488281, 2.7144699096679688, 9.647048950195312, 6.687992095947266, 13.810558319091797, 2.424102783203125, -3.0466461181640625, 7.5875244140625, -0.5481491088867188, 1.4088325500488281, -4.8798370361328125, 5.622295379638672, 2.3040771484375, 20.063159942626953, 1.4517364501953125, 4.1163177490234375, 5.8023834228515625, 10.751296997070312, 15.552146911621094, -2.2264747619628906, 8.31651496887207, -1.4281864166259766, 1.0734481811523438, -5.069892883300781, 0.52227783203125, -4.1479644775390625, -2.4438858032226562, 4.616535186767578, 7.65020751953125, 14.600053787231445, 10.164005279541016, -4.6437225341796875, 11.152542114257812, 6.653453826904297, 8.052194595336914, 18.90471649169922, -9.357799530029297, -2.0035858154296875, 9.646957397460938, 18.18527603149414, 2.717479705810547, 14.571849822998047, 2.883056640625, 1.5792179107666016, 12.426830291748047, 8.106986999511719, -0.4519691467285156, -9.888555526733398], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000559.npy"}
{"epoch": 0.8450491307634165, "step": 560, "batch_size": 64, "mean": 4.576733589172363, "std": 6.691855430603027, "min": -8.444259643554688, "p10": -3.6039360046386713, "median": 3.410029411315918, "p90": 15.242914962768555, "max": 18.00140953063965, "pos_frac": 0.703125, "sample": [3.0210189819335938, 5.371025085449219, 15.612373352050781, 5.788330078125, -7.95819091796875, -2.7539749145507812, 14.158714294433594, 6.042938232421875, 16.051727294921875, 0.8862762451171875, 12.609237670898438, -0.37493896484375, 2.2366867065429688, 7.2482452392578125, -3.1962738037109375, -3.9865875244140625, -3.8772048950195312, -0.9703426361083984, 4.646137237548828, 8.599571228027344, -3.7786483764648438, 8.801139831542969, 3.2425537109375, 0.5606765747070312, 9.258106231689453, 8.238225936889648, 0.3346710205078125, 10.75047492980957, 15.578208923339844, 3.2657718658447266, 6.0487060546875, -2.56121826171875, -5.830314636230469, 15.445228576660156, 12.858795166015625, -2.075958251953125, 2.1114425659179688, -2.710052490234375, 18.00140953063965, -8.444259643554688, 16.754356384277344, 12.340019226074219, 6.708282470703125, 7.753747940063477, 1.7915496826171875, 7.102775573730469, -1.7862167358398438, 14.853931427001953, 2.8349838256835938, -1.0236549377441406, -0.5121841430664062, 6.5299072265625, -0.5487747192382812, -1.90985107421875, 2.46466064453125, 8.512420654296875, -4.541778564453125, 3.691265106201172, 12.003135681152344, 3.5542869567871094, 15.409622192382812, 7.885776519775391, 1.7481231689453125, 3.044832229614258], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000560.npy"}
{"epoch": 0.8465608465608465, "step": 561, "batch_size": 64, "mean": 5.182496070861816, "std": 7.055527687072754, "min": -9.332893371582031, "p10": -2.73603515625, "median": 3.5532302856445312, "p90": 16.439820098876954, "max": 22.444786071777344, "pos_frac": 0.765625, "sample": [0.4240875244140625, 0.6334800720214844, 1.6987762451171875, 1.5799217224121094, 1.6623153686523438, 9.959892272949219, 2.131145477294922, -2.61187744140625, 8.595823287963867, 3.777313232421875, 3.3291473388671875, 6.5178680419921875, 0.8009529113769531, 10.854835510253906, 1.2747039794921875, -4.785820007324219, 1.4293899536132812, 14.54472541809082, 22.444786071777344, -0.1353302001953125, 1.4284477233886719, 11.151687622070312, 8.804122924804688, -2.105846405029297, 9.203208923339844, -2.042713165283203, 6.753288269042969, -2.909332275390625, -9.332893371582031, 16.800064086914062, -5.324504852294922, 4.3750457763671875, 10.156723022460938, 12.08565902709961, 4.3026123046875, 2.833404541015625, 4.712043762207031, 6.08381462097168, 1.3392562866210938, 19.239593505859375, 18.007293701171875, 16.503890991210938, 14.216171264648438, 20.255859375, 10.48923110961914, -3.992034912109375, -2.6262969970703125, 8.720832824707031, 16.290321350097656, 5.413782119750977, -2.7830657958984375, 2.353435516357422, 2.366680145263672, -0.29286766052246094, 5.1868438720703125, 9.213071823120117, 1.6892566680908203, 6.864723205566406, 9.521369934082031, -4.434928894042969, -0.7576026916503906, 3.0116920471191406, 17.030498504638672, -2.248199462890625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000561.npy"}
{"epoch": 0.8480725623582767, "step": 562, "batch_size": 64, "mean": 5.58132791519165, "std": 6.6047682762146, "min": -7.897468566894531, "p10": -2.5514398574829102, "median": 5.8931884765625, "p90": 14.431289100646973, "max": 18.566448211669922, "pos_frac": 0.75, "sample": [0.9394454956054688, 14.389310836791992, -5.865383148193359, 8.138763427734375, 5.7158203125, 8.180793762207031, 0.7880401611328125, 17.175628662109375, 18.512741088867188, -0.4102630615234375, 6.168670654296875, 9.340888977050781, -6.025753021240234, 1.9076805114746094, 12.88232421875, -0.00345611572265625, 0.435028076171875, 2.21588134765625, 8.615097045898438, -2.6425323486328125, 11.305885314941406, 13.603857040405273, -0.8258819580078125, 0.24765777587890625, -2.4328536987304688, 15.475162506103516, -2.602262496948242, -0.9642486572265625, 7.6412200927734375, 5.167430877685547, 12.07669448852539, -2.9726943969726562, 18.566448211669922, 9.263084411621094, 5.102043151855469, 14.44927978515625, -2.378223419189453, -0.05347442626953125, -1.0801715850830078, 14.142219543457031, 1.6115570068359375, 0.44449615478515625, 8.056066513061523, 6.6957244873046875, 7.129158020019531, 17.062637329101562, 8.990921020507812, 7.4663238525390625, 11.44400405883789, 5.283042907714844, 12.876800537109375, 5.87530517578125, -0.134674072265625, 6.243213653564453, 7.590396881103516, 8.649011611938477, 10.852590560913086, 16.55402374267578, 5.91107177734375, -7.897468566894531, 0.5348968505859375, -3.685821533203125, 1.4891891479492188, 3.9726181030273438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000562.npy"}
{"epoch": 0.8495842781557067, "step": 563, "batch_size": 64, "mean": 4.919520378112793, "std": 7.5754218101501465, "min": -11.052160263061523, "p10": -4.853656768798828, "median": 5.172087669372559, "p90": 13.785448455810547, "max": 23.19993019104004, "pos_frac": 0.734375, "sample": [0.86785888671875, 9.552299499511719, 6.621601104736328, 0.5219135284423828, 11.9598388671875, 11.497344970703125, -11.052160263061523, 13.612579345703125, 5.4716339111328125, -0.7295455932617188, -0.18285751342773438, 6.3906707763671875, 0.06592559814453125, -5.006767272949219, 13.303455352783203, 3.3994579315185547, -6.277156829833984, 12.786079406738281, -5.2365875244140625, 14.70526123046875, 5.116065979003906, 18.94140625, -3.6609878540039062, 7.844047546386719, 11.566787719726562, 4.098653793334961, 11.961891174316406, 2.3074989318847656, 5.268283843994141, -9.550712585449219, -0.21987152099609375, -3.804851531982422, -2.736236572265625, 1.8458099365234375, 0.8324031829833984, 15.010354995727539, 2.668384552001953, -10.70449447631836, 8.31129264831543, 2.852123260498047, 16.199447631835938, 14.500160217285156, 11.183599472045898, -2.9327468872070312, -4.49639892578125, 4.223136901855469, 13.859535217285156, -0.06317901611328125, 11.784172058105469, 7.173126220703125, -10.914764404296875, 3.814939498901367, 7.741405487060547, 11.785350799560547, 8.461463928222656, 5.228109359741211, 23.19993019104004, 13.025009155273438, -0.646636962890625, 11.389520645141602, 7.243293762207031, 1.8932876586914062, 9.39946174621582, 1.5793724060058594], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000563.npy"}
{"epoch": 0.8510959939531368, "step": 564, "batch_size": 64, "mean": 5.352931022644043, "std": 6.717764854431152, "min": -9.605182647705078, "p10": -2.4293373107910154, "median": 5.48239803314209, "p90": 14.265746498107912, "max": 20.571922302246094, "pos_frac": 0.78125, "sample": [9.361425399780273, -2.1439208984375, 13.777099609375, 10.314971923828125, 11.81346321105957, 16.28154754638672, 8.262710571289062, -9.605182647705078, -0.04872894287109375, -0.90509033203125, 14.791519165039062, 13.856229782104492, 5.531742095947266, -2.5516586303710938, 1.2747344970703125, -0.45299530029296875, -2.0206565856933594, 5.446500778198242, 7.31787109375, -0.08467864990234375, -0.3332366943359375, 0.6186103820800781, 8.915210723876953, 7.52191162109375, 12.246658325195312, 4.290069580078125, 13.451141357421875, -3.8458938598632812, 3.4781551361083984, 5.5182952880859375, 12.157341003417969, 14.767654418945312, 19.596542358398438, 3.218189239501953, 0.74884033203125, -6.83929443359375, 5.650661468505859, 4.182117462158203, 1.8584671020507812, 1.7097740173339844, 14.644163131713867, 5.7586517333984375, 7.32025146484375, 2.335561752319336, -3.7465877532958984, 1.0030059814453125, 6.275672912597656, 3.909576416015625, 1.9891738891601562, -7.893730163574219, 10.226024627685547, 6.960268020629883, 11.3450927734375, 0.1716327667236328, 11.13397216796875, 2.7017288208007812, 11.727767944335938, 20.571922302246094, -8.17889404296875, 14.441253662109375, 2.1929092407226562, 8.127700805664062, 3.384733200073242, 7.0576019287109375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000564.npy"}
{"epoch": 0.8526077097505669, "step": 565, "batch_size": 64, "mean": 4.128960609436035, "std": 6.423488616943359, "min": -11.767740249633789, "p10": -4.222812843322754, "median": 3.7088279724121094, "p90": 12.380861663818362, "max": 19.599334716796875, "pos_frac": 0.734375, "sample": [1.6563072204589844, -4.174396514892578, -6.153388977050781, 9.484771728515625, 0.21511077880859375, 1.188140869140625, 3.228668212890625, 5.993568420410156, 0.23122406005859375, 2.9822463989257812, 4.266471862792969, 6.6403656005859375, 13.516044616699219, -11.767740249633789, -0.14559173583984375, 7.5042877197265625, 4.607969284057617, 13.537910461425781, 19.599334716796875, 18.57758331298828, 9.949737548828125, -6.569061279296875, -1.1393966674804688, -2.044017791748047, 7.340858459472656, 6.5174560546875, 3.3136215209960938, 2.7034683227539062, 11.893753051757812, 8.208988189697266, 7.382364273071289, 0.5541572570800781, -1.7778472900390625, -0.35489654541015625, 8.160097122192383, -4.243562698364258, 4.925806045532227, 7.790790557861328, -4.894426345825195, 5.502662658691406, 4.3286590576171875, -4.724933624267578, -1.1842041015625, 8.00970458984375, 1.7057037353515625, 8.602184295654297, 1.4579696655273438, 7.434391021728516, 14.033199310302734, 0.652587890625, 10.5264892578125, -2.903911590576172, -4.98406982421875, -1.6785163879394531, 3.1305856704711914, 2.615093231201172, -0.131378173828125, 10.597427368164062, 1.2289390563964844, 4.612823486328125, 19.344804763793945, 10.676851272583008, 4.104034423828125, 12.589622497558594], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000565.npy"}
{"epoch": 0.854119425547997, "step": 566, "batch_size": 64, "mean": 4.299956321716309, "std": 7.698660373687744, "min": -12.356191635131836, "p10": -5.463352966308594, "median": 3.9202356338500977, "p90": 12.761361694335937, "max": 21.800643920898438, "pos_frac": 0.6875, "sample": [8.026233673095703, 2.7007369995117188, -1.844400405883789, 2.9014739990234375, 0.5696563720703125, 14.266407012939453, 16.795047760009766, 6.874645233154297, 4.111040115356445, 7.570823669433594, 4.489408493041992, 3.7275848388671875, 3.655170440673828, 2.4251441955566406, 11.040763854980469, 16.43206787109375, -12.276824951171875, 10.344757080078125, 9.362964630126953, 7.883199691772461, 9.56607437133789, -1.3438644409179688, -5.9298858642578125, 10.993736267089844, 0.1771259307861328, -5.4345245361328125, 11.99807357788086, 12.63314437866211, 10.343643188476562, -5.355796813964844, 11.955013275146484, 12.733024597167969, 12.773506164550781, -0.8802490234375, -4.53790283203125, -12.356191635131836, -4.729011535644531, 6.88323974609375, 11.43121337890625, -8.28506851196289, 2.1475143432617188, -5.4757080078125, 21.800643920898438, -6.27801513671875, 10.845928192138672, 3.038482666015625, -4.999595642089844, -5.501377105712891, 11.323127746582031, 1.7407379150390625, -1.498748779296875, -0.93829345703125, 4.9167633056640625, 8.310590744018555, -2.822906494140625, 5.688257217407227, -4.285797119140625, 12.998207092285156, 5.274467468261719, -0.9381084442138672, 10.435115814208984, 2.579833984375, 3.72943115234375, 21.41545867919922], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000566.npy"}
{"epoch": 0.8556311413454271, "step": 567, "batch_size": 64, "mean": 5.077354907989502, "std": 6.566307067871094, "min": -7.200202941894531, "p10": -2.8645229339599605, "median": 4.392239570617676, "p90": 14.029571533203125, "max": 23.389022827148438, "pos_frac": 0.78125, "sample": [2.800018310546875, 1.3927879333496094, -2.4673919677734375, 16.3592586517334, 3.38116455078125, 5.8804168701171875, 14.05126953125, 10.29196548461914, 18.365816116333008, 0.9597930908203125, 11.353691101074219, 6.617586135864258, 6.004646301269531, -6.254722595214844, 4.854576110839844, 1.4014205932617188, -3.1116867065429688, 5.5428924560546875, 3.0669403076171875, 6.706783294677734, 7.3422088623046875, 0.4106178283691406, -0.9513378143310547, 7.027862548828125, 0.46227264404296875, 0.07815361022949219, -3.026874542236328, -3.610116958618164, 6.312431335449219, -7.200202941894531, 17.86304473876953, 4.280128479003906, 16.584461212158203, 23.389022827148438, 10.582725524902344, 0.3072700500488281, 9.045934677124023, -0.821624755859375, 5.2145233154296875, 13.580543518066406, -4.291461944580078, 3.2050094604492188, 2.0211029052734375, 4.1626129150390625, 11.302467346191406, 11.848617553710938, -1.1891403198242188, 3.9380016326904297, 11.270164489746094, 5.882331848144531, 9.167369842529297, 0.6784019470214844, 13.97894287109375, 0.22484588623046875, -0.10514068603515625, -2.4857025146484375, -4.284637451171875, 4.504350662231445, 16.94731903076172, -1.033843994140625, 6.423175811767578, 2.7795753479003906, 8.018447875976562, 7.9196319580078125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000567.npy"}
{"epoch": 0.8571428571428571, "step": 568, "batch_size": 64, "mean": 5.6282477378845215, "std": 6.142501354217529, "min": -5.450096130371094, "p10": -1.5976352691650386, "median": 4.853813171386719, "p90": 14.568995666503913, "max": 24.596282958984375, "pos_frac": 0.8125, "sample": [2.40362548828125, -0.16823387145996094, 5.242740631103516, 0.9477920532226562, 5.469512939453125, 9.232765197753906, 7.145263671875, -5.450096130371094, 2.7308311462402344, 0.8753662109375, -4.261823654174805, 13.023645401000977, 2.358427047729492, 12.71435546875, -2.3989715576171875, -3.2742156982421875, 0.8467140197753906, 1.795684814453125, 8.245590209960938, 6.662425994873047, -0.151092529296875, 4.0511474609375, 1.5706796646118164, 7.008293151855469, 13.069404602050781, 15.211677551269531, 9.014829635620117, 15.430618286132812, -4.788089752197266, 4.430206298828125, 5.0596466064453125, 4.59657096862793, 7.008760452270508, -1.0850601196289062, 7.819211959838867, 4.382291793823242, -1.8173103332519531, 7.827217102050781, -0.12150382995605469, 7.277544021606445, 6.120674133300781, 15.663436889648438, 1.4535598754882812, 7.165000915527344, 12.058856964111328, 2.3593368530273438, 8.742637634277344, -2.7023391723632812, 1.522430419921875, 3.6928329467773438, 5.498626708984375, 9.659103393554688, 4.647979736328125, 10.559776306152344, 16.035430908203125, 2.4322071075439453, -0.15171432495117188, 12.0338134765625, 16.862442016601562, 9.956123352050781, 2.4810333251953125, 1.1008987426757812, 18.48297882080078, 24.596282958984375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000568.npy"}
{"epoch": 0.8586545729402872, "step": 569, "batch_size": 64, "mean": 4.294561386108398, "std": 6.623303413391113, "min": -13.464637756347656, "p10": -2.3827989578247064, "median": 3.801815986633301, "p90": 12.23881072998047, "max": 19.985679626464844, "pos_frac": 0.796875, "sample": [9.881591796875, 0.6528472900390625, 2.103626251220703, 15.151792526245117, 5.9168243408203125, 15.427694320678711, -7.191654205322266, 6.754241943359375, 9.64532470703125, 17.211326599121094, 8.991142272949219, 1.6848678588867188, 0.7550849914550781, 5.696805953979492, -12.199729919433594, 7.192554473876953, 9.105045318603516, 12.39202880859375, 0.03896141052246094, 8.324104309082031, 11.881301879882812, 7.128662109375, 1.3426017761230469, 16.779884338378906, 1.6504936218261719, 8.833868026733398, 8.787704467773438, 3.2128734588623047, 3.8723602294921875, -7.851127624511719, -8.727529525756836, 3.465106964111328, -0.5399951934814453, 1.7846336364746094, -0.30634307861328125, 3.909698486328125, 19.985679626464844, 3.0596923828125, 5.9251708984375, 1.8691139221191406, 6.436994552612305, -2.5771007537841797, 2.196197509765625, -0.6064434051513672, 3.578826904296875, -13.464637756347656, 5.2938385009765625, -1.20953369140625, 8.283084869384766, 4.169956207275391, 3.731271743774414, 4.7208709716796875, 16.127901077270508, -1.5502166748046875, -3.2851314544677734, 3.199075698852539, 5.119991302490234, 2.984710693359375, 10.630329132080078, 0.0626983642578125, -1.9294281005859375, 4.654518127441406, 11.497993469238281, 3.1878280639648438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000569.npy"}
{"epoch": 0.8601662887377173, "step": 570, "batch_size": 64, "mean": 6.764734268188477, "std": 6.566583633422852, "min": -6.161262512207031, "p10": -1.406215667724609, "median": 6.275323867797852, "p90": 16.012223052978516, "max": 20.223209381103516, "pos_frac": 0.828125, "sample": [12.806159973144531, -5.944694519042969, 8.807838439941406, 6.170246124267578, 9.180099487304688, 7.532234191894531, 0.7667999267578125, -1.7694168090820312, 4.705970764160156, -0.511962890625, 1.3607292175292969, 1.9967403411865234, 1.8364143371582031, 8.434700012207031, 2.2168617248535156, 9.92129135131836, -0.7904376983642578, -1.0307998657226562, 1.375091552734375, 11.586021423339844, 8.590682983398438, -2.200817108154297, 2.2966461181640625, 14.325162887573242, 20.223209381103516, 0.2845039367675781, 12.158798217773438, 8.469245910644531, 14.960777282714844, 4.729240417480469, 5.993366241455078, 15.410003662109375, 9.893241882324219, -2.884246826171875, 4.867639541625977, 7.859832763671875, 11.676284790039062, 6.422819137573242, 17.182937622070312, 5.927896499633789, 14.914535522460938, 19.203842163085938, 13.479644775390625, -2.070842742919922, 3.9430770874023438, 7.104869842529297, 12.312774658203125, 10.449234008789062, 6.380401611328125, -6.161262512207031, 9.786977767944336, -0.7656936645507812, 1.7505950927734375, 5.367448806762695, 20.027099609375, 4.7657318115234375, 2.296689987182617, 2.132139205932617, 10.335044860839844, 16.761817932128906, 18.42572021484375, 2.962818145751953, 16.27031707763672, -1.567108154296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000570.npy"}
{"epoch": 0.8616780045351474, "step": 571, "batch_size": 64, "mean": 3.908352851867676, "std": 6.544003486633301, "min": -10.826362609863281, "p10": -6.172287559509276, "median": 4.4359893798828125, "p90": 10.865019989013673, "max": 18.849559783935547, "pos_frac": 0.765625, "sample": [7.680717468261719, 4.287467956542969, 4.201019287109375, 2.1673831939697266, -0.6535873413085938, 16.92169952392578, 6.1494293212890625, 4.96270751953125, 9.343576431274414, 3.741851806640625, 4.886249542236328, 9.315025329589844, -0.6339035034179688, 0.8653850555419922, -1.472036361694336, 7.3780059814453125, 3.4444656372070312, 7.423553466796875, 0.6459732055664062, 10.101158142089844, 13.158588409423828, 0.11725616455078125, 10.48345947265625, 4.049522399902344, 11.380928039550781, 8.440738677978516, 7.846698760986328, 5.516090393066406, -9.323951721191406, 8.755508422851562, 0.3413734436035156, -6.944250106811523, 4.121381759643555, -4.758745193481445, 6.8572540283203125, 1.5164566040039062, -6.7780914306640625, -1.689901351928711, -2.0332183837890625, 3.381378173828125, 5.382698059082031, 4.961029052734375, 5.375160217285156, 1.4252548217773438, 5.641757965087891, 15.590492248535156, 10.974624633789062, -8.774164199829102, 1.3533096313476562, 2.1339492797851562, -9.573928833007812, -8.569580078125, 9.508460998535156, 10.609275817871094, 9.65371322631836, -1.6254348754882812, 7.385711669921875, 14.988941192626953, 4.584510803222656, 7.9010772705078125, -3.064760208129883, 1.054656982421875, -10.826362609863281, 18.849559783935547], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000571.npy"}
{"epoch": 0.8631897203325775, "step": 572, "batch_size": 64, "mean": 4.276226043701172, "std": 6.0366435050964355, "min": -7.719207763671875, "p10": -3.3936538696289062, "median": 4.584194183349609, "p90": 12.307312774658204, "max": 21.759666442871094, "pos_frac": 0.78125, "sample": [2.0082931518554688, -7.324485778808594, 1.3886795043945312, 6.991790771484375, 9.150394439697266, 2.9563446044921875, 15.137798309326172, 4.602790832519531, -2.5455493927001953, 21.759666442871094, 5.1964874267578125, 6.823034286499023, 9.109283447265625, 11.920585632324219, -1.2750358581542969, 9.733901977539062, -3.19561767578125, 1.7418193817138672, 1.7926521301269531, 13.618972778320312, 7.93115234375, 5.7109375, 4.5655975341796875, 12.728904724121094, 6.9290924072265625, 4.3809051513671875, 0.47504425048828125, 4.420280456542969, -4.016563415527344, -1.95867919921875, 10.03839111328125, 13.185422897338867, 12.473052978515625, -7.19317626953125, 7.9530792236328125, 5.181026458740234, 0.5626373291015625, -3.7657546997070312, 1.5765571594238281, 1.6864395141601562, 7.993476867675781, 6.146883010864258, 8.276435852050781, 7.094049453735352, 5.023551940917969, 10.373497009277344, 0.2121429443359375, -3.366363525390625, 16.242630004882812, 8.849327087402344, 5.620201110839844, 5.8918609619140625, -1.21490478515625, 3.63543701171875, -7.719207763671875, -5.31982421875, 0.33350372314453125, 5.970863342285156, -3.3316650390625, -3.4053497314453125, 6.2294921875, 0.8863525390625, 3.733724594116211, 3.0661888122558594], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000572.npy"}
{"epoch": 0.8647014361300076, "step": 573, "batch_size": 64, "mean": 4.8617777824401855, "std": 8.179359436035156, "min": -12.172128677368164, "p10": -4.786184692382813, "median": 3.4180450439453125, "p90": 16.547857666015627, "max": 23.487510681152344, "pos_frac": 0.75, "sample": [1.86419677734375, 7.09327507019043, -12.172128677368164, 8.535316467285156, -2.8321304321289062, 2.9191207885742188, -4.2905426025390625, 8.210351943969727, 3.5388031005859375, -4.7431182861328125, -11.174936294555664, -6.950431823730469, -1.921133041381836, 1.2262496948242188, 3.4292526245117188, 2.367706298828125, 0.6422882080078125, 6.327789306640625, 16.221954345703125, -12.053466796875, 14.463546752929688, 5.41778564453125, 14.809127807617188, 3.1311569213867188, -1.7465896606445312, -2.5946083068847656, 22.826065063476562, 3.868539810180664, 12.232601165771484, 3.4068374633789062, 7.666572570800781, 1.395416259765625, 4.218929290771484, 4.938873291015625, 19.038787841796875, 4.905216217041016, 0.4015083312988281, 3.0314254760742188, 2.9304351806640625, 23.487510681152344, 14.109245300292969, 8.122392654418945, 8.814382553100586, 0.7519779205322266, -1.256317138671875, 2.6722030639648438, 21.098190307617188, 2.96380615234375, 2.8505935668945312, 9.88311767578125, 18.648094177246094, 16.687530517578125, 2.727020263671875, 9.293889999389648, -0.09470748901367188, -5.473606109619141, 19.257598876953125, -4.8046417236328125, 8.851734161376953, 6.51036262512207, 7.645469665527344, -1.178924560546875, -6.152458190917969, 15.159271240234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000573.npy"}
{"epoch": 0.8662131519274376, "step": 574, "batch_size": 64, "mean": 5.517082214355469, "std": 5.890059471130371, "min": -4.72119140625, "p10": -1.1745639801025387, "median": 4.904754638671875, "p90": 13.945661926269532, "max": 21.850677490234375, "pos_frac": 0.828125, "sample": [6.125049591064453, -2.1150360107421875, -0.14242172241210938, -3.6243972778320312, 0.7776336669921875, 7.771106719970703, 0.6121597290039062, -2.0736007690429688, 1.6104660034179688, -0.8447113037109375, -0.7373466491699219, 7.6768035888671875, 3.61627197265625, 1.9696674346923828, -1.4668655395507812, -0.7359848022460938, -1.3159294128417969, 2.1377410888671875, 4.635108947753906, 4.5960235595703125, 11.058940887451172, 4.897926330566406, 5.90667724609375, 8.620735168457031, -1.4995498657226562, 21.850677490234375, 7.304832458496094, 15.003089904785156, 0.19005966186523438, 11.12396240234375, 7.915496826171875, 0.21312904357910156, 6.998493194580078, 6.677337646484375, 1.971221923828125, 7.745677947998047, -4.72119140625, 2.4896774291992188, 13.983711242675781, 1.8923587799072266, 6.719024658203125, 5.285026550292969, 18.41669464111328, 6.074851989746094, 9.071163177490234, 15.709930419921875, 7.385032653808594, 6.987579345703125, 4.5473480224609375, 4.911582946777344, 0.4529876708984375, 18.940021514892578, 3.6483535766601562, 7.652761459350586, 10.265548706054688, 0.5942306518554688, 2.201486587524414, 11.67800521850586, 13.856880187988281, 16.207969665527344, 0.6490249633789062, 9.0660400390625, 13.735015869140625, 0.9416885375976562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000574.npy"}
{"epoch": 0.8677248677248677, "step": 575, "batch_size": 64, "mean": 3.850635051727295, "std": 7.393406391143799, "min": -12.473278045654297, "p10": -4.825822067260742, "median": 3.129267692565918, "p90": 14.846654510498048, "max": 21.781829833984375, "pos_frac": 0.6875, "sample": [3.7570571899414062, -6.154315948486328, -10.460662841796875, -1.0210628509521484, 9.7451171875, 2.129791259765625, 0.9393386840820312, 6.0340423583984375, -1.003204345703125, -3.3621063232421875, 9.03326416015625, -0.9776535034179688, 17.774520874023438, -4.604591369628906, -6.557975769042969, 9.747482299804688, -7.544952392578125, 5.7830963134765625, 5.810312271118164, 3.7384414672851562, 6.4040985107421875, 14.669288635253906, 4.719512939453125, 4.785484313964844, 1.7093429565429688, 0.636688232421875, -4.398162841796875, 6.483121871948242, 14.92266845703125, 0.06391143798828125, -0.11351585388183594, 2.3914947509765625, 15.228816986083984, 17.146163940429688, 11.862613677978516, -8.017087936401367, 21.10674285888672, -4.920635223388672, 17.89960479736328, 11.28994369506836, -0.1864776611328125, 6.5479888916015625, 8.28390121459961, 1.1693115234375, 2.2983856201171875, 2.032318115234375, 1.6310272216796875, -12.473278045654297, -0.5637969970703125, 21.781829833984375, 10.031387329101562, 2.64111328125, 3.7478179931640625, 5.153238296508789, -2.1602096557617188, 5.6866455078125, 3.225749969482422, -3.374187469482422, 3.032785415649414, 8.469608306884766, -3.2560653686523438, 9.411090850830078, 7.3523406982421875, -0.7179222106933594], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000575.npy"}
{"epoch": 0.8692365835222978, "step": 576, "batch_size": 64, "mean": 4.996522426605225, "std": 7.0755438804626465, "min": -8.734954833984375, "p10": -3.1584781646728515, "median": 4.686088562011719, "p90": 14.523097991943363, "max": 18.348052978515625, "pos_frac": 0.6875, "sample": [13.761932373046875, 13.437627792358398, -1.0761222839355469, 5.429567337036133, -4.775146484375, 8.147842407226562, 16.78646469116211, 2.643613815307617, 6.621971130371094, 4.503044128417969, -3.0170650482177734, -4.161149978637695, 12.641937255859375, -7.1644287109375, 7.211189270019531, -5.061195373535156, -2.8261795043945312, 9.501659393310547, 17.914199829101562, -0.6793212890625, 9.792490005493164, 17.595003128051758, 6.693027496337891, -0.19444847106933594, 11.694931030273438, 7.333295822143555, 4.869132995605469, -4.469480514526367, 15.457893371582031, -2.2295169830322266, 1.1526298522949219, 0.10984039306640625, 1.708251953125, 13.5792236328125, 4.3729095458984375, 2.4063644409179688, 17.720413208007812, -1.079498291015625, 1.0141830444335938, 1.3995399475097656, 9.615917205810547, -0.8567314147949219, 14.849311828613281, -2.0521297454833984, 11.799415588378906, 1.9091949462890625, 18.348052978515625, 12.696990966796875, -0.6931056976318359, 6.514354705810547, 9.483474731445312, -8.734954833984375, 5.0483856201171875, -2.7861328125, 8.628280639648438, 5.904838562011719, -3.219083786010742, -2.2948379516601562, 10.728141784667969, 11.790130615234375, 1.1462230682373047, 0.8153362274169922, -1.0792160034179688, 13.448944091796875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000576.npy"}
{"epoch": 0.8707482993197279, "step": 577, "batch_size": 64, "mean": 3.788405179977417, "std": 7.040760040283203, "min": -10.881004333496094, "p10": -5.323016357421874, "median": 2.8580265045166016, "p90": 13.345420455932622, "max": 21.56336212158203, "pos_frac": 0.71875, "sample": [7.816884994506836, 2.4800052642822266, -5.645606994628906, 5.869096755981445, -0.8659400939941406, 6.018157958984375, -0.14669418334960938, -1.8948898315429688, 10.901573181152344, 0.9857406616210938, -2.4832229614257812, 6.5457763671875, 9.440174102783203, -1.3999481201171875, -3.44329833984375, 15.389366149902344, 8.187469482421875, 16.241100311279297, -10.881004333496094, 7.912519454956055, 12.322734832763672, 6.535011291503906, 1.3815135955810547, 17.98383331298828, 0.38239288330078125, -7.48834228515625, -7.135869979858398, 7.739055633544922, 6.585153579711914, 0.553466796875, 0.8748779296875, 3.620403289794922, 0.8206787109375, 2.621044158935547, 3.405731201171875, 0.40283966064453125, 1.8035392761230469, 3.4152069091796875, -0.05612945556640625, -1.2822246551513672, 4.087684631347656, 6.0133056640625, 14.465721130371094, 1.7479400634765625, 19.568918228149414, 13.783714294433594, -10.311332702636719, 7.3765869140625, 2.605865478515625, 10.518722534179688, -4.570304870605469, 6.3620452880859375, 21.56336212158203, 11.951175689697266, 2.7926902770996094, 1.3183326721191406, -9.434860229492188, -5.66064453125, -2.396139144897461, 10.257158279418945, 2.9233627319335938, 7.8126220703125, 4.428253173828125, -0.25843048095703125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000577.npy"}
{"epoch": 0.872260015117158, "step": 578, "batch_size": 64, "mean": 3.4311115741729736, "std": 6.424037933349609, "min": -8.478607177734375, "p10": -3.849566841125488, "median": 1.8515539169311523, "p90": 14.098692893981935, "max": 20.065673828125, "pos_frac": 0.65625, "sample": [2.9380455017089844, -4.767778396606445, 0.5501937866210938, -1.4261474609375, 3.0501556396484375, -0.5282363891601562, 4.442592620849609, 3.610004425048828, 6.536064147949219, -1.1284465789794922, 0.33797454833984375, 20.065673828125, -1.4738273620605469, 5.35711669921875, 11.774028778076172, -8.478607177734375, 14.516525268554688, 6.318902969360352, 8.50479507446289, 11.401634216308594, 14.373252868652344, 4.5386810302734375, 0.7973308563232422, 4.120611190795898, -0.9730873107910156, 6.038385391235352, 9.597709655761719, -0.24090576171875, 1.2616252899169922, 0.07030487060546875, 15.094306945800781, 7.611783981323242, 17.373409271240234, 6.1307373046875, -0.27701568603515625, 3.799224853515625, 3.3227462768554688, 6.1089935302734375, 2.4414825439453125, 0.8195533752441406, 11.88580322265625, -7.542694091796875, 3.2806243896484375, -1.3228397369384766, -2.367767333984375, -3.2332992553710938, -3.9698104858398438, 13.797090530395508, -1.7451725006103516, 14.717052459716797, 6.969512939453125, -4.965095520019531, 14.227951049804688, 0.15548324584960938, 0.867919921875, -3.568998336791992, 0.58251953125, -0.29178619384765625, -1.7404708862304688, 0.39125823974609375, -4.583503723144531, -0.451263427734375, -5.56561279296875, 10.454452514648438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000578.npy"}
{"epoch": 0.873771730914588, "step": 579, "batch_size": 64, "mean": 6.280138969421387, "std": 6.803126811981201, "min": -9.285537719726562, "p10": -1.7294570922851562, "median": 5.951629638671875, "p90": 16.143161201477053, "max": 23.115951538085938, "pos_frac": 0.8125, "sample": [8.801383972167969, 6.951263427734375, 4.579301834106445, 2.7370223999023438, 3.3263320922851562, -3.1360244750976562, 8.649940490722656, 2.2812118530273438, 7.547393798828125, 18.368576049804688, 15.406890869140625, 7.836065292358398, 7.422771453857422, 18.320999145507812, 8.364974975585938, 5.005531311035156, 15.655906677246094, 0.36450958251953125, -1.7684860229492188, 7.5113677978515625, 23.115951538085938, -2.3114585876464844, 20.991275787353516, -0.12049484252929688, 3.2845306396484375, 1.854909896850586, 19.11231803894043, 8.789932250976562, 15.829187393188477, 7.505401611328125, -1.42144775390625, 5.259122848510742, 13.084770202636719, 5.225925445556641, 9.089111328125, 6.4046173095703125, 9.056350708007812, 6.513246536254883, -2.0621337890625, 16.277721405029297, 0.10964202880859375, 7.248439788818359, 0.32947540283203125, -5.117973327636719, 7.194393157958984, 2.2720584869384766, -1.6383895874023438, -9.285537719726562, 18.561126708984375, 0.5191268920898438, 5.4986419677734375, 3.572113037109375, -1.0694122314453125, 11.635831832885742, 7.909229278564453, 2.8865833282470703, 13.915878295898438, 9.152687072753906, 2.8768978118896484, 4.391288757324219, -2.5149993896484375, 3.649524688720703, 10.634834289550781, -0.5083389282226562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000579.npy"}
{"epoch": 0.8752834467120182, "step": 580, "batch_size": 64, "mean": 4.3713226318359375, "std": 7.292051792144775, "min": -12.443109512329102, "p10": -4.30426254272461, "median": 3.124635696411133, "p90": 14.62836494445801, "max": 20.15332794189453, "pos_frac": 0.6875, "sample": [-6.00933837890625, -0.3958740234375, -4.2877655029296875, 1.6920433044433594, -2.9199371337890625, 7.037591934204102, 17.599559783935547, 8.052230834960938, 5.4321136474609375, -0.11162567138671875, 9.643653869628906, 4.332160949707031, 1.6631011962890625, 1.84405517578125, -5.556602478027344, 3.299602508544922, 9.557060241699219, 6.257728576660156, -2.071268081665039, 5.289226531982422, -1.4402198791503906, 8.344013214111328, 16.5665283203125, 0.8893508911132812, 10.190694808959961, -1.0711822509765625, 13.918869018554688, 0.374755859375, -0.24350357055664062, 16.835250854492188, 9.220069885253906, 8.477714538574219, 2.6906890869140625, 7.386770248413086, -4.14361572265625, 20.15332794189453, -7.110584259033203, -12.443109512329102, 0.6955223083496094, 17.55328369140625, -8.565439224243164, 10.724761962890625, -4.311332702636719, 6.1705474853515625, 16.796356201171875, -1.3087539672851562, 1.9310894012451172, 1.377023696899414, 10.814651489257812, 2.9496688842773438, 14.902660369873047, 13.278705596923828, -0.8895645141601562, -8.616157531738281, -0.7761383056640625, -2.368326187133789, 7.8397064208984375, 10.04609489440918, 8.509143829345703, 13.98834228515625, 2.3697738647460938, 5.8960113525390625, 2.8896942138671875, 8.92376708984375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000580.npy"}
{"epoch": 0.8767951625094482, "step": 581, "batch_size": 64, "mean": 4.2297773361206055, "std": 6.620793342590332, "min": -8.968576431274414, "p10": -3.9321533203124996, "median": 3.9393348693847656, "p90": 11.012756347656252, "max": 22.841842651367188, "pos_frac": 0.78125, "sample": [11.68398666381836, 8.564193725585938, 1.4403839111328125, 5.186237335205078, 3.017578125, 1.2895736694335938, 0.6640777587890625, -4.2673187255859375, 7.6651458740234375, 7.527198791503906, -8.558570861816406, 2.1438026428222656, 1.4202766418457031, 5.89463996887207, 8.110740661621094, -4.931737899780273, 9.25299072265625, 2.858631134033203, 7.593345642089844, 0.7012805938720703, 17.240854263305664, 2.780071258544922, 17.30683135986328, -4.01220703125, 9.533355712890625, 21.910446166992188, 9.198841094970703, 0.6676254272460938, 7.8675689697265625, 6.993766784667969, -3.0682220458984375, 8.52978515625, 0.8322429656982422, 9.79339599609375, 5.8582000732421875, 5.3658447265625, 1.9474258422851562, -5.2049407958984375, 22.841842651367188, 14.394691467285156, 1.7139892578125, -3.745361328125, 6.797637939453125, -1.9385223388671875, 5.695377349853516, -8.968576431274414, 11.14312744140625, 0.815185546875, -2.188129425048828, -6.337306976318359, 9.905563354492188, 1.4972305297851562, 6.11407470703125, 10.70855712890625, 1.5677337646484375, 4.9621429443359375, 0.07137870788574219, -2.407350540161133, -2.7472686767578125, 4.861091613769531, 0.1490325927734375, 7.986530303955078, 9.2742919921875, -2.2585525512695312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000581.npy"}
{"epoch": 0.8783068783068783, "step": 582, "batch_size": 64, "mean": 4.904342174530029, "std": 6.376689434051514, "min": -8.609870910644531, "p10": -2.952321624755859, "median": 4.080780029296875, "p90": 12.069077301025391, "max": 21.6295166015625, "pos_frac": 0.796875, "sample": [9.451263427734375, 19.8280029296875, 3.139667510986328, 3.9693145751953125, 3.286590576171875, 0.124542236328125, 10.245773315429688, 3.5880775451660156, 9.01095962524414, 12.218681335449219, 3.4454879760742188, 5.311470031738281, 7.9628143310546875, 21.6295166015625, 0.5086593627929688, 12.828878402709961, 2.080474853515625, -0.41352081298828125, 5.626384735107422, 8.400421142578125, 2.11724853515625, 8.178337097167969, 6.653369903564453, 13.942794799804688, 11.337692260742188, 8.636909484863281, 4.245037078857422, 3.540851593017578, -7.724130630493164, -6.8192291259765625, 0.3638896942138672, -1.0357666015625, 11.209518432617188, -3.0522308349609375, -0.3742828369140625, 10.818035125732422, 17.043792724609375, 9.032867431640625, 11.17626953125, 1.8081283569335938, 11.720001220703125, 10.998931884765625, 2.4848766326904297, 4.383241653442383, -1.7082653045654297, 1.045989990234375, -5.001808166503906, 4.1922454833984375, 4.887847900390625, -8.609870910644531, 1.7938251495361328, 1.8844146728515625, -0.08190155029296875, 0.381866455078125, -3.7466259002685547, 7.066719055175781, 11.077264785766602, -5.049407958984375, 12.559907913208008, -2.7192001342773438, 1.304412841796875, 1.128692626953125, 9.392982482910156, 11.149202346801758], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000582.npy"}
{"epoch": 0.8798185941043084, "step": 583, "batch_size": 64, "mean": 4.340846061706543, "std": 7.530553340911865, "min": -13.347251892089844, "p10": -4.437930679321289, "median": 4.568370819091797, "p90": 14.175983428955083, "max": 21.43366241455078, "pos_frac": 0.71875, "sample": [4.906307220458984, 0.19769287109375, -3.0744857788085938, -3.6192588806152344, 20.70815658569336, 4.558475494384766, -2.3448944091796875, -2.47894287109375, -7.393924713134766, -0.225677490234375, 7.53253173828125, -6.998176574707031, -0.7778453826904297, 8.807968139648438, 4.677299499511719, -3.795093536376953, 7.739116668701172, -1.523183822631836, 20.59091567993164, 17.386375427246094, 7.39021110534668, -11.100959777832031, 6.6686859130859375, -0.801971435546875, 4.6532135009765625, 5.507623672485352, 8.247711181640625, -7.8658599853515625, 5.382966995239258, 10.100410461425781, 4.578266143798828, 2.109376907348633, 6.5685577392578125, 0.18115234375, 4.11541748046875, 14.657516479492188, -4.713432312011719, -5.520332336425781, 7.879966735839844, 19.97127914428711, 3.5530319213867188, 17.291061401367188, -13.347251892089844, 11.73046875, 2.381742477416992, 10.6280517578125, 9.366752624511719, 0.773468017578125, 5.48162841796875, -2.1143112182617188, 8.059906005859375, -0.1191253662109375, 11.223154067993164, 1.7318267822265625, 5.384504318237305, 4.491302490234375, 1.6530513763427734, 6.234712600708008, 0.4469776153564453, 13.052406311035156, 21.43366241455078, 10.93182373046875, 2.727874755859375, 1.9342975616455078], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000583.npy"}
{"epoch": 0.8813303099017384, "step": 584, "batch_size": 64, "mean": 4.791357040405273, "std": 7.667906284332275, "min": -10.908485412597656, "p10": -4.2998607635498045, "median": 5.001726150512695, "p90": 14.173826789855958, "max": 25.865814208984375, "pos_frac": 0.6875, "sample": [18.085205078125, 7.3945770263671875, 10.518302917480469, -1.1889591217041016, 4.8422088623046875, -1.4238262176513672, 18.97119140625, 10.384269714355469, 7.7255096435546875, 8.3126220703125, 5.161243438720703, -2.1827735900878906, -2.590627670288086, -2.9910030364990234, -2.8960304260253906, 11.928924560546875, 9.030157089233398, 14.480989456176758, 13.830923080444336, -4.081668853759766, 14.356231689453125, 13.023368835449219, -3.3917083740234375, 14.320785522460938, 9.953277587890625, 6.727935791015625, 0.6225223541259766, -0.8173141479492188, 4.4521484375, 4.320892333984375, 12.038673400878906, 1.0519256591796875, 8.207847595214844, 3.6695327758789062, -2.901216506958008, 10.886688232421875, -4.39337158203125, -5.021209716796875, 1.055429458618164, -0.09303855895996094, 6.616138458251953, 12.997764587402344, -6.2438507080078125, 6.394247055053711, 10.870132446289062, 7.887969970703125, -0.4032135009765625, 6.039022445678711, 2.2670516967773438, -5.051017761230469, -10.908485412597656, 5.492137908935547, -1.5988311767578125, 8.734172821044922, 1.415395736694336, 25.865814208984375, 22.20166778564453, 0.3284187316894531, 8.066259384155273, 3.0291061401367188, 2.6331558227539062, 7.370403289794922, -9.881256103515625, -8.856019973754883], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000584.npy"}
{"epoch": 0.8828420256991686, "step": 585, "batch_size": 64, "mean": 4.027131080627441, "std": 6.387627124786377, "min": -15.318069458007812, "p10": -2.028905487060547, "median": 3.7740488052368164, "p90": 12.571052551269533, "max": 19.441707611083984, "pos_frac": 0.828125, "sample": [3.1997413635253906, 7.5509490966796875, 3.8726348876953125, 7.197486877441406, 8.436233520507812, 1.1339797973632812, 2.8901710510253906, 15.172897338867188, 2.9761810302734375, 1.0622806549072266, -14.01613998413086, 0.9017753601074219, 13.701385498046875, 0.06844329833984375, 0.04720306396484375, 6.873992919921875, -5.49525260925293, 1.4946975708007812, 7.950592041015625, 5.846893310546875, 12.045753479003906, 1.844888687133789, 0.6493854522705078, -6.086494445800781, 4.150491714477539, 5.524824142456055, 0.28987884521484375, -0.9597396850585938, 5.6441497802734375, 3.169849395751953, -1.3797492980957031, 1.7064361572265625, 10.32080078125, 0.2088184356689453, -6.3731231689453125, 3.7575759887695312, 0.16732025146484375, 9.695611953735352, 13.161617279052734, 8.632991790771484, 5.286338806152344, 7.65825080871582, 2.5074996948242188, -5.341970443725586, -15.318069458007812, 15.555908203125, 15.443706512451172, 10.80344009399414, -2.0575408935546875, -1.7233810424804688, 3.7905216217041016, 5.633232116699219, 7.994300842285156, 4.616914749145508, 3.0376338958740234, -1.9620895385742188, 3.74676513671875, 12.796180725097656, 19.441707611083984, 6.52227783203125, 2.7808837890625, 3.9102516174316406, 7.957880020141602, 7.618324279785156], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000585.npy"}
{"epoch": 0.8843537414965986, "step": 586, "batch_size": 64, "mean": 5.273736000061035, "std": 5.902338981628418, "min": -8.834770202636719, "p10": -1.2633850097656247, "median": 4.796080589294434, "p90": 12.291851806640626, "max": 19.81769561767578, "pos_frac": 0.859375, "sample": [14.560897827148438, 3.3533859252929688, 8.218154907226562, 6.394111633300781, 3.6958770751953125, 15.351921081542969, 6.085042953491211, 0.42392539978027344, 4.780067443847656, 3.422060012817383, 9.302679061889648, -2.8915252685546875, -2.010366439819336, 16.20447540283203, 16.01018524169922, -1.0943183898925781, 4.812093734741211, 8.063774108886719, 3.217214584350586, 6.440725326538086, 0.6734523773193359, 11.851860046386719, 4.2391815185546875, 11.153039932250977, 2.4508132934570312, 0.1698131561279297, 0.3512763977050781, 0.5316848754882812, 9.974773406982422, 7.232517242431641, 10.449485778808594, 9.751251220703125, 0.3397369384765625, 9.52602767944336, 6.1005096435546875, 2.290782928466797, -8.834770202636719, 19.81769561767578, -1.3358421325683594, 2.287679672241211, 1.4602031707763672, 3.815032958984375, 2.3188629150390625, 7.932945251464844, 7.898597717285156, 0.7269172668457031, -1.3917427062988281, 6.842746734619141, -0.3999347686767578, 6.8846893310546875, 12.323410034179688, 1.3955841064453125, 8.324623107910156, 2.5680809020996094, 11.067136764526367, 4.062660217285156, -7.4437408447265625, 16.70470428466797, -8.368850708007812, 12.218215942382812, 8.445549011230469, 5.512294769287109, 3.510814666748047, 7.748931884765625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000586.npy"}
{"epoch": 0.8858654572940288, "step": 587, "batch_size": 64, "mean": 4.664154052734375, "std": 7.611452102661133, "min": -18.018646240234375, "p10": -4.189810943603515, "median": 4.5383453369140625, "p90": 14.946935272216798, "max": 20.71944808959961, "pos_frac": 0.75, "sample": [0.27687835693359375, -12.5780029296875, 3.8817291259765625, 5.12275505065918, 6.3698883056640625, -6.296909332275391, 3.0373458862304688, -4.51202392578125, 7.583412170410156, 7.7742919921875, 2.82379150390625, 5.4465179443359375, -3.4379806518554688, -0.4092254638671875, 1.5089874267578125, 5.944850921630859, 10.487115859985352, -0.1904926300048828, 0.6218109130859375, 20.357860565185547, 9.830856323242188, 5.747611999511719, -0.9044113159179688, 16.622703552246094, 6.2952117919921875, 6.479335784912109, 20.71944808959961, -0.528350830078125, -18.018646240234375, -0.8022499084472656, 11.760284423828125, 4.216915130615234, 11.114372253417969, 17.470081329345703, 3.237762451171875, 4.03167724609375, 7.841243743896484, -13.196853637695312, -0.55792236328125, 12.059783935546875, 15.136428833007812, -4.945781707763672, 10.546920776367188, 6.708887100219727, -0.5778350830078125, 7.982841491699219, 4.859775543212891, 10.677337646484375, 10.753484725952148, 2.3403358459472656, 14.504783630371094, -7.068580627441406, 8.257020950317383, 0.9584274291992188, 4.079986572265625, 16.050689697265625, 1.4478263854980469, -0.6051750183105469, 0.3264427185058594, 6.373443603515625, 4.094627380371094, 16.730087280273438, 12.163215637207031, 0.47918701171875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000587.npy"}
{"epoch": 0.8873771730914588, "step": 588, "batch_size": 64, "mean": 4.821053504943848, "std": 6.609106063842773, "min": -10.368606567382812, "p10": -3.653148651123045, "median": 4.754825592041016, "p90": 12.678149414062501, "max": 21.376968383789062, "pos_frac": 0.78125, "sample": [4.7108306884765625, -6.4830474853515625, 0.0640106201171875, 4.4062957763671875, -1.7941741943359375, 8.626571655273438, 16.546417236328125, 7.681602478027344, 8.894325256347656, 9.602325439453125, 21.376968383789062, 4.548097610473633, 4.910852432250977, 5.169347763061523, 3.4924468994140625, 7.865200042724609, 19.808509826660156, 18.869110107421875, 4.528631210327148, 12.795135498046875, 5.396202087402344, 5.113000869750977, -4.575052261352539, 13.43988037109375, 5.422771453857422, 10.531669616699219, 4.436553955078125, -0.9049224853515625, -1.0781002044677734, -1.404693603515625, 1.1022586822509766, 9.07076644897461, -6.614288330078125, 0.8695220947265625, 8.89419937133789, 11.54962158203125, -0.984588623046875, 5.464546203613281, 2.753824234008789, -0.28720855712890625, -4.449851989746094, -4.703346252441406, 5.754085540771484, 13.31081771850586, 7.713710784912109, 4.798820495605469, 11.179513931274414, 0.8281192779541016, 1.201202392578125, 11.22382926940918, 4.212867736816406, 5.6776123046875, 0.9499111175537109, 3.715282440185547, 0.6539306640625, 2.6834869384765625, -1.5938301086425781, 7.807167053222656, 2.2284698486328125, -9.954666137695312, 8.26405143737793, 12.405181884765625, 11.194252014160156, -10.368606567382812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000588.npy"}
{"epoch": 0.8888888888888888, "step": 589, "batch_size": 64, "mean": 5.771548271179199, "std": 6.420150279998779, "min": -8.258651733398438, "p10": -0.16365814208984353, "median": 4.685874938964844, "p90": 15.372053718566896, "max": 23.718055725097656, "pos_frac": 0.890625, "sample": [0.8571681976318359, 9.12115478515625, -4.472494125366211, 15.592254638671875, 4.938758850097656, 2.579710006713867, 1.9127426147460938, 2.449066162109375, 2.2060775756835938, 0.3503437042236328, 1.3828887939453125, 0.18417739868164062, 19.281326293945312, 5.6683502197265625, 11.516708374023438, 5.5389862060546875, 4.091926574707031, 9.253028869628906, 18.114116668701172, 13.638092041015625, 2.6392974853515625, 9.422744750976562, 8.845346450805664, 4.53192138671875, 16.850601196289062, 2.552001953125, -2.3093414306640625, 6.7664031982421875, 12.508913040161133, 5.576114654541016, 6.149166107177734, -0.2571220397949219, 11.916948318481445, 6.0988311767578125, -0.8510322570800781, 4.906349182128906, 4.059303283691406, 7.535301208496094, 5.109306335449219, 17.142990112304688, 6.7243194580078125, 4.090986251831055, 12.819442749023438, 4.761482238769531, 6.537544250488281, 2.566131591796875, 0.4749946594238281, -4.861034393310547, 0.054424285888671875, 2.428497314453125, -8.258651733398438, 6.006477355957031, 20.460752487182617, -6.691551208496094, 3.744903564453125, 9.014533996582031, 14.858251571655273, 4.610267639160156, 2.4641799926757812, 2.0195274353027344, 1.437896728515625, 3.0739688873291016, 23.718055725097656, 3.925241470336914], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000589.npy"}
{"epoch": 0.890400604686319, "step": 590, "batch_size": 64, "mean": 6.322257041931152, "std": 7.05045747756958, "min": -10.939918518066406, "p10": -2.668726348876952, "median": 6.081311225891113, "p90": 16.476949691772464, "max": 19.63056755065918, "pos_frac": 0.84375, "sample": [11.854621887207031, 3.8576889038085938, 8.034957885742188, 1.1254615783691406, 12.418128967285156, 17.313552856445312, 4.405059814453125, -10.939918518066406, 1.4076690673828125, 1.2261581420898438, 5.0904388427734375, 0.4759063720703125, 0.2392425537109375, 2.8969993591308594, -7.4168548583984375, 4.445407867431641, 7.432270050048828, 13.441497802734375, -7.464221954345703, -1.8080978393554688, -1.1396255493164062, 3.8809814453125, 1.82525634765625, 13.409049987792969, 18.32091522216797, 5.022560119628906, 1.5177116394042969, 10.959335327148438, 17.15777587890625, 9.8077392578125, 0.034069061279296875, 13.821929931640625, 2.839963912963867, 11.6904296875, 7.219734191894531, 10.68682861328125, 9.715753555297852, 6.6754302978515625, 0.5006103515625, 5.467380523681641, 15.869312286376953, 6.846254348754883, 17.418807983398438, 11.169662475585938, 16.73736572265625, 5.659065246582031, 12.416366577148438, 6.822227478027344, -3.037567138671875, 10.075454711914062, -3.122589111328125, 13.672491073608398, 11.515884399414062, 0.2125091552734375, 3.1309967041015625, 14.786827087402344, 6.503557205200195, 4.5742950439453125, 17.768875122070312, -1.5384502410888672, 8.058170318603516, -3.283935546875, 19.63056755065918, -4.711483001708984], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000590.npy"}
{"epoch": 0.891912320483749, "step": 591, "batch_size": 64, "mean": 5.285675525665283, "std": 7.51272439956665, "min": -9.000534057617188, "p10": -3.7162420272827146, "median": 5.331581115722656, "p90": 15.75495758056641, "max": 21.789535522460938, "pos_frac": 0.734375, "sample": [-0.534332275390625, 16.224483489990234, 9.150960922241211, 10.620122909545898, 6.258872985839844, 4.44378662109375, 11.804649353027344, 3.5412445068359375, 4.344568252563477, 10.148727416992188, 5.2160797119140625, -3.31060791015625, 6.9659423828125, -6.403926849365234, 5.8260040283203125, -1.6868896484375, 6.67140007019043, 2.3606414794921875, 6.9204559326171875, 2.2419795989990234, 9.146942138671875, 4.095731735229492, 16.404541015625, -6.68304443359375, 14.798385620117188, -9.000534057617188, 7.746429443359375, 2.6390151977539062, 2.9253692626953125, 5.7114715576171875, -7.7003631591796875, -3.890085220336914, 16.1649169921875, 21.789535522460938, 0.9793033599853516, 12.881961822509766, -7.457050323486328, 2.7671127319335938, 14.26364517211914, 0.8575630187988281, -1.7720947265625, -1.7246112823486328, 4.261222839355469, 10.818534851074219, -2.5311546325683594, 18.926422119140625, -2.705036163330078, -7.0840606689453125, -3.0469131469726562, 12.745269775390625, -3.1817169189453125, -0.8612098693847656, 20.602508544921875, 6.7240447998046875, 1.2698097229003906, 13.539497375488281, 13.045578002929688, 6.5578155517578125, 5.44708251953125, 9.221389770507812, 17.39885711669922, 7.1271820068359375, 0.45488929748535156, 13.804916381835938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000591.npy"}
{"epoch": 0.8934240362811792, "step": 592, "batch_size": 64, "mean": 4.798343658447266, "std": 6.9443488121032715, "min": -8.063505172729492, "p10": -3.350482559204101, "median": 3.0322723388671875, "p90": 16.05645141601563, "max": 22.489715576171875, "pos_frac": 0.765625, "sample": [5.657310485839844, 17.85112762451172, 13.929126739501953, 3.5795745849609375, 7.660575866699219, 18.752878189086914, 13.521324157714844, -0.3677101135253906, 2.1689300537109375, 3.6460037231445312, 1.3263931274414062, 7.209230422973633, 2.5657501220703125, 0.5716323852539062, 5.6423492431640625, 7.883003234863281, 19.47256851196289, 6.168048858642578, 1.6428146362304688, 1.6291427612304688, -5.5204925537109375, 11.26596450805664, 2.527048110961914, 14.4317626953125, 0.9578132629394531, 7.962890625, 3.4243927001953125, 17.331756591796875, 11.083610534667969, 7.383049011230469, -4.725612640380859, 6.097177505493164, 1.1007003784179688, 5.089256286621094, 4.654945373535156, 1.8233299255371094, -4.3289031982421875, 9.972694396972656, -8.063505172729492, -0.9074058532714844, -0.14049720764160156, 10.776620864868164, -0.4473724365234375, -5.192432403564453, -3.570343017578125, 7.03387451171875, -2.775299072265625, 16.75274658203125, 20.33917999267578, -4.835151672363281, 22.489715576171875, -0.5018081665039062, 1.1161632537841797, 0.5285301208496094, 2.8533248901367188, 1.0639781951904297, -0.254791259765625, 2.510101318359375, -2.837474822998047, 3.7445068359375, 3.0633544921875, 0.4286613464355469, 3.001190185546875, 9.87664794921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000592.npy"}
{"epoch": 0.8949357520786092, "step": 593, "batch_size": 64, "mean": 4.747987747192383, "std": 7.913001537322998, "min": -13.892051696777344, "p10": -4.85295524597168, "median": 4.720710754394531, "p90": 15.573898124694827, "max": 21.069320678710938, "pos_frac": 0.734375, "sample": [8.37847900390625, 1.8302345275878906, 10.50042724609375, -3.6602630615234375, 15.808717727661133, -1.8178176879882812, 2.6026840209960938, -5.1991119384765625, 17.801937103271484, 3.9947967529296875, 17.306903839111328, 1.8312301635742188, 14.325485229492188, 13.117454528808594, 2.5445785522460938, -4.869655609130859, 5.043006896972656, 7.38226318359375, 14.415969848632812, 6.118398666381836, 3.710386276245117, 5.706829071044922, 15.025985717773438, 12.115325927734375, 6.868705749511719, 5.335418701171875, -1.6941852569580078, 8.743127822875977, 3.2067642211914062, -10.4388427734375, 11.985931396484375, -9.236328125, -1.2342453002929688, 10.134986877441406, 7.35389518737793, 8.031682968139648, 0.10824775695800781, -4.813987731933594, 11.9952392578125, -1.4022483825683594, -8.959415435791016, -3.0355091094970703, 16.901153564453125, -12.257949829101562, 18.638992309570312, 10.291336059570312, 18.445228576660156, 3.0808792114257812, -3.394683837890625, 21.069320678710938, 2.5943603515625, -0.014722824096679688, 6.788719177246094, 5.302471160888672, 6.135307312011719, -13.892051696777344, 1.263071060180664, 9.976539611816406, 1.1787986755371094, -1.2522506713867188, 1.7584609985351562, 4.398414611816406, 2.4324188232421875, 7.463916778564453], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000593.npy"}
{"epoch": 0.8964474678760394, "step": 594, "batch_size": 64, "mean": 3.7374889850616455, "std": 6.593701362609863, "min": -13.10980224609375, "p10": -3.8322824478149413, "median": 3.0269556045532227, "p90": 12.72292366027832, "max": 17.623273849487305, "pos_frac": 0.71875, "sample": [2.606637954711914, 9.172348022460938, -3.6212615966796875, 8.086578369140625, 8.371417999267578, 10.980995178222656, 0.5958499908447266, 1.4495620727539062, 5.331336975097656, -4.063087463378906, -2.5509872436523438, 0.06327438354492188, -3.6201305389404297, 4.0405731201171875, 3.134950637817383, -0.5353317260742188, -1.7143020629882812, -0.8215904235839844, 12.966156005859375, 0.4646453857421875, 9.936553955078125, 2.6978015899658203, 0.8456192016601562, 6.367610931396484, 17.623273849487305, 4.965705871582031, 3.6662063598632812, -5.344757080078125, 9.07448959350586, -1.0014324188232422, 17.43085479736328, 2.1851806640625, 17.299545288085938, 12.758403778076172, 0.46883392333984375, -3.922719955444336, -4.882835388183594, 5.975616455078125, 11.430294036865234, -7.2036590576171875, 7.844718933105469, 0.2387256622314453, 8.886438369750977, 15.709846496582031, -2.7969932556152344, 4.915430068969727, 2.4792003631591797, 10.761962890625, -2.3819522857666016, 8.167633056640625, 6.8117523193359375, -13.10980224609375, 12.64013671875, 15.983695983886719, 5.893463134765625, 4.958648681640625, 0.38671875, 4.604801177978516, 2.9189605712890625, -3.423797607421875, 4.0216217041015625, -6.8878173828125, 1.1670303344726562, -1.2993621826171875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000594.npy"}
{"epoch": 0.8979591836734694, "step": 595, "batch_size": 64, "mean": 6.321621894836426, "std": 7.688138484954834, "min": -18.32346534729004, "p10": -2.3438213348388666, "median": 5.873993873596191, "p90": 16.149242782592776, "max": 21.831085205078125, "pos_frac": 0.796875, "sample": [-1.2511978149414062, 10.233184814453125, 14.71136474609375, 7.494171142578125, 8.280403137207031, 5.793725967407227, 8.056953430175781, 4.586145401000977, -0.6210403442382812, 15.63840103149414, 12.878959655761719, 1.7073974609375, 3.09857177734375, -6.25697135925293, 14.332784652709961, 2.3324966430664062, 3.4909934997558594, -1.6280479431152344, 13.885189056396484, 0.9800662994384766, 12.439094543457031, 6.411346435546875, 16.352134704589844, 4.7687835693359375, 15.822845458984375, 3.8529434204101562, 4.425086975097656, 4.042888641357422, 8.394912719726562, 11.591041564941406, 2.11920166015625, -6.506591796875, 5.954261779785156, -3.059295654296875, 12.489723205566406, 13.419692993164062, 17.383865356445312, -7.6890869140625, -18.32346534729004, 21.831085205078125, 19.921493530273438, 16.289127349853516, 8.34796142578125, 9.713661193847656, -2.6505813598632812, 14.40167236328125, 7.7691650390625, 10.714103698730469, 5.733062744140625, 5.266498565673828, -1.467782974243164, 8.175933837890625, 2.6680870056152344, 2.1047439575195312, 2.1856460571289062, 5.0118255615234375, 18.27301025390625, 8.341873168945312, -1.047149658203125, -0.89013671875, 0.0056743621826171875, -7.3962860107421875, 12.39297866821289, 17.25520896911621], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000595.npy"}
{"epoch": 0.8994708994708994, "step": 596, "batch_size": 64, "mean": 3.221146583557129, "std": 6.9624505043029785, "min": -11.092218399047852, "p10": -4.084357833862304, "median": 2.811093330383301, "p90": 15.51767730712891, "max": 21.222000122070312, "pos_frac": 0.640625, "sample": [-4.20147705078125, 3.8584442138671875, 16.610626220703125, 3.631519317626953, 6.071281433105469, -4.07122802734375, -2.7900123596191406, 0.22531890869140625, 0.496795654296875, 14.7095947265625, -6.67901611328125, 4.474262237548828, -7.913475036621094, -0.8227310180664062, 17.16509246826172, 18.348299026489258, 7.900913238525391, 2.2899017333984375, -5.931034088134766, 2.7064762115478516, -2.500213623046875, 7.3843841552734375, 4.561065673828125, 5.757062911987305, 18.64704704284668, 7.934535980224609, 6.197273254394531, -3.186420440673828, 0.6373519897460938, 2.91571044921875, 1.2679862976074219, 1.3297882080078125, 3.10552978515625, -0.13274383544921875, 3.3660354614257812, -3.1031761169433594, -2.0071029663085938, 21.222000122070312, 0.11282730102539062, -8.240242004394531, 4.1278533935546875, 4.51251220703125, -0.9660072326660156, -11.092218399047852, -0.657623291015625, 4.989185333251953, -0.5145416259765625, -2.751596450805664, 15.863998413085938, 6.008613586425781, 11.976678848266602, 2.9232101440429688, -2.657958984375, 1.8889007568359375, -0.5831146240234375, 17.2017822265625, -0.3011016845703125, 7.802984237670898, -4.089984893798828, 3.7047252655029297, 5.883861541748047, 8.761688232421875, 3.6848907470703125, -0.9116039276123047], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000596.npy"}
{"epoch": 0.9009826152683296, "step": 597, "batch_size": 64, "mean": 4.664018154144287, "std": 6.6561479568481445, "min": -7.251932144165039, "p10": -2.391793060302734, "median": 2.914372444152832, "p90": 14.715486145019534, "max": 22.285865783691406, "pos_frac": 0.734375, "sample": [-1.1848278045654297, -7.246912002563477, -1.5048999786376953, 0.80621337890625, 15.04705810546875, 3.4298782348632812, 4.029474258422852, -3.8647689819335938, -3.0828475952148438, 15.895998001098633, -0.09717178344726562, 13.440872192382812, 1.8989391326904297, 5.025474548339844, -2.4171142578125, 2.3191566467285156, 20.02777099609375, 15.19936752319336, 9.023246765136719, -2.7214221954345703, 7.822782516479492, 2.7352981567382812, -5.2872772216796875, -0.8955307006835938, 11.161705017089844, 10.006519317626953, 2.435943603515625, -1.3135814666748047, 10.9793701171875, -1.9210929870605469, 4.14042854309082, 4.763526916503906, 13.644309997558594, 4.561927795410156, 7.438684463500977, 4.858486175537109, 1.9297637939453125, 3.6174755096435547, 5.014324188232422, 15.230880737304688, -1.4832305908203125, -2.3327102661132812, 2.1003875732421875, 0.063385009765625, -0.6251373291015625, 1.8334007263183594, 1.2223644256591797, 0.9164714813232422, 12.67437744140625, 2.2698822021484375, 22.285865783691406, 3.07501220703125, 9.799156188964844, -2.0979843139648438, -7.251932144165039, 5.9852447509765625, 0.8480377197265625, 2.2450103759765625, 16.145660400390625, 13.941818237304688, 2.753732681274414, 12.431718826293945, 9.311805725097656, 7.43739128112793], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000597.npy"}
{"epoch": 0.9024943310657596, "step": 598, "batch_size": 64, "mean": 3.2310941219329834, "std": 6.862454891204834, "min": -11.933052062988281, "p10": -4.853162574768066, "median": 2.6621484756469727, "p90": 12.26739196777344, "max": 19.84308433532715, "pos_frac": 0.671875, "sample": [0.4974689483642578, -4.03424072265625, 6.5406494140625, 4.146659851074219, 6.945289611816406, -0.3322334289550781, 2.5667266845703125, 13.845874786376953, 6.294981002807617, -11.933052062988281, -1.0913124084472656, 4.010234832763672, 1.4160194396972656, -4.911657333374023, 17.692794799804688, -9.631111145019531, 18.805461883544922, -5.45355224609375, 10.382844924926758, 5.39990234375, -3.4143829345703125, -8.823257446289062, -4.7166748046875, 8.028255462646484, 2.3930816650390625, 2.757570266723633, 3.2711639404296875, 6.2781829833984375, -3.3003005981445312, -2.2462196350097656, -2.0330677032470703, 2.2347373962402344, 6.949762344360352, 5.354225158691406, 12.446441650390625, -1.6540565490722656, 9.63037109375, -1.5380516052246094, 8.869537353515625, 0.2528533935546875, 8.710968017578125, 5.2666778564453125, 0.3771686553955078, 19.84308433532715, -1.648651123046875, 3.8254966735839844, 4.5520172119140625, -0.00708770751953125, 10.079694747924805, 6.646705627441406, 0.820770263671875, -5.956932067871094, 0.4373016357421875, 3.028076171875, 1.5787925720214844, -6.22991943359375, 16.70763397216797, 7.5969390869140625, -1.1844863891601562, 0.21718215942382812, 11.849609375, 13.899669647216797, -4.243492126464844, 8.724884033203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000598.npy"}
{"epoch": 0.9040060468631897, "step": 599, "batch_size": 64, "mean": 3.755467414855957, "std": 7.038053035736084, "min": -11.78680419921875, "p10": -3.613783264160156, "median": 2.491121292114258, "p90": 12.959956359863282, "max": 21.403350830078125, "pos_frac": 0.6875, "sample": [-1.7947006225585938, 2.372385025024414, 4.0644073486328125, 4.813697814941406, -4.382713317871094, 9.034961700439453, -3.417736053466797, 14.701736450195312, -3.27142333984375, -1.1649169921875, -6.4077606201171875, -2.5304927825927734, 3.964832305908203, 6.843044281005859, 6.820167541503906, -2.7741775512695312, -9.461898803710938, -0.21408653259277344, 2.7279014587402344, -0.892242431640625, 13.988748550415039, 6.151464462280273, -0.15009307861328125, -11.78680419921875, 0.5296783447265625, -1.1771087646484375, 0.1528778076171875, 15.986881256103516, -3.697803497314453, 1.50079345703125, 17.826187133789062, 7.414562225341797, -2.6548309326171875, -7.324317932128906, 1.9608039855957031, 1.6612396240234375, 1.4844779968261719, 1.2107620239257812, 0.31183433532714844, 13.16448974609375, 12.468782424926758, 0.9359092712402344, 1.5736732482910156, 6.2637786865234375, 12.446382522583008, 11.103458404541016, 10.695098876953125, -2.6087570190429688, -2.9741554260253906, 3.158477783203125, 21.403350830078125, 6.722137451171875, 7.5272674560546875, 5.115322113037109, 12.482711791992188, 2.7752552032470703, 21.174713134765625, 2.2531890869140625, 11.477607727050781, 2.6098575592041016, -3.81170654296875, 8.898630142211914, 9.827407836914062, 3.2466964721679688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000599.npy"}
{"epoch": 0.9055177626606198, "step": 600, "batch_size": 64, "mean": 3.578876256942749, "std": 6.808477878570557, "min": -13.000652313232422, "p10": -4.113274765014648, "median": 3.3852767944335938, "p90": 12.136014366149904, "max": 20.56871795654297, "pos_frac": 0.71875, "sample": [11.039558410644531, 1.9315643310546875, -3.0582027435302734, 1.9766464233398438, 12.836647033691406, 2.4417724609375, 4.45648193359375, -4.404766082763672, 15.000141143798828, 14.332382202148438, -3.1354293823242188, 4.692638397216797, 9.644756317138672, 12.3582763671875, -3.4331283569335938, -1.7634716033935547, -5.5004425048828125, -2.9363574981689453, -6.335205078125, 10.933120727539062, 0.7295379638671875, 5.158538818359375, 14.126266479492188, 4.597587585449219, -2.53399658203125, 9.406997680664062, 4.032192230224609, 11.617403030395508, -0.08689117431640625, 10.760185241699219, 10.34588623046875, 3.024974822998047, -13.000652313232422, -0.17315292358398438, 2.7730941772460938, -9.72281265258789, 1.4373626708984375, 5.111537933349609, 0.7789421081542969, 10.56719970703125, 0.9124755859375, 5.1484375, 4.967288970947266, 4.9730072021484375, -0.8344631195068359, 6.116342544555664, -7.720306396484375, -3.1703433990478516, -10.297821044921875, 10.05972671508789, 0.9982566833496094, 2.8498001098632812, 1.773904800415039, -0.476593017578125, 5.327766418457031, 3.8042144775390625, 6.221260070800781, 3.7455787658691406, 5.498046875, 19.61855697631836, 1.0866775512695312, 1.869028091430664, 5.9813385009765625, 20.56871795654297], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000600.npy"}
{"epoch": 0.9070294784580499, "step": 601, "batch_size": 64, "mean": 5.099976539611816, "std": 7.387240886688232, "min": -12.233528137207031, "p10": -2.699477767944335, "median": 3.9982070922851562, "p90": 14.930764770507814, "max": 23.71752166748047, "pos_frac": 0.71875, "sample": [9.558761596679688, 1.5591583251953125, 10.701339721679688, -5.680475234985352, 11.496612548828125, 11.47549819946289, 20.871871948242188, 0.5777587890625, 1.9436874389648438, -5.302825927734375, 23.71752166748047, -12.233528137207031, 16.379371643066406, 4.466035842895508, 0.17032241821289062, 3.145477294921875, 12.77020263671875, 8.5789794921875, -1.9427261352539062, 7.194923400878906, 15.182266235351562, 2.290334701538086, 0.6901779174804688, 16.509490966796875, 20.334205627441406, 1.3950233459472656, -1.4320621490478516, -0.41800880432128906, 9.25677490234375, -0.6977386474609375, 0.9858551025390625, 8.794139862060547, 4.7607421875, -1.2443351745605469, -0.08564376831054688, 9.749275207519531, 3.208831787109375, 6.635772705078125, 4.100730895996094, 3.8514270782470703, -6.2566070556640625, 4.59686279296875, 1.3797798156738281, 11.772838592529297, -0.225433349609375, 4.301494598388672, 3.8956832885742188, -1.578887939453125, -3.0237998962402344, 3.8541431427001953, 5.641681671142578, 14.472137451171875, -0.5527820587158203, -0.48607826232910156, 8.842414855957031, 9.02493667602539, 15.1273193359375, 8.80842399597168, 12.218536376953125, -0.3973979949951172, -6.6134185791015625, 13.106338500976562, 13.517440795898438, -8.342376708984375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000601.npy"}
{"epoch": 0.90854119425548, "step": 602, "batch_size": 64, "mean": 5.0842509269714355, "std": 6.1805806159973145, "min": -7.9642486572265625, "p10": -1.5529720306396482, "median": 3.6606292724609375, "p90": 15.647429656982423, "max": 17.438262939453125, "pos_frac": 0.765625, "sample": [-2.9801406860351562, 12.025154113769531, 10.06536865234375, 16.907867431640625, 6.36688232421875, 6.470741271972656, 3.85772705078125, 0.3902549743652344, 8.299835205078125, 2.8740158081054688, 17.438262939453125, -2.209014892578125, 10.906295776367188, 15.455772399902344, -4.487407684326172, 16.070396423339844, -2.6586761474609375, 11.278030395507812, 2.1991729736328125, 1.7288150787353516, 1.969970703125, -0.8698940277099609, 0.19641876220703125, -1.3076591491699219, -0.39879608154296875, 10.879753112792969, 1.8216514587402344, 10.816967010498047, 4.80072021484375, 5.951236724853516, -1.3176155090332031, 1.6821823120117188, 7.329994201660156, 1.3482799530029297, 4.001186370849609, -0.19434738159179688, 2.4811134338378906, 3.917633056640625, 10.241193771362305, -0.11939239501953125, -1.0639686584472656, 3.5587730407714844, 7.493076324462891, 15.729568481445312, -4.194709777832031, -7.9642486572265625, 9.157302856445312, 3.7476272583007812, 6.185579299926758, 1.0392723083496094, 0.35699462890625, 3.5674362182617188, 16.9337158203125, 9.219413757324219, 0.7955093383789062, 12.79222297668457, 7.731609344482422, 3.424121856689453, 16.253562927246094, 15.976036071777344, -1.653839111328125, 9.960132598876953, 3.5736312866210938, -0.45671844482421875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000602.npy"}
{"epoch": 0.91005291005291, "step": 603, "batch_size": 64, "mean": 5.145867347717285, "std": 6.213833332061768, "min": -8.198715209960938, "p10": -1.7916633605957024, "median": 3.904600143432617, "p90": 13.724857330322267, "max": 23.965179443359375, "pos_frac": 0.78125, "sample": [8.195648193359375, 13.844161987304688, 9.71588134765625, -0.05500030517578125, -0.28584861755371094, 6.955596923828125, 2.6167678833007812, 6.44744873046875, -2.205554962158203, -2.3684539794921875, 13.85519027709961, 1.947967529296875, 9.460735321044922, -0.45874595642089844, 0.6836090087890625, 12.895500183105469, -2.7079811096191406, 1.9407577514648438, 1.6425018310546875, 23.965179443359375, 6.579936981201172, 5.1463623046875, 14.06678581237793, 3.378997802734375, -0.22075271606445312, 4.024116516113281, 9.065153121948242, -1.036041259765625, 7.43609619140625, -8.198715209960938, 3.2352752685546875, 3.328857421875, 4.905996322631836, 2.2981948852539062, -4.067539215087891, 9.622383117675781, 7.314655303955078, 3.7016868591308594, 0.22667884826660156, 16.818201065063477, 3.35772705078125, 8.470108032226562, 3.3090133666992188, 4.394203186035156, 8.130928039550781, 12.150413513183594, 4.099449157714844, -0.7588214874267578, 3.785083770751953, 0.5212650299072266, -2.1155014038085938, 1.6383094787597656, 9.19681167602539, 12.257465362548828, 12.243976593017578, 0.5186004638671875, 5.625276565551758, -0.78399658203125, 13.446479797363281, 13.04327392578125, 15.780017852783203, 0.5279979705810547, 14.506378173828125, -7.7206268310546875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000603.npy"}
{"epoch": 0.9115646258503401, "step": 604, "batch_size": 64, "mean": 5.03169059753418, "std": 7.7593913078308105, "min": -14.427688598632812, "p10": -3.1535335540771485, "median": 4.219676971435547, "p90": 15.975975036621096, "max": 21.471946716308594, "pos_frac": 0.734375, "sample": [5.599700927734375, 9.418598175048828, -1.3332862854003906, -3.198688507080078, 7.316650390625, 6.103071212768555, 1.2516136169433594, 1.456106185913086, 18.044818878173828, -3.0481719970703125, 0.62774658203125, -1.3277130126953125, 1.1378841400146484, 10.786746978759766, -0.855499267578125, 2.8140945434570312, 14.279190063476562, 2.346242904663086, 5.4241790771484375, 10.579437255859375, -14.047271728515625, -0.8983917236328125, 1.2831649780273438, -3.4839630126953125, 5.973306655883789, 1.4661064147949219, 1.217569351196289, 4.498481750488281, 5.5453033447265625, -14.427688598632812, 6.182346343994141, -0.8149795532226562, 3.587188720703125, -3.904632568359375, 19.903491973876953, -3.4551620483398438, -0.31817626953125, 14.759613037109375, 21.25267791748047, 10.409416198730469, 7.730403900146484, 19.524642944335938, 21.388168334960938, 2.359973907470703, 5.318702697753906, 11.1007080078125, 4.0865325927734375, 11.102764129638672, 21.471946716308594, -1.257080078125, 15.475082397460938, 4.352821350097656, 3.1337814331054688, 0.9550552368164062, 8.702781677246094, 7.775825500488281, 9.541309356689453, -0.3706951141357422, 1.6699638366699219, 16.190643310546875, 10.790996551513672, -1.8031158447265625, -7.7477569580078125, 8.383613586425781], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000604.npy"}
{"epoch": 0.9130763416477702, "step": 605, "batch_size": 64, "mean": 4.465373992919922, "std": 6.715708255767822, "min": -10.512725830078125, "p10": -3.792549514770507, "median": 4.445018768310547, "p90": 13.471092224121096, "max": 21.66546630859375, "pos_frac": 0.765625, "sample": [5.0413665771484375, 0.8815155029296875, -4.220539093017578, 21.66546630859375, 10.087654113769531, 12.963577270507812, 1.77435302734375, 4.3088531494140625, 8.876651763916016, -5.645227432250977, 12.521839141845703, -2.7433013916015625, 4.581184387207031, -1.2406387329101562, 0.11330032348632812, -2.1905250549316406, 1.3707427978515625, 8.519630432128906, 6.883930206298828, 3.1359291076660156, -1.3203201293945312, 0.2800750732421875, 4.740718841552734, -7.666175842285156, -1.1518783569335938, 6.086708068847656, 16.11366844177246, 6.436834335327148, 1.8030414581298828, 4.8941497802734375, 7.519561767578125, 3.4990310668945312, -8.969802856445312, 14.510902404785156, -0.9239959716796875, 11.134567260742188, 15.451358795166016, 8.911163330078125, 2.6681365966796875, 12.249275207519531, 0.84124755859375, 1.6535835266113281, 6.195899963378906, 8.446115493774414, 15.320419311523438, 3.9276809692382812, 9.754638671875, 13.6885986328125, 6.974327087402344, -5.5129547119140625, 19.022071838378906, 6.583507537841797, 6.112674713134766, 1.3348541259765625, -5.862983703613281, 2.5679092407226562, 2.0322704315185547, -10.512725830078125, 5.252220153808594, -0.8513679504394531, 1.25518798828125, 8.825210571289062, -2.7939071655273438, 8.576669692993164], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000605.npy"}
{"epoch": 0.9145880574452003, "step": 606, "batch_size": 64, "mean": 4.541077613830566, "std": 6.355517387390137, "min": -6.763397216796875, "p10": -2.427153968811035, "median": 3.9500160217285156, "p90": 12.374697875976565, "max": 19.863388061523438, "pos_frac": 0.6875, "sample": [-2.048931121826172, 11.12103271484375, 0.7547988891601562, 14.071205139160156, -1.1327438354492188, -0.1389312744140625, 12.668838500976562, -3.93585205078125, -1.010061264038086, 11.014240264892578, 15.96449089050293, -6.763397216796875, 8.162925720214844, 8.590560913085938, 10.412590026855469, -6.4417266845703125, 7.07634162902832, 2.1038284301757812, 0.020355224609375, 6.250946044921875, 18.102630615234375, -5.1365966796875, -0.2837791442871094, 7.8808441162109375, -3.430633544921875, -1.5067520141601562, 9.947906494140625, 2.2008819580078125, 4.980936050415039, 8.158790588378906, 2.738494873046875, 0.5857391357421875, -3.903980255126953, 11.043388366699219, 1.9367141723632812, 1.6019515991210938, 4.3480072021484375, -2.4677181243896484, 5.2877197265625, 7.734825134277344, 11.688369750976562, 16.76673126220703, 2.6576995849609375, 11.150810241699219, 3.5520248413085938, 19.863388061523438, 1.6962203979492188, 5.6579742431640625, -2.3325042724609375, -1.2914848327636719, 11.18746566772461, -2.1068496704101562, -1.32122802734375, 6.119712829589844, -1.8393020629882812, 8.929544448852539, 2.3975601196289062, -0.6881294250488281, 5.063587188720703, 15.770416259765625, 6.051197052001953, 10.722732543945312, 4.70660400390625, -0.33345985412597656], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000606.npy"}
{"epoch": 0.9160997732426304, "step": 607, "batch_size": 64, "mean": 5.074833869934082, "std": 7.096303939819336, "min": -12.560150146484375, "p10": -3.5866582870483397, "median": 4.759016036987305, "p90": 13.327326202392578, "max": 26.177810668945312, "pos_frac": 0.734375, "sample": [3.1971435546875, 3.5145721435546875, -3.625131607055664, 4.213134765625, 8.319786071777344, 6.409507751464844, 10.817779541015625, 6.950965881347656, -12.560150146484375, -0.7815170288085938, 0.3361854553222656, 3.5641555786132812, 21.461288452148438, 14.490144729614258, 2.7527847290039062, -0.119354248046875, -3.49688720703125, 3.4558658599853516, 10.0203857421875, 18.557044982910156, 3.1044464111328125, 9.103748321533203, 8.788021087646484, 9.535537719726562, 11.573455810546875, 4.9141082763671875, 3.0734481811523438, 5.53045654296875, -2.6478500366210938, -1.5609664916992188, 8.858985900878906, 10.456548690795898, 11.856372833251953, 4.603923797607422, -4.521610260009766, 16.45392608642578, 13.390884399414062, -0.6680259704589844, 10.779510498046875, -2.0121383666992188, -4.017280578613281, 2.0484962463378906, 2.740692138671875, 7.912685394287109, 5.464282989501953, -4.603599548339844, -1.039785385131836, 10.534067153930664, 26.177810668945312, 13.732976913452148, -0.10335159301757812, -2.8975372314453125, 8.717880249023438, 13.179023742675781, 0.8475685119628906, 8.617340087890625, 0.5227546691894531, 0.1318359375, 6.395973205566406, 12.993730545043945, 6.215309143066406, -7.798088073730469, -4.2449493408203125, 5.171031951904297], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000607.npy"}
{"epoch": 0.9176114890400605, "step": 608, "batch_size": 64, "mean": 3.6547417640686035, "std": 6.376016616821289, "min": -11.355880737304688, "p10": -3.1493692398071285, "median": 2.7510910034179688, "p90": 12.327774429321291, "max": 20.405803680419922, "pos_frac": 0.71875, "sample": [6.860857009887695, -0.809814453125, 1.280771255493164, 8.330455780029297, 0.1640777587890625, 3.886322021484375, 11.420608520507812, -3.3161087036132812, 3.4812049865722656, -5.14422607421875, 6.263221740722656, 2.1568222045898438, 8.805831909179688, 8.97275161743164, 1.0336036682128906, 8.279308319091797, 17.371475219726562, -9.670501708984375, -3.248037338256836, -0.4027366638183594, 2.4043045043945312, -2.3393592834472656, -0.4846935272216797, 20.405803680419922, 7.880363464355469, 0.5963554382324219, -1.517974853515625, 4.884763717651367, 0.7132053375244141, 16.920856475830078, 2.18145751953125, -6.156394958496094, -6.5252685546875, 6.704742431640625, -1.9512481689453125, 10.463897705078125, -1.222900390625, 0.1204071044921875, 5.4073028564453125, 2.1461181640625, 2.777984619140625, 3.987712860107422, 1.71954345703125, -1.1930313110351562, -2.9191436767578125, 7.80999755859375, -0.6833267211914062, 13.228485107421875, 13.618316650390625, 5.230998992919922, 5.85406494140625, 3.51361083984375, 4.345527648925781, 5.003715515136719, 0.5651607513427734, 11.66134262084961, -11.355880737304688, 16.176557540893555, 3.1021728515625, -1.1084518432617188, 0.7943744659423828, 12.613388061523438, 2.7241973876953125, 10.088531494140625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000608.npy"}
{"epoch": 0.9191232048374905, "step": 609, "batch_size": 64, "mean": 5.636974334716797, "std": 6.3608293533325195, "min": -11.053726196289062, "p10": -1.7249795913696282, "median": 5.1467132568359375, "p90": 14.468486785888674, "max": 20.210693359375, "pos_frac": 0.828125, "sample": [15.3280029296875, 6.5380706787109375, 6.537452697753906, 2.9933929443359375, 5.822479248046875, 1.4908294677734375, 9.407646179199219, 8.277481079101562, 0.027254104614257812, -2.030200958251953, -2.6750125885009766, 5.000110626220703, -0.6397552490234375, 3.6670780181884766, 20.210693359375, 11.609779357910156, 14.739311218261719, 9.11984634399414, 3.721036911010742, 19.36779022216797, -6.815864562988281, 12.154998779296875, 4.5017547607421875, 11.808624267578125, -0.48455238342285156, 12.335929870605469, 5.207923889160156, 8.419174194335938, -5.368011474609375, 4.0705108642578125, 16.21269989013672, -0.17925453186035156, 18.46805191040039, 8.009471893310547, 6.4744720458984375, 10.12835693359375, 3.785320281982422, 0.03932380676269531, 17.894515991210938, 6.9356689453125, 2.8239803314208984, -2.9885482788085938, 2.7030868530273438, 13.836563110351562, 8.460636138916016, -11.053726196289062, 3.8754138946533203, 5.085502624511719, 8.060508728027344, -1.012796401977539, 4.580436706542969, 1.4596405029296875, 5.008758544921875, 3.4421024322509766, 7.052003860473633, 0.7649993896484375, 7.983852386474609, 7.30082893371582, 9.144947052001953, -4.788959503173828, 9.4471435546875, 5.607887268066406, 0.7039031982421875, 1.1557846069335938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000609.npy"}
{"epoch": 0.9206349206349206, "step": 610, "batch_size": 64, "mean": 5.089980125427246, "std": 7.978628158569336, "min": -13.462295532226562, "p10": -4.396824645996094, "median": 4.252251625061035, "p90": 15.935773658752442, "max": 25.145950317382812, "pos_frac": 0.703125, "sample": [4.804628372192383, 14.979782104492188, 10.46710205078125, 10.9686279296875, 2.0858383178710938, 12.66872787475586, 2.4508819580078125, 2.7835826873779297, -4.834930419921875, 2.9280471801757812, 3.86236572265625, -7.620311737060547, 5.068248748779297, 18.314163208007812, -0.0073394775390625, 4.011730194091797, 15.567785263061523, -2.5357284545898438, -13.462295532226562, -0.961181640625, -0.17343902587890625, -1.6059722900390625, 6.318534851074219, 5.178249359130859, -4.928474426269531, -2.0474853515625, -4.1492462158203125, 24.35848617553711, 8.304569244384766, 16.093482971191406, 1.962057113647461, 21.86767578125, 7.869270324707031, 11.639968872070312, -4.1298065185546875, 5.507612228393555, 14.043716430664062, 0.7733478546142578, 7.859893798828125, 4.197662353515625, 6.823577880859375, 4.306840896606445, 4.917243957519531, -4.5029296875, 16.507095336914062, -2.440845489501953, -0.7861213684082031, 0.18993759155273438, 10.942502975463867, 15.1632080078125, 2.4425735473632812, 4.844776153564453, 4.906982421875, 0.833953857421875, -0.7091884613037109, -0.15888595581054688, -6.9569091796875, 9.669471740722656, 25.145950317382812, 2.5293655395507812, 10.368911743164062, -5.6316070556640625, 18.37738800048828, 8.495620727539062], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000610.npy"}
{"epoch": 0.9221466364323507, "step": 611, "batch_size": 64, "mean": 3.62573504447937, "std": 6.084738731384277, "min": -9.108177185058594, "p10": -3.807812309265136, "median": 3.2119007110595703, "p90": 11.268434715270997, "max": 24.123245239257812, "pos_frac": 0.703125, "sample": [8.525520324707031, 6.857294082641602, -6.712860107421875, 4.548576354980469, 14.408205032348633, 1.9968376159667969, 0.973358154296875, 6.6334075927734375, -1.0370025634765625, 2.47503662109375, -4.716739654541016, 12.197029113769531, 3.8674774169921875, 2.953174591064453, -1.7839775085449219, -1.1913318634033203, -4.145320892333984, 11.353591918945312, -6.0785980224609375, 24.123245239257812, 6.960990905761719, 7.658805847167969, 3.4598312377929688, -2.8058605194091797, -4.801242828369141, -0.8656635284423828, 10.590263366699219, 1.5866947174072266, 1.9911117553710938, 8.856201171875, 2.8144054412841797, -2.9128875732421875, -1.1905975341796875, 14.276123046875, 2.6911087036132812, 2.7262115478515625, -6.802772521972656, 16.376983642578125, 4.369483947753906, 5.063774108886719, 4.883674621582031, 1.8943347930908203, -1.0298309326171875, 7.9202880859375, -0.2994117736816406, 6.351818084716797, 5.040613174438477, 5.5676116943359375, 5.57569694519043, 7.7166748046875, 6.747200012207031, 13.400808334350586, 2.398958206176758, 7.464395523071289, 3.208850860595703, 4.932338714599609, 11.069734573364258, -9.108177185058594, 4.381614685058594, 1.9324417114257812, -3.020292282104492, -2.0561695098876953, -1.43096923828125, 3.2149505615234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000611.npy"}
{"epoch": 0.9236583522297808, "step": 612, "batch_size": 64, "mean": 5.428564071655273, "std": 7.692351818084717, "min": -15.719219207763672, "p10": -2.852914428710937, "median": 4.6707048416137695, "p90": 16.784573745727542, "max": 24.14684295654297, "pos_frac": 0.765625, "sample": [-0.024478912353515625, -7.0628662109375, 7.72918701171875, -0.27150726318359375, -3.25341796875, 0.22428131103515625, 10.977897644042969, 10.955642700195312, -0.7404937744140625, 0.31362152099609375, 12.133560180664062, 11.830429077148438, 8.9490966796875, 1.5516204833984375, 8.128364562988281, 6.502494812011719, 4.756708145141602, 2.7902984619140625, 4.30743408203125, 5.738883972167969, 7.590599060058594, -9.843555450439453, 4.5847015380859375, 2.3137435913085938, -2.558135986328125, 3.8461341857910156, 15.132781982421875, -3.6069297790527344, -1.5828361511230469, 1.8170547485351562, 5.183250427246094, 14.831289291381836, -0.026020050048828125, 0.4527778625488281, -2.005352020263672, 16.044078826904297, 3.009735107421875, 9.387741088867188, 21.206756591796875, -10.327972412109375, 17.62139129638672, 2.0031776428222656, 17.540237426757812, -2.979248046875, 17.1019287109375, 17.85260009765625, 7.980319976806641, -0.7234992980957031, 10.892616271972656, 24.14684295654297, 2.4237289428710938, 4.282127380371094, 18.905303955078125, 7.52789306640625, 7.345365524291992, 9.7078857421875, 6.6789703369140625, 0.6968898773193359, 4.191192626953125, -15.719219207763672, 6.851991653442383, 11.041152954101562, 3.9639759063720703, 7.107879638671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000612.npy"}
{"epoch": 0.9251700680272109, "step": 613, "batch_size": 64, "mean": 6.491569995880127, "std": 7.0674943923950195, "min": -11.514877319335938, "p10": -0.6888629913330078, "median": 5.317353248596191, "p90": 17.201930236816406, "max": 20.600975036621094, "pos_frac": 0.875, "sample": [0.04290008544921875, 4.794097900390625, 0.9357643127441406, 9.096611022949219, 7.801824569702148, 7.783470153808594, 8.578926086425781, 4.238433837890625, -4.4626617431640625, -0.6998252868652344, 5.370676040649414, 4.503002166748047, -11.514877319335938, 6.808971405029297, 7.087942123413086, 8.84515380859375, 0.7593154907226562, 2.8945465087890625, 17.039405822753906, 3.0957794189453125, 0.5179767608642578, -1.6460895538330078, -2.754425048828125, 2.7970104217529297, 19.044231414794922, 6.648523330688477, 14.099920272827148, 3.143831253051758, 1.9730987548828125, 4.925285339355469, 16.613540649414062, 7.821371078491211, 7.6668548583984375, 0.3799324035644531, 2.0670547485351562, 5.264030456542969, -2.3928604125976562, 20.49138069152832, 7.462808609008789, 20.600975036621094, 15.245620727539062, 12.76579475402832, 1.4069290161132812, 18.079132080078125, 17.67656707763672, 1.0754852294921875, 16.996719360351562, 7.326269149780273, -0.6632843017578125, 11.054859161376953, 2.5855255126953125, 1.935333251953125, 8.077110290527344, 17.271583557128906, 10.506637573242188, 5.211370468139648, 9.290008544921875, 14.83485221862793, 4.339578628540039, 20.35087013244629, 2.8757705688476562, 7.388637542724609, 0.7641143798828125, -8.658905029296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000613.npy"}
{"epoch": 0.926681783824641, "step": 614, "batch_size": 64, "mean": 4.653756141662598, "std": 5.391396522521973, "min": -10.512340545654297, "p10": -1.310847473144531, "median": 3.994452476501465, "p90": 11.021277618408202, "max": 19.6207275390625, "pos_frac": 0.84375, "sample": [10.079290390014648, 9.964714050292969, 2.263172149658203, 4.252479553222656, 3.333221435546875, 8.945915222167969, 3.703187942504883, 2.3667259216308594, 3.6166419982910156, 2.670398712158203, 9.344711303710938, 6.996896743774414, 11.491256713867188, 7.949909210205078, 6.077728271484375, 4.116498947143555, 8.34128189086914, 7.961814880371094, 15.362495422363281, 11.009384155273438, 0.46860504150390625, 3.20294189453125, -2.7530059814453125, -5.6905059814453125, 19.6207275390625, -1.4342422485351562, 0.41808509826660156, -6.0249176025390625, 7.664939880371094, 4.35992431640625, 0.7226486206054688, 1.622283935546875, 0.4561958312988281, 11.296266555786133, 1.6445999145507812, 3.520242691040039, 10.468921661376953, 3.712127685546875, -10.512340545654297, 5.724372863769531, -1.7896385192871094, 15.9046630859375, 6.9157562255859375, 9.886764526367188, 4.00872802734375, 6.484964370727539, 1.9229316711425781, 4.3482818603515625, 5.834484100341797, 0.3805999755859375, 3.9801769256591797, 3.5945663452148438, -3.5873260498046875, -1.0229263305664062, -0.7708702087402344, 12.967811584472656, 8.79052734375, 3.7520828247070312, 7.672210693359375, 1.5424346923828125, 1.4868640899658203, 6.832942962646484, -0.6576118469238281, 11.026374816894531], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000614.npy"}
{"epoch": 0.9281934996220711, "step": 615, "batch_size": 64, "mean": 5.367961883544922, "std": 7.38378381729126, "min": -12.026298522949219, "p10": -3.551348114013672, "median": 5.177490234375, "p90": 15.85850067138672, "max": 19.99530029296875, "pos_frac": 0.75, "sample": [2.0137710571289062, 5.160125732421875, 9.055929183959961, 3.7866363525390625, 8.637374877929688, 12.525379180908203, 3.1726226806640625, -11.198471069335938, 16.43880844116211, 4.115379333496094, -6.185600280761719, 4.716461181640625, -4.675750732421875, 7.109161376953125, 4.300357818603516, -3.5486297607421875, 5.49127197265625, 17.181007385253906, 10.630950927734375, -0.1295623779296875, 10.85211181640625, 2.140899658203125, 15.398292541503906, -1.5944843292236328, 9.681327819824219, 6.0047607421875, -3.5525131225585938, 5.82135009765625, 4.6096038818359375, 16.011444091796875, 11.709129333496094, 2.034116744995117, -1.1924972534179688, -4.658695220947266, 6.6355438232421875, 11.394248962402344, 11.1136474609375, 5.194854736328125, -0.5113067626953125, 14.83319091796875, 15.501632690429688, 19.99530029296875, 11.971538543701172, 5.2356414794921875, 9.21636962890625, 1.3693084716796875, 2.9129638671875, 0.0708465576171875, -0.023101806640625, 14.356842041015625, 5.62213134765625, 17.29095458984375, -2.3199386596679688, 5.570262908935547, -1.7567901611328125, 17.90313720703125, 2.6078109741210938, 8.711109161376953, 16.630565643310547, 1.8580913543701172, 4.093849182128906, -2.0863189697265625, -12.026298522949219, -9.678604125976562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000615.npy"}
{"epoch": 0.9297052154195011, "step": 616, "batch_size": 64, "mean": 4.380434513092041, "std": 6.575697422027588, "min": -8.815010070800781, "p10": -2.5779409408569336, "median": 3.6579504013061523, "p90": 13.517446517944338, "max": 20.534194946289062, "pos_frac": 0.703125, "sample": [13.668685913085938, 1.201019287109375, 0.971160888671875, -1.6004257202148438, 9.02730941772461, -1.2733707427978516, 20.534194946289062, 6.5117645263671875, -0.7188758850097656, 4.2127532958984375, 3.8784713745117188, 2.0854339599609375, 1.3409233093261719, -8.815010070800781, 2.3178176879882812, -8.725685119628906, 2.475189208984375, -1.7405681610107422, 0.7580909729003906, 18.7591552734375, 2.2032012939453125, 18.80362892150879, -2.9956417083740234, -2.4575977325439453, 19.164907455444336, -1.757638931274414, -0.34116554260253906, -3.4377517700195312, -0.2154064178466797, 13.283275604248047, 7.299846649169922, 4.28656005859375, 5.197071075439453, 6.1565093994140625, 15.130908966064453, 10.35385513305664, -3.046977996826172, 5.4287109375, 0.296783447265625, 11.679433822631836, 9.33721923828125, 9.093364715576172, 3.709768295288086, 9.689727783203125, 13.617805480957031, 2.376556396484375, 0.9486522674560547, -1.0763587951660156, -3.4293060302734375, 1.7594451904296875, -2.3347434997558594, -2.6295166015625, 5.7930908203125, 3.6061325073242188, 5.306755065917969, 12.389396667480469, 5.003166198730469, 3.9548301696777344, 10.164817810058594, -0.9089851379394531, 8.848882675170898, 12.198295593261719, 4.0910186767578125, -1.0627555847167969], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000616.npy"}
{"epoch": 0.9312169312169312, "step": 617, "batch_size": 64, "mean": 4.382899284362793, "std": 6.446293830871582, "min": -10.088726043701172, "p10": -3.1065299987792963, "median": 3.824756622314453, "p90": 12.908609390258789, "max": 22.263275146484375, "pos_frac": 0.734375, "sample": [2.6804046630859375, 2.370685577392578, -3.5093841552734375, 9.450370788574219, 13.604766845703125, 4.798864364624023, 6.5460205078125, 9.788066864013672, 0.16997528076171875, -3.4134292602539062, 3.2994461059570312, 4.777679443359375, 2.0153045654296875, 4.8028106689453125, 1.2510833740234375, 6.548381805419922, 4.999488830566406, 7.2120819091796875, -2.1252288818359375, 2.7440242767333984, 19.0030517578125, 1.8433036804199219, -0.6032447814941406, 12.685752868652344, 4.373771667480469, -1.5657157897949219, 0.3156890869140625, 10.754215240478516, -0.377197265625, 6.253330230712891, 4.382518768310547, -0.16317367553710938, 3.2561721801757812, 12.926021575927734, 6.146240234375, 22.263275146484375, 3.3382797241210938, -3.3835067749023438, -0.2674140930175781, 15.232406616210938, 8.122093200683594, 3.7676544189453125, -2.4602508544921875, -0.8774986267089844, 17.201892852783203, 8.227407455444336, -10.088726043701172, -7.908843994140625, -5.597419738769531, 2.1405792236328125, 6.516273498535156, 12.86798095703125, 4.742332458496094, -3.8620834350585938, 20.71025848388672, 7.538782119750977, 4.9256744384765625, 3.8818588256835938, 7.3105316162109375, 2.647064208984375, 2.4117050170898438, -0.21014404296875, 6.413309097290039, -2.340057373046875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000617.npy"}
{"epoch": 0.9327286470143613, "step": 618, "batch_size": 64, "mean": 4.818388938903809, "std": 6.317686080932617, "min": -9.769668579101562, "p10": -2.107361602783203, "median": 3.56949520111084, "p90": 12.649390411376954, "max": 22.039295196533203, "pos_frac": 0.8125, "sample": [17.495086669921875, 12.358078002929688, 5.756740570068359, 7.38214111328125, 0.9753952026367188, 0.9636154174804688, -1.5223884582519531, -2.1722564697265625, 4.41851806640625, 0.04238128662109375, 2.9528141021728516, 10.40899658203125, 5.663642883300781, 6.650543212890625, 22.039295196533203, 1.6802520751953125, 3.1762161254882812, 1.435342788696289, 19.4422607421875, 12.16998291015625, 3.5226974487304688, 20.80712127685547, 1.702646255493164, 7.301094055175781, -2.2277145385742188, 15.355575561523438, -0.10117530822753906, 5.473289489746094, 8.107215881347656, 2.002016067504883, 0.8312225341796875, -0.04754447937011719, 7.373619079589844, 2.5276718139648438, 7.6509552001953125, 6.841888427734375, 9.506454467773438, -1.9559402465820312, 7.358001708984375, 6.7417144775390625, -3.4373226165771484, 0.29668617248535156, -0.41397666931152344, -6.315788269042969, 1.6159439086914062, -4.234809875488281, 0.7883071899414062, -9.769668579101562, 6.395256042480469, 8.15719985961914, 0.8192672729492188, 12.774238586425781, 4.109611511230469, 3.616292953491211, 2.5399017333984375, 2.221019744873047, 1.0983047485351562, 5.8615570068359375, 4.821176528930664, 10.759521484375, 8.614280700683594, 17.261871337890625, -2.3312530517578125, 3.0418167114257812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000618.npy"}
{"epoch": 0.9342403628117913, "step": 619, "batch_size": 64, "mean": 4.809283256530762, "std": 6.717854022979736, "min": -12.818679809570312, "p10": -2.535034942626953, "median": 3.7959556579589844, "p90": 14.436737823486332, "max": 19.049917221069336, "pos_frac": 0.765625, "sample": [6.191764831542969, -2.079315185546875, 1.5285682678222656, 1.5537376403808594, 19.049917221069336, 13.559104919433594, 14.8128662109375, -4.3961029052734375, -6.728057861328125, 15.451080322265625, 8.550613403320312, 2.458282470703125, 10.636138916015625, 1.0477542877197266, 11.308189392089844, -2.0744094848632812, 6.5753631591796875, 5.9356689453125, 0.4042930603027344, 0.1125946044921875, -4.106163024902344, 16.091724395751953, -0.013584136962890625, -2.6849136352539062, -1.822774887084961, 7.656623840332031, 7.328022003173828, 6.320587158203125, 0.2650127410888672, -0.27239227294921875, 3.2785491943359375, -12.818679809570312, -2.7100830078125, 4.36083984375, 11.716995239257812, 13.556842803955078, 1.5907726287841797, 18.25121307373047, 18.15643310546875, -4.7899627685546875, 2.7276268005371094, 12.659515380859375, 6.193515777587891, -1.521677017211914, 16.310745239257812, 10.750831604003906, 1.9252185821533203, 7.3603668212890625, 7.230190277099609, 3.1790428161621094, 3.9749755859375, 5.658363342285156, 9.422142028808594, -2.1853179931640625, 12.859466552734375, 0.6876602172851562, 3.6169357299804688, 9.689815521240234, 0.5741939544677734, 0.6483631134033203, 0.7998504638671875, 6.051979064941406, -0.030029296875, 5.957225799560547], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000619.npy"}
{"epoch": 0.9357520786092215, "step": 620, "batch_size": 64, "mean": 4.1958818435668945, "std": 6.545100688934326, "min": -7.49127197265625, "p10": -3.239489364624023, "median": 3.023946762084961, "p90": 14.266804504394534, "max": 20.284835815429688, "pos_frac": 0.703125, "sample": [1.8470230102539062, -4.1798095703125, -3.4348526000976562, 18.10143280029297, -6.5222625732421875, 1.850809097290039, 4.4854583740234375, 15.958084106445312, 2.3693161010742188, 7.516387939453125, 13.504074096679688, 16.010860443115234, 3.3467254638671875, 3.0462188720703125, 2.515106201171875, 11.951850891113281, 3.0016746520996094, 5.048837661743164, -1.418487548828125, 0.9550514221191406, -5.9140625, 1.2527122497558594, -7.49127197265625, 19.05115509033203, -5.6881561279296875, 9.743667602539062, 1.4361190795898438, -0.8760833740234375, 20.284835815429688, -4.2798004150390625, 9.616682052612305, 8.196807861328125, -0.6018581390380859, -2.783641815185547, -0.6662750244140625, 3.1091995239257812, 0.04892921447753906, 5.7111053466796875, 9.617286682128906, 0.3482208251953125, 8.589889526367188, 0.620697021484375, 7.779966354370117, 3.5846481323242188, -0.41011810302734375, -1.795257568359375, 14.59368896484375, 13.151763916015625, -1.1065406799316406, 7.6024169921875, 0.6646232604980469, 15.379844665527344, 4.75323486328125, -0.9333209991455078, -2.711956024169922, -0.2220458984375, 6.406290054321289, -0.7988548278808594, 6.1105804443359375, 4.3834075927734375, 7.75718879699707, 8.420120239257812, 7.671806335449219, 2.9752960205078125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000620.npy"}
{"epoch": 0.9372637944066515, "step": 621, "batch_size": 64, "mean": 3.6776556968688965, "std": 6.406687259674072, "min": -10.220237731933594, "p10": -3.3471036911010743, "median": 2.3626365661621094, "p90": 12.44081802368164, "max": 18.91730499267578, "pos_frac": 0.6875, "sample": [3.304584503173828, 1.529052734375, 2.24224853515625, -1.3576240539550781, -5.42803955078125, -10.220237731933594, 6.067207336425781, 15.264007568359375, -3.3557777404785156, -1.6558513641357422, 3.465545654296875, 14.733444213867188, 12.427055358886719, 2.183534622192383, 18.91730499267578, 5.340782165527344, 7.813081741333008, 1.8804168701171875, 5.539493560791016, 9.744903564453125, -1.0641555786132812, 1.87054443359375, 10.073623657226562, -8.364017486572266, 13.669456481933594, 14.34788703918457, 10.538604736328125, -7.62744140625, 1.7747802734375, -1.7965660095214844, 12.083145141601562, 7.87396240234375, 5.7191162109375, 7.44195556640625, 2.4830245971679688, 1.0630340576171875, 5.039070129394531, 2.0712356567382812, 7.258907318115234, -0.315765380859375, -3.326864242553711, 0.7034740447998047, 11.091293334960938, 2.18243408203125, -0.07915687561035156, -0.8917045593261719, 3.7741355895996094, -1.5084037780761719, -5.18426513671875, 4.140895843505859, -0.18808937072753906, 9.264755249023438, 6.254554748535156, 1.3838748931884766, 9.091499328613281, -2.6562061309814453, -1.7553653717041016, 16.3326416015625, -7.4030303955078125, 6.2562103271484375, -1.7872314453125, 0.147125244140625, 4.505147933959961, 12.44671630859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000621.npy"}
{"epoch": 0.9387755102040817, "step": 622, "batch_size": 64, "mean": 4.305281639099121, "std": 6.06695556640625, "min": -7.136177062988281, "p10": -2.2474103927612306, "median": 2.9813232421875, "p90": 12.206280517578131, "max": 21.19948387145996, "pos_frac": 0.796875, "sample": [8.999107360839844, 2.206714630126953, 1.1312408447265625, -7.136177062988281, 6.896587371826172, 1.1968002319335938, 0.6380157470703125, 6.726158142089844, -2.294198989868164, -0.17333984375, 21.19948387145996, 6.532215118408203, 0.7241687774658203, -5.179252624511719, 10.772079467773438, 3.5163822174072266, 6.026576995849609, 5.771583557128906, 7.1732330322265625, 15.773147583007812, 3.095287322998047, 7.371328353881836, -2.8454971313476562, -0.13069725036621094, 17.585784912109375, 2.5556869506835938, 3.539947509765625, 9.181007385253906, 0.9188289642333984, 4.334850311279297, 17.004425048828125, -5.1606597900390625, 7.771982192993164, 1.6250801086425781, 2.0659141540527344, 1.0318355560302734, 13.4158935546875, 10.798301696777344, 8.19516372680664, -0.04864501953125, 10.893791198730469, 4.7108306884765625, 2.318866729736328, -0.4596595764160156, 2.867359161376953, 8.299125671386719, 0.11968994140625, -6.552093505859375, 2.2879371643066406, 3.7181129455566406, 12.768775939941406, 0.7661895751953125, 0.5753974914550781, -2.1382369995117188, 1.6588897705078125, -1.7023849487304688, 19.641647338867188, 0.9755172729492188, -4.136892318725586, 0.5844841003417969, 7.671375274658203, 4.479829788208008, 7.60260009765625, 5.780548095703125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000622.npy"}
{"epoch": 0.9402872260015117, "step": 623, "batch_size": 64, "mean": 4.992826461791992, "std": 5.821357250213623, "min": -8.132583618164062, "p10": -2.242012786865234, "median": 5.210376739501953, "p90": 12.083907318115237, "max": 18.89990997314453, "pos_frac": 0.765625, "sample": [5.936546325683594, 2.0441360473632812, -0.6897010803222656, -3.5090980529785156, 3.364177703857422, 12.455718994140625, 7.268836975097656, 9.857559204101562, 7.950531005859375, 10.60797119140625, -1.0129508972167969, 1.8157768249511719, 11.216346740722656, 3.3159408569335938, 1.6516189575195312, 18.89990997314453, -2.3167495727539062, 13.171180725097656, 9.778709411621094, 7.479339599609375, 4.847728729248047, 2.260833740234375, 8.574880599975586, -4.7218170166015625, -3.9244766235351562, 7.9285888671875, 9.583816528320312, 8.695404052734375, -8.132583618164062, 12.755325317382812, 8.604667663574219, 16.139846801757812, 8.95086669921875, 9.01971435546875, -0.16064453125, -4.493797302246094, 6.621437072753906, 1.1893272399902344, 8.875537872314453, 10.3773193359375, 12.833610534667969, 17.422630310058594, 2.918272018432617, -0.47470855712890625, -2.067626953125, 0.40801048278808594, 0.13257598876953125, 4.19243049621582, 0.425750732421875, 6.494550704956055, 5.513580322265625, 10.5111083984375, 6.5761566162109375, 9.110733032226562, -0.4389934539794922, 4.907173156738281, 8.12396240234375, 1.4446525573730469, 3.2166290283203125, 1.0127716064453125, 10.823394775390625, -1.4150962829589844, -2.591888427734375, -1.8165664672851562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000623.npy"}
{"epoch": 0.9417989417989417, "step": 624, "batch_size": 64, "mean": 5.261911392211914, "std": 6.928031921386719, "min": -9.590497970581055, "p10": -2.3471982955932615, "median": 4.107082366943359, "p90": 14.86329803466797, "max": 22.3289794921875, "pos_frac": 0.765625, "sample": [-1.8624114990234375, -4.432807922363281, 7.438833236694336, 9.117435455322266, 20.186214447021484, 16.498945236206055, 0.32665443420410156, 3.3209075927734375, -0.06261444091796875, -2.075166702270508, 2.8659820556640625, 5.338279724121094, 7.676584243774414, 6.444128036499023, 8.577461242675781, 6.15081787109375, 3.0378189086914062, 10.526824951171875, 14.323226928710938, 0.7439765930175781, -0.9736366271972656, 2.6104087829589844, -4.364555358886719, 13.215675354003906, -0.4855518341064453, 6.848978042602539, 0.7923965454101562, -2.4637832641601562, 2.8095703125, 2.4313201904296875, 14.185684204101562, 15.094757080078125, 11.117721557617188, 13.160552978515625, 4.009311676025391, -3.6839141845703125, 17.08253288269043, 10.800983428955078, 22.3289794921875, 10.324928283691406, 8.50566291809082, 7.664340972900391, 9.092592239379883, 2.9859237670898438, -2.4780044555664062, 7.6499176025390625, 19.809555053710938, 0.003475189208984375, -1.272390365600586, 4.204853057861328, 7.635978698730469, 0.5213565826416016, -1.7608184814453125, 0.06407546997070312, -9.590497970581055, 11.167966842651367, 0.0892791748046875, -0.9557418823242188, 5.433685302734375, 0.44800567626953125, -7.123769760131836, 1.5987434387207031, 8.64605712890625, 15.438636779785156], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000624.npy"}
{"epoch": 0.9433106575963719, "step": 625, "batch_size": 64, "mean": 3.8421425819396973, "std": 6.525014877319336, "min": -9.056516647338867, "p10": -3.5658695220947263, "median": 2.955197334289551, "p90": 13.040343093872071, "max": 19.49700927734375, "pos_frac": 0.71875, "sample": [2.595132827758789, 12.74114990234375, 2.496112823486328, -0.3448638916015625, -3.058462142944336, 7.243144989013672, 4.512540817260742, 2.2422256469726562, 4.834924697875977, 14.731216430664062, 0.7723960876464844, -1.8184394836425781, 8.452646255493164, 1.1082038879394531, 12.990646362304688, 1.324859619140625, 19.49700927734375, 4.026710510253906, -3.6249656677246094, 1.015462875366211, 8.079994201660156, 13.061641693115234, 0.30905914306640625, -7.4112091064453125, -0.4804229736328125, 3.5575218200683594, 3.3296737670898438, 9.561592102050781, -0.6754741668701172, 3.3152618408203125, -1.8544120788574219, 13.337173461914062, 12.39727783203125, -0.20505905151367188, 6.396640777587891, -1.7640228271484375, -3.8057308197021484, 17.35064697265625, 0.9543533325195312, 1.6341323852539062, 8.42999267578125, 18.668197631835938, 10.586906433105469, 5.89299201965332, 3.9107437133789062, 0.71490478515625, -8.560218811035156, -3.427978515625, -2.083690643310547, 5.761199951171875, -0.408782958984375, -6.61962890625, -9.056516647338867, 16.05880355834961, 5.678287506103516, 0.2864551544189453, 6.2103271484375, 1.8143081665039062, 7.654390335083008, -5.177314758300781, 2.4115333557128906, 10.29847526550293, 4.622079849243164, 3.405364990234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000625.npy"}
{"epoch": 0.9448223733938019, "step": 626, "batch_size": 64, "mean": 5.699305534362793, "std": 8.012341499328613, "min": -8.447620391845703, "p10": -4.953912734985352, "median": 4.87178897857666, "p90": 16.906714630126956, "max": 25.611236572265625, "pos_frac": 0.78125, "sample": [13.160110473632812, 5.529693603515625, 10.518142700195312, -5.055438995361328, 20.667816162109375, 14.516382217407227, 17.584197998046875, -6.4426422119140625, 3.866922378540039, 14.850332260131836, 0.8927421569824219, 1.015838623046875, 2.7905502319335938, 0.5625476837158203, -5.149688720703125, 20.815879821777344, 7.836296081542969, -8.447620391845703, 16.509925842285156, 2.6064910888671875, -2.794219970703125, 0.196502685546875, 5.399209976196289, 19.686904907226562, 4.123384475708008, 1.051961898803711, 5.468654632568359, 7.9935150146484375, -8.153640747070312, 25.611236572265625, 5.238685607910156, -2.031881332397461, 12.025215148925781, 8.8480224609375, 4.429662704467773, -7.843254089355469, -1.8324432373046875, 8.448719024658203, 0.7146244049072266, 4.960357666015625, 7.445049285888672, 2.5268478393554688, 15.4105224609375, 3.653423309326172, -3.8163299560546875, 4.373096466064453, 2.5545406341552734, 14.05868911743164, 11.222515106201172, 6.894950866699219, 13.171646118164062, 0.5491676330566406, -6.987422943115234, 8.3167724609375, -4.717018127441406, 13.688949584960938, 4.783220291137695, 17.4510498046875, 11.913993835449219, 5.925666809082031, 17.076766967773438, -2.776914596557617, 3.7838821411132812, -1.9171905517578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000626.npy"}
{"epoch": 0.9463340891912321, "step": 627, "batch_size": 64, "mean": 6.097313404083252, "std": 7.573387622833252, "min": -10.507011413574219, "p10": -2.1748523712158203, "median": 4.551225662231445, "p90": 17.015972709655767, "max": 22.909164428710938, "pos_frac": 0.796875, "sample": [8.198116302490234, 4.010082244873047, 11.596366882324219, 11.89404296875, 4.457645416259766, 10.214733123779297, 20.40345001220703, 17.815109252929688, 14.9254150390625, 2.719207763671875, -5.232643127441406, 12.900436401367188, 21.58652114868164, 8.396347045898438, 5.782585144042969, -1.242706298828125, 0.7413406372070312, 4.80950927734375, 20.250839233398438, 1.5148162841796875, 12.177146911621094, 3.8504581451416016, 1.7438812255859375, 2.827972412109375, -1.0118255615234375, -7.1986083984375, 14.602071762084961, -1.1731033325195312, 11.230962753295898, -0.14984512329101562, 2.4645252227783203, 19.81537628173828, 2.41259765625, -0.23519134521484375, 4.644805908203125, 5.695899963378906, -2.146453857421875, 2.7291336059570312, 0.7818737030029297, 10.074901580810547, 22.909164428710938, 7.386100769042969, -2.187023162841797, -3.1675148010253906, -4.4873809814453125, 3.0246353149414062, 1.6639328002929688, 7.633512496948242, 3.565399169921875, 14.615371704101562, 5.278482437133789, -10.507011413574219, 2.2078933715820312, 9.37266731262207, 15.615060806274414, 4.119354248046875, 2.2420578002929688, -8.971717834472656, 9.53173828125, 1.3672409057617188, 8.30909538269043, 14.057294845581055, 10.15554428100586, 17.616363525390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000627.npy"}
{"epoch": 0.9478458049886621, "step": 628, "batch_size": 64, "mean": 6.269933700561523, "std": 7.955796718597412, "min": -16.030601501464844, "p10": -2.3106456756591793, "median": 6.549315452575684, "p90": 16.0401424407959, "max": 23.694299697875977, "pos_frac": 0.796875, "sample": [23.694299697875977, 2.7283935546875, 10.862335205078125, 9.286064147949219, -3.4622535705566406, -1.544586181640625, 11.206451416015625, 1.8432159423828125, 15.405887603759766, 10.083038330078125, 21.73615264892578, 20.397384643554688, 9.960662841796875, 2.8144359588623047, 7.165777206420898, 1.8837013244628906, -16.030601501464844, -11.912801742553711, 11.220474243164062, 1.690399169921875, 13.02232551574707, 7.54931640625, 0.6245326995849609, 0.943450927734375, 9.726486206054688, 12.402084350585938, 4.752410888671875, 11.073028564453125, 5.53204345703125, 17.60810089111328, 7.776723861694336, 10.491607666015625, -0.6336212158203125, 17.801794052124023, 4.126369476318359, 14.986373901367188, -2.4141998291015625, 0.7869834899902344, 8.400077819824219, -1.5394058227539062, 8.925823211669922, 4.691795349121094, 5.048830032348633, 11.070858001708984, 5.932853698730469, 11.066810607910156, 18.84366798400879, 11.626222610473633, 13.26077651977539, 11.295528411865234, 3.795320510864258, -0.3494873046875, 1.2695980072021484, -8.202335357666016, 3.8018741607666016, 16.311965942382812, 1.931793212890625, 9.030017852783203, -0.4898529052734375, 3.6704483032226562, -3.7274322509765625, 13.013717651367188, -2.069019317626953, -10.518936157226562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000628.npy"}
{"epoch": 0.9493575207860923, "step": 629, "batch_size": 64, "mean": 5.028815746307373, "std": 6.043740272521973, "min": -5.543970108032227, "p10": -1.3943542480468747, "median": 3.220947265625, "p90": 13.90075454711914, "max": 22.54846954345703, "pos_frac": 0.796875, "sample": [-0.33774566650390625, 2.7000961303710938, -5.543970108032227, 5.473194122314453, 4.257379531860352, 2.4276199340820312, 0.101654052734375, 8.155445098876953, 2.235088348388672, 2.7664947509765625, 3.1658859252929688, 5.171974182128906, 2.454954147338867, 22.54846954345703, 1.1158638000488281, -1.1535797119140625, 4.800752639770508, 5.169345855712891, 0.3851318359375, 13.773345947265625, -0.47849273681640625, -1.4975433349609375, -1.6267375946044922, -0.15119171142578125, -2.9681129455566406, 1.2828788757324219, 13.856689453125, 9.8677978515625, 0.4877490997314453, 15.239570617675781, 7.70111083984375, -2.877655029296875, 6.661445617675781, 6.057657241821289, 7.375057220458984, 4.5839996337890625, 8.457216262817383, 2.214385986328125, 1.4484672546386719, -0.595184326171875, -0.57855224609375, 1.5559234619140625, 13.415695190429688, 1.7897377014160156, 1.2480087280273438, 2.6544113159179688, 3.2760086059570312, 8.982574462890625, 7.25050163269043, 7.497762680053711, 12.981376647949219, 0.9591865539550781, 16.74657440185547, 2.437532424926758, 8.375762939453125, 11.821090698242188, -2.4278182983398438, 17.892051696777344, 17.149211883544922, 15.249038696289062, 4.291015625, 13.919639587402344, -3.2519454956054688, 3.9029159545898438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000629.npy"}
{"epoch": 0.9508692365835223, "step": 630, "batch_size": 64, "mean": 5.478397846221924, "std": 6.817183017730713, "min": -4.7367706298828125, "p10": -2.8893598556518554, "median": 4.602316856384277, "p90": 14.988972473144536, "max": 23.08270263671875, "pos_frac": 0.734375, "sample": [-4.7367706298828125, 5.5971527099609375, 1.4404869079589844, -0.82208251953125, 10.481721878051758, 15.42901611328125, 4.66156005859375, -0.2905921936035156, 22.1512451171875, 13.962203979492188, -0.6390647888183594, 10.83358383178711, -3.42413330078125, 15.550521850585938, -0.8057975769042969, 12.453310012817383, -4.626377105712891, 5.760826110839844, 9.502395629882812, 8.37628173828125, 1.10076904296875, 2.8678512573242188, 8.378562927246094, 10.8514404296875, 16.110580444335938, 7.321891784667969, -4.042877197265625, 4.543073654174805, -4.58746337890625, 2.8555068969726562, -0.41721343994140625, 10.416862487792969, -0.3644905090332031, 15.786026000976562, 8.422683715820312, 2.8130111694335938, 12.355602264404297, 1.0521621704101562, 4.8029937744140625, 3.3086700439453125, 23.08270263671875, 9.29102897644043, 13.46742057800293, -3.119548797607422, 20.46428680419922, 10.106422424316406, 3.2068099975585938, 4.3510284423828125, 6.190547943115234, -1.516876220703125, 0.6935653686523438, -1.6479415893554688, -0.4958763122558594, 11.698692321777344, 5.9008331298828125, -2.785951614379883, 1.8386306762695312, 0.9303016662597656, 4.966178894042969, 13.199222564697266, -2.9336776733398438, 6.113555908203125, 2.5212669372558594, 0.6636943817138672], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000630.npy"}
{"epoch": 0.9523809523809523, "step": 631, "batch_size": 64, "mean": 6.156581878662109, "std": 8.057750701904297, "min": -12.729721069335938, "p10": -2.8795057296752926, "median": 5.04863166809082, "p90": 17.07966690063477, "max": 25.870452880859375, "pos_frac": 0.78125, "sample": [10.730396270751953, 1.8538818359375, 3.404541015625, 10.125022888183594, 7.543874740600586, 14.953681945800781, 13.091354370117188, 6.209861755371094, 0.3556556701660156, 2.6313552856445312, -12.729721069335938, -4.139135360717773, 6.8212890625, 0.6113967895507812, -1.1748275756835938, 1.8915977478027344, -2.581209182739258, 2.5834999084472656, 8.400108337402344, 18.309707641601562, 19.838455200195312, -1.3007354736328125, 24.58667755126953, 15.191741943359375, 11.92572021484375, 8.944877624511719, 1.5649337768554688, 18.543487548828125, -1.3931617736816406, 4.094198226928711, 2.5627822875976562, 2.1961822509765625, -6.02520751953125, 5.228851318359375, 7.303157806396484, 11.916793823242188, 9.408153533935547, 6.18017578125, 25.870452880859375, 14.083534240722656, 2.6354598999023438, -7.624691009521484, -0.7845306396484375, 7.724912643432617, -0.07390594482421875, -9.107242584228516, 3.3260440826416016, 4.868412017822266, -5.996795654296875, 4.3787841796875, -3.0073471069335938, 12.073348999023438, 2.394500732421875, 16.270126342773438, -2.1959896087646484, 12.008842468261719, 3.7504196166992188, 17.426612854003906, 6.240467071533203, 15.4130859375, 5.790962219238281, 18.717323303222656, 4.431301116943359, 15.747737884521484], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000631.npy"}
{"epoch": 0.9538926681783825, "step": 632, "batch_size": 64, "mean": 6.329607963562012, "std": 6.630578517913818, "min": -9.195663452148438, "p10": -2.1341253280639645, "median": 6.620622634887695, "p90": 14.176540756225586, "max": 19.76868438720703, "pos_frac": 0.828125, "sample": [9.801223754882812, 12.150253295898438, 2.12017822265625, 1.3454627990722656, 10.208599090576172, 14.21567153930664, -0.28116607666015625, 14.085235595703125, 0.09016036987304688, 10.515037536621094, 1.7336883544921875, -2.000732421875, 6.7541046142578125, -1.2049674987792969, -2.191293716430664, 13.462055206298828, 12.044586181640625, 2.5724334716796875, 13.895856857299805, -7.497740745544434, 3.9523086547851562, 2.9472827911376953, -6.536405563354492, 6.9102020263671875, 2.2569580078125, 12.709762573242188, 3.057373046875, 11.209190368652344, 6.557361602783203, 6.515350341796875, 15.054000854492188, 19.11279296875, 4.6431884765625, -1.3839263916015625, 9.481063842773438, 7.327085494995117, 13.541824340820312, 5.653162002563477, 11.320777893066406, 7.062189102172852, 11.0538330078125, 12.659656524658203, 4.559043884277344, 1.995269775390625, 2.191102981567383, 14.289241790771484, 6.6838836669921875, 2.9122371673583984, 8.675178527832031, 11.200075149536133, 19.76868438720703, -9.195663452148438, 11.345199584960938, 17.054119110107422, -3.2686004638671875, 1.9573287963867188, 16.09845733642578, 11.084281921386719, 8.409622192382812, 3.826498031616211, 4.375572204589844, 2.7162322998046875, -6.200756072998047, -2.3058013916015625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000632.npy"}
{"epoch": 0.9554043839758125, "step": 633, "batch_size": 64, "mean": 4.950506210327148, "std": 7.073336601257324, "min": -10.206146240234375, "p10": -2.4295482635498042, "median": 3.8732995986938477, "p90": 14.933204650878912, "max": 23.348663330078125, "pos_frac": 0.734375, "sample": [0.5033226013183594, 4.791847229003906, 7.251121520996094, 8.3509521484375, 9.771331787109375, 4.752532958984375, 4.75177001953125, 2.206085205078125, 0.9763298034667969, 4.690116882324219, 0.697479248046875, -1.8246097564697266, 17.642288208007812, 2.300657272338867, -2.562786102294922, 9.490474700927734, 0.9024581909179688, 0.8217620849609375, 2.9122543334960938, -2.1186599731445312, 2.948984146118164, -0.9227561950683594, 10.6544189453125, 12.449836730957031, 9.200153350830078, 6.889068603515625, -0.8951511383056641, 0.1273040771484375, 5.677141189575195, 7.593132019042969, 7.8737030029296875, 0.1794891357421875, 7.894037246704102, 1.5866775512695312, -1.7272872924804688, 5.9651947021484375, -4.071258544921875, -4.024223327636719, -1.1450634002685547, -4.327384948730469, -0.5732955932617188, -0.018840789794921875, 0.5752010345458984, 23.348663330078125, 8.430335998535156, -0.17818832397460938, 5.673593521118164, -6.224111557006836, 10.831535339355469, 15.487579345703125, 16.728546142578125, 8.520748138427734, 13.375473022460938, 13.43603515625, 3.0564823150634766, -6.174501419067383, 2.8096580505371094, -1.6441688537597656, 13.639663696289062, 18.344707489013672, 21.618011474609375, -10.206146240234375, 16.331039428710938, 11.411651611328125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000633.npy"}
{"epoch": 0.9569160997732427, "step": 634, "batch_size": 64, "mean": 3.03094220161438, "std": 6.953175067901611, "min": -8.144927978515625, "p10": -4.764222717285156, "median": 1.3300409317016602, "p90": 13.32864990234375, "max": 19.46735382080078, "pos_frac": 0.578125, "sample": [-0.1288299560546875, -2.667266845703125, 2.365467071533203, 18.57684326171875, 13.332183837890625, -3.4386940002441406, -4.538871765136719, -0.9679355621337891, -0.9383163452148438, -3.329376220703125, 1.45648193359375, 0.33196258544921875, -1.9354934692382812, 1.2035999298095703, -0.119049072265625, 3.3449783325195312, 6.969532012939453, 3.4101696014404297, -0.8106231689453125, -1.4985084533691406, -6.270454406738281, -0.5291099548339844, 8.958099365234375, -6.643882751464844, 19.46735382080078, -1.1701946258544922, 13.730960845947266, 8.795951843261719, 14.798446655273438, -3.5548973083496094, -4.860801696777344, 7.860326766967773, 2.377368927001953, -4.050178527832031, 11.914478302001953, 0.8580188751220703, -6.977912902832031, -5.637794494628906, 17.17456817626953, 10.622190475463867, 9.42236328125, -2.40850830078125, -6.586090087890625, 5.097042083740234, 2.51751708984375, -8.144927978515625, -0.12821006774902344, 2.1898136138916016, 18.929527282714844, 2.9305782318115234, -2.6168899536132812, -1.4531478881835938, 4.629737854003906, 0.2174835205078125, 4.879245758056641, 2.254467010498047, 10.822349548339844, 9.987163543701172, 8.297920227050781, 9.8271484375, -1.3253860473632812, 3.0721263885498047, 0.7677841186523438, 13.320404052734375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000634.npy"}
{"epoch": 0.9584278155706727, "step": 635, "batch_size": 64, "mean": 4.126021385192871, "std": 7.79518985748291, "min": -10.125274658203125, "p10": -5.54085693359375, "median": 2.8756103515625, "p90": 14.015221023559572, "max": 22.65068817138672, "pos_frac": 0.640625, "sample": [-1.8224525451660156, 10.460990905761719, -7.67158317565918, -0.5251255035400391, 11.027763366699219, 2.6759262084960938, -4.543712615966797, -7.639593124389648, -5.38739013671875, 16.948463439941406, 1.3640594482421875, 10.000129699707031, 0.338104248046875, -3.075439453125, 9.925796508789062, 14.165771484375, 13.315923690795898, 3.1401519775390625, 14.889892578125, 9.799003601074219, -10.125274658203125, -0.9650726318359375, -1.06939697265625, 19.925575256347656, 0.05743408203125, -1.0375747680664062, 8.51348876953125, -0.5534286499023438, -1.2014045715332031, 3.9979476928710938, 1.388702392578125, -4.705867767333984, 13.663936614990234, 3.0752944946289062, -1.223785400390625, 4.935596466064453, 13.510440826416016, -0.33361053466796875, 12.357475280761719, -6.908176422119141, 10.605323791503906, 18.632164001464844, -1.2545928955078125, 20.97858428955078, 3.8123626708984375, -5.60662841796875, 8.934959411621094, 7.777336120605469, 11.282554626464844, -2.95574951171875, 5.1430511474609375, 22.65068817138672, 2.45404052734375, 4.085716247558594, -7.649986267089844, 0.5094833374023438, 10.288619995117188, 2.028228759765625, 1.353759765625, 7.596784591674805, -7.423423767089844, 7.3816375732421875, 4.3273773193359375, -1.5759029388427734], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000635.npy"}
{"epoch": 0.9599395313681028, "step": 636, "batch_size": 64, "mean": 5.553957939147949, "std": 6.146595001220703, "min": -7.772102355957031, "p10": -0.8870628356933592, "median": 5.191112518310547, "p90": 13.996430969238283, "max": 20.716461181640625, "pos_frac": 0.84375, "sample": [10.236412048339844, -0.7399673461914062, 5.702060699462891, 1.1870269775390625, 7.619209289550781, 6.194404602050781, 9.344348907470703, -0.950103759765625, 0.8366546630859375, 13.456039428710938, 5.94342041015625, 0.33853721618652344, 14.22802734375, 8.781349182128906, 6.975639343261719, 2.7463455200195312, 3.777740478515625, 3.5423049926757812, 3.448760986328125, 5.2007904052734375, 4.472175598144531, 3.8442611694335938, 0.6806831359863281, 9.224475860595703, 3.0931529998779297, 2.971590042114258, 20.694259643554688, 0.24225997924804688, -0.5601463317871094, 7.0114288330078125, 2.749706268310547, 5.702091217041016, 19.569625854492188, 5.3855133056640625, -1.4351119995117188, 9.045391082763672, 5.291728973388672, 9.244409561157227, -2.329833984375, 11.961441040039062, 7.095069885253906, 14.699344635009766, -0.6012496948242188, 20.716461181640625, 0.9053916931152344, -7.772102355957031, 17.102142333984375, 5.181434631347656, 4.890336990356445, 10.869335174560547, 8.055191040039062, 3.0220718383789062, 9.546913146972656, 18.799118041992188, 6.717529296875, -6.014984130859375, -7.640098571777344, 1.470947265625, 2.501241683959961, 9.472131729125977, 2.5675506591796875, -1.0366439819335938, 4.496086120605469, 5.6819915771484375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000636.npy"}
{"epoch": 0.9614512471655329, "step": 637, "batch_size": 64, "mean": 4.564568996429443, "std": 7.5405497550964355, "min": -14.709808349609375, "p10": -3.0938873291015625, "median": 3.313802719116211, "p90": 17.554279708862314, "max": 20.92493438720703, "pos_frac": 0.734375, "sample": [3.0807037353515625, 10.106857299804688, 3.771453857421875, 20.580520629882812, 0.48683929443359375, 3.368438720703125, -2.1660308837890625, 6.272005081176758, 12.0452880859375, 3.259166717529297, -0.7848682403564453, 3.1593017578125, 7.221488952636719, -3.0391464233398438, 4.019561767578125, 6.798652648925781, -0.13176727294921875, 6.97711181640625, 18.358173370361328, 2.037790298461914, 1.1360626220703125, 2.4270095825195312, 1.73284912109375, -0.1579742431640625, -0.3439521789550781, 6.549797058105469, -1.4163818359375, 19.783374786376953, 15.01800537109375, 20.92493438720703, 19.934242248535156, 0.0467987060546875, -8.253067016601562, 8.83209228515625, -3.1173477172851562, 4.3155517578125, -8.474472045898438, 4.043285369873047, 5.26580810546875, 12.072021484375, 1.5215606689453125, 11.352752685546875, -0.46549034118652344, 3.2475967407226562, 10.681747436523438, 3.6050949096679688, -3.1431350708007812, 5.152130126953125, -1.0749397277832031, 0.761505126953125, 2.212615966796875, 20.252761840820312, 4.421291351318359, 5.853782653808594, 3.1016387939453125, -8.951263427734375, 15.67852783203125, 5.50633430480957, -0.629486083984375, -14.709808349609375, 4.26568603515625, 18.648162841796875, -3.5247268676757812, 2.6278762817382812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000637.npy"}
{"epoch": 0.9629629629629629, "step": 638, "batch_size": 64, "mean": 5.6766815185546875, "std": 6.893887042999268, "min": -7.830543518066406, "p10": -2.0695220947265613, "median": 5.031951904296875, "p90": 15.438639450073243, "max": 20.81201934814453, "pos_frac": 0.765625, "sample": [0.1390838623046875, 11.812309265136719, 13.66314697265625, 0.9456748962402344, -7.830543518066406, 1.322723388671875, 17.98250389099121, 4.823333740234375, 0.7130203247070312, -2.510845184326172, 2.647857666015625, -2.689075469970703, 0.5224456787109375, 4.4559783935546875, 1.8277816772460938, 12.715682983398438, -6.444007873535156, -2.8708572387695312, 11.811439514160156, 1.6574172973632812, 7.5796356201171875, 6.334281921386719, 10.410030364990234, 9.883636474609375, -7.1773529052734375, 19.780136108398438, 8.1658935546875, 15.92022705078125, 0.8849964141845703, 3.5110931396484375, 4.600912094116211, 15.98857307434082, 9.044036865234375, 15.142154693603516, 5.240570068359375, -0.5698413848876953, 1.89471435546875, 10.513298034667969, 6.3880462646484375, 12.95041275024414, 9.894332885742188, 9.43392562866211, 10.740821838378906, -1.0397682189941406, 16.293014526367188, -0.4499244689941406, -0.05438423156738281, 14.135032653808594, 1.5214920043945312, 11.159442901611328, -0.1862812042236328, 6.1828155517578125, 14.24371337890625, 5.4673919677734375, -4.583972930908203, 7.034507751464844, -0.3831138610839844, 20.81201934814453, 15.565704345703125, 1.5353202819824219, 1.0660400390625, -0.9638290405273438, -1.0216140747070312, 5.724401473999023], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000638.npy"}
{"epoch": 0.9644746787603931, "step": 639, "batch_size": 64, "mean": 5.211551189422607, "std": 6.874271392822266, "min": -7.3587493896484375, "p10": -2.913045501708984, "median": 4.728847503662109, "p90": 14.278917312622072, "max": 23.076526641845703, "pos_frac": 0.75, "sample": [7.68589973449707, 8.47857666015625, 3.4573440551757812, 7.216758728027344, 7.062385559082031, 7.402336120605469, 3.7033004760742188, -5.6798858642578125, 0.29296302795410156, 10.166805267333984, 0.1104888916015625, 4.834663391113281, 22.696136474609375, 4.6230316162109375, 8.700199127197266, -0.2083282470703125, 5.315881729125977, -1.7505302429199219, -6.7672576904296875, 7.059814453125, 3.57904052734375, 13.907947540283203, 1.5377559661865234, 11.605010986328125, -3.800079345703125, 12.436943054199219, 4.2184906005859375, 10.736717224121094, 0.0600433349609375, 14.65850830078125, 16.546043395996094, -3.711423873901367, 0.7585296630859375, 7.289207458496094, -0.8794937133789062, 2.0107154846191406, 6.026695251464844, 1.9418144226074219, 23.076526641845703, 10.859687805175781, 11.848676681518555, 2.9062042236328125, 14.437904357910156, 7.0534515380859375, -2.20941162109375, 1.6534919738769531, -1.5616607666015625, -1.3892364501953125, 6.540626525878906, -0.4653739929199219, 15.266807556152344, 12.326501846313477, -1.1160507202148438, 6.185943603515625, 2.837249755859375, -7.3587493896484375, 1.0697174072265625, -2.5037384033203125, -3.0884628295898438, 20.744918823242188, -3.328563690185547, 6.211830139160156, 8.715957641601562, 11.501976013183594], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000639.npy"}
{"epoch": 0.9659863945578231, "step": 640, "batch_size": 64, "mean": 4.130763053894043, "std": 6.82029390335083, "min": -17.439952850341797, "p10": -3.1036706924438473, "median": 4.70854377746582, "p90": 12.025573921203614, "max": 16.60477638244629, "pos_frac": 0.78125, "sample": [7.676998138427734, 9.811683654785156, 10.807327270507812, -10.531148910522461, 0.712677001953125, 5.4900360107421875, 10.123495101928711, 2.0582275390625, 2.845306396484375, 13.646644592285156, 0.5055313110351562, 0.7408599853515625, 11.63395881652832, 6.272735595703125, -7.6327056884765625, 5.4557037353515625, 7.172677993774414, 4.870372772216797, 1.9493350982666016, 15.642305374145508, -17.439952850341797, 3.0257110595703125, 8.918045043945312, 0.5940780639648438, -2.81451416015625, -2.873960494995117, 8.550636291503906, 0.5705413818359375, -5.338535308837891, 2.2924270629882812, 13.048528671264648, -0.190277099609375, 2.026641845703125, 11.465614318847656, 10.202247619628906, -0.0832366943359375, -0.956451416015625, 3.2945632934570312, 4.893310546875, 6.653587341308594, 0.578948974609375, 12.193408966064453, 1.5435562133789062, 7.249359130859375, 13.435798645019531, 7.016914367675781, 4.546714782714844, -14.859504699707031, 3.8029308319091797, 8.841255187988281, 7.599494934082031, 15.326972961425781, 3.6076698303222656, 9.370147705078125, -1.2080154418945312, -1.5235767364501953, 16.60477638244629, -5.033172607421875, -3.202117919921875, 7.709774017333984, 0.1448822021484375, 9.951156616210938, 6.280363082885742, 9.300086975097656], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000640.npy"}
{"epoch": 0.9674981103552532, "step": 641, "batch_size": 64, "mean": 4.176577091217041, "std": 6.291577339172363, "min": -14.65008544921875, "p10": -1.2035106658935546, "median": 3.8689517974853516, "p90": 11.981729125976564, "max": 24.12969970703125, "pos_frac": 0.796875, "sample": [-3.4326133728027344, -8.468208312988281, 8.144922256469727, 5.36790657043457, 7.100555419921875, 0.21656036376953125, 8.956951141357422, 3.919036865234375, -1.1161003112792969, 0.3108482360839844, 3.3256683349609375, 2.7364768981933594, 6.316928863525391, 15.916275024414062, 1.836273193359375, 1.8740100860595703, -0.3214111328125, 10.007118225097656, 14.273855209350586, 6.281803131103516, 8.95492172241211, -1.1803512573242188, 0.6572494506835938, -0.12741851806640625, 7.779350280761719, 12.048736572265625, 4.236968994140625, 4.3613433837890625, 6.432228088378906, 7.313179016113281, -0.8553619384765625, 11.82537841796875, 0.4427642822265625, 4.087945938110352, -1.2134361267089844, -10.560554504394531, 3.470989227294922, 4.4204254150390625, -2.2815704345703125, 1.5177841186523438, -14.65008544921875, 16.242454528808594, 5.4883575439453125, 0.264617919921875, 4.3618927001953125, 15.252704620361328, 0.40634918212890625, -0.1582183837890625, 1.925802230834961, 4.58746337890625, 24.12969970703125, 8.701496124267578, 14.853250503540039, 6.350444793701172, -2.375701904296875, 3.2983627319335938, 7.309654235839844, 1.5569343566894531, 3.818866729736328, 3.1029052734375, 2.5861282348632812, 0.1531524658203125, 6.233371734619141, 9.283605575561523], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000641.npy"}
{"epoch": 0.9690098261526833, "step": 642, "batch_size": 64, "mean": 3.6544651985168457, "std": 7.205263614654541, "min": -17.530029296875, "p10": -4.789932823181152, "median": 2.5953140258789062, "p90": 13.694445037841799, "max": 17.621078491210938, "pos_frac": 0.65625, "sample": [13.901695251464844, 5.33447265625, 1.1160659790039062, -3.3090972900390625, 3.6203842163085938, -2.2615413665771484, 6.931251525878906, 15.551712036132812, 0.080047607421875, -17.530029296875, 12.967742919921875, 1.4485969543457031, 2.6241302490234375, -2.4550857543945312, 8.545806884765625, -0.0254364013671875, 9.923797607421875, 17.621078491210938, -2.929044723510742, -6.469886779785156, 3.0684661865234375, 6.2197113037109375, 5.666587829589844, -2.008380889892578, 12.692283630371094, 7.639686584472656, 16.72881317138672, 1.3661346435546875, 13.210861206054688, 2.1420669555664062, 4.949413299560547, 6.9355316162109375, 9.036872863769531, 0.7982597351074219, -2.0498085021972656, 14.65740966796875, -5.8987579345703125, 2.4134368896484375, -0.5680160522460938, -4.0641632080078125, -0.7090873718261719, 2.566497802734375, 0.10203742980957031, -1.8949661254882812, 8.250080108642578, 4.795770645141602, -6.969970703125, 9.851612091064453, 13.158004760742188, 17.40509796142578, 2.0880126953125, -2.2204818725585938, 9.803766250610352, 9.544769287109375, 16.97446632385254, 4.663143157958984, -2.82208251953125, 4.770301818847656, -5.100976943969727, -6.201641082763672, -0.3357086181640625, -2.0294189453125, -6.3085174560546875, 6.881980895996094], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000642.npy"}
{"epoch": 0.9705215419501134, "step": 643, "batch_size": 64, "mean": 3.590852737426758, "std": 8.527931213378906, "min": -39.03070068359375, "p10": -5.293507385253906, "median": 3.722219467163086, "p90": 13.890299797058107, "max": 19.26318359375, "pos_frac": 0.71875, "sample": [1.364654541015625, 5.098453521728516, 3.5642318725585938, -6.8695526123046875, 1.5726146697998047, 15.85711669921875, 3.1891937255859375, 8.861244201660156, 10.344650268554688, 1.5954513549804688, -2.1980133056640625, 3.1254520416259766, 5.563240051269531, 0.7739410400390625, 5.651603698730469, -39.03070068359375, -1.1740188598632812, 0.014719009399414062, 14.083398818969727, 19.26318359375, 5.072486877441406, 6.111061096191406, 9.866065979003906, 2.8129425048828125, -0.911224365234375, 16.84747314453125, 3.5587158203125, -5.347198486328125, 18.744861602783203, 11.711761474609375, 7.978492736816406, 5.318309783935547, -0.192596435546875, 8.012142181396484, 3.880207061767578, -0.29337310791015625, 1.3445510864257812, 3.8942947387695312, 15.725807189941406, 10.532487869262695, 10.956274032592773, -7.603172302246094, 7.9697418212890625, -4.5453338623046875, 7.6511993408203125, -5.711639404296875, -6.097652435302734, -11.662689208984375, 2.7350921630859375, 4.642751693725586, -0.5723342895507812, 4.227142333984375, 14.168914794921875, 7.257171630859375, 3.9312896728515625, -5.1682281494140625, -2.29974365234375, 13.439735412597656, 3.4520111083984375, 3.1362228393554688, -2.4142723083496094, 9.636871337890625, 7.89361572265625, -0.526519775390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000643.npy"}
{"epoch": 0.9720332577475435, "step": 644, "batch_size": 64, "mean": 4.016593933105469, "std": 7.590508460998535, "min": -14.39764404296875, "p10": -5.010082626342773, "median": 3.7012710571289062, "p90": 14.088900756835942, "max": 19.046432495117188, "pos_frac": 0.703125, "sample": [9.198280334472656, -0.7607669830322266, -14.333942413330078, 5.138698577880859, -2.2657432556152344, -5.120761871337891, 3.637542724609375, 11.624277114868164, 14.81635856628418, 13.121711730957031, 5.589191436767578, -1.0330982208251953, 11.2625732421875, 11.815837860107422, 6.3312835693359375, 2.0241165161132812, 2.2415695190429688, 2.0803298950195312, 1.3083267211914062, 16.683086395263672, -0.7533607482910156, 11.451774597167969, -10.436485290527344, -0.843994140625, -3.5389556884765625, 8.836990356445312, -0.4601402282714844, 5.588809967041016, 0.349853515625, -6.0855255126953125, 10.876911163330078, -7.17108154296875, 3.9536800384521484, 9.088348388671875, 13.031364440917969, -3.350004196166992, 3.21612548828125, 18.526756286621094, 14.503410339355469, 3.0320663452148438, 6.088630676269531, 9.89144515991211, 3.7649993896484375, 2.844724655151367, -0.8656387329101562, -14.39764404296875, -0.7527751922607422, 13.043006896972656, 2.121461868286133, 15.072772979736328, 0.9207916259765625, 4.1235809326171875, -0.30438804626464844, -4.7518310546875, 3.9667205810546875, 5.649150848388672, -10.113197326660156, 3.847980499267578, 8.827667236328125, 1.3846359252929688, 1.77734375, 19.046432495117188, 4.2741851806640625, 18.42652130126953], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000644.npy"}
{"epoch": 0.9735449735449735, "step": 645, "batch_size": 64, "mean": 4.7189226150512695, "std": 6.123188495635986, "min": -9.644424438476562, "p10": -2.1606044769287105, "median": 4.165872573852539, "p90": 12.775379371643067, "max": 21.49720001220703, "pos_frac": 0.796875, "sample": [3.618244171142578, 13.76617431640625, 8.811637878417969, -3.4994583129882812, 7.767059326171875, 21.49720001220703, 8.969245910644531, 4.596728324890137, 3.2808761596679688, -0.35015106201171875, -1.5558509826660156, 1.0309295654296875, 4.304130554199219, 0.5840663909912109, 11.763580322265625, 1.1329574584960938, 8.424760818481445, 1.2785987854003906, 0.4500923156738281, -0.0142059326171875, 16.756877899169922, -3.154083251953125, 2.2515525817871094, 6.3062286376953125, 6.156700134277344, -2.8897438049316406, 0.1904296875, 5.089569091796875, 12.805639266967773, 9.75177001953125, 3.4134750366210938, 0.9895820617675781, 6.259357452392578, -0.3341484069824219, 0.621612548828125, 7.210227966308594, -7.1962738037109375, -0.9560470581054688, -2.92852783203125, 9.326202392578125, 9.231239318847656, 6.376365661621094, 19.866806030273438, 0.23730087280273438, 0.13409805297851562, 7.626714706420898, 7.5089569091796875, 3.967164993286133, 5.509696960449219, 5.136314392089844, -2.4197845458984375, 12.70477294921875, 4.027614593505859, 16.88097381591797, 6.399932861328125, 0.089752197265625, 11.608634948730469, -0.4182624816894531, 4.880706787109375, -9.644424438476562, 1.5310440063476562, 13.576082229614258, 8.955657958984375, 2.7166824340820312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000645.npy"}
{"epoch": 0.9750566893424036, "step": 646, "batch_size": 64, "mean": 5.120940208435059, "std": 6.940869331359863, "min": -13.535619735717773, "p10": -2.3059463500976562, "median": 4.120945930480957, "p90": 13.924954223632817, "max": 22.49005126953125, "pos_frac": 0.765625, "sample": [1.4179229736328125, 0.5851039886474609, 3.0105514526367188, 5.2829742431640625, 13.04171371459961, 10.585220336914062, -5.1751708984375, 10.84197998046875, 7.965635299682617, 9.048553466796875, -1.1192779541015625, 3.869354248046875, 9.603202819824219, 17.305747985839844, 8.541778564453125, 0.420166015625, 5.952171325683594, -0.000152587890625, 17.225818634033203, 10.254066467285156, 12.833404541015625, 4.939483642578125, -7.810468673706055, -0.81927490234375, 0.8088951110839844, 11.147941589355469, 3.733783721923828, -2.3592071533203125, 1.0755081176757812, 2.080963134765625, 3.16796875, 2.7313308715820312, 7.6034698486328125, 14.303485870361328, 7.3918609619140625, 1.320648193359375, 3.2158584594726562, -2.181671142578125, 4.372537612915039, 10.056951522827148, -0.11242294311523438, -2.5418357849121094, 7.824867248535156, 2.1293487548828125, 10.142364501953125, 22.49005126953125, 9.060050964355469, 14.828025817871094, 2.670238494873047, 21.877853393554688, 9.867036819458008, -0.917388916015625, 5.73406982421875, 6.257209777832031, -13.535619735717773, 2.2309112548828125, 10.746818542480469, -5.333250045776367, 0.7064666748046875, 18.493309020996094, -4.696727752685547, -0.11632537841796875, 4.787349700927734, -1.1230812072753906], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000646.npy"}
{"epoch": 0.9765684051398337, "step": 647, "batch_size": 64, "mean": 6.294255256652832, "std": 7.1791229248046875, "min": -9.896820068359375, "p10": -1.0115612030029297, "median": 4.97133731842041, "p90": 15.62230911254883, "max": 23.003494262695312, "pos_frac": 0.859375, "sample": [1.0427284240722656, 6.179496765136719, 11.209596633911133, 4.753623962402344, 7.712310791015625, 7.2100067138671875, 1.629302978515625, 20.168014526367188, 0.567108154296875, 15.835968017578125, 5.075225830078125, 5.67767333984375, 7.15179443359375, 0.4080085754394531, 1.3399200439453125, 4.00225830078125, 19.640167236328125, 1.6807708740234375, -3.3644447326660156, -9.818084716796875, 12.07948112487793, 8.830913543701172, 10.778251647949219, 2.678905487060547, 23.003494262695312, 13.727876663208008, 12.008373260498047, -1.0048561096191406, 13.610797882080078, 4.318370819091797, 6.422939300537109, -1.014434814453125, 11.78961181640625, 1.2428054809570312, 6.959136962890625, -3.709348678588867, 1.9105548858642578, 15.123771667480469, 1.2775421142578125, -3.0047531127929688, 10.105838775634766, 4.079475402832031, 3.378622055053711, 11.642524719238281, -9.896820068359375, 3.0505008697509766, 2.9192886352539062, 7.415824890136719, 2.2416458129882812, 4.867448806762695, 0.439453125, 18.379409790039062, 4.669956207275391, -3.38018798828125, -0.9893341064453125, 14.286548614501953, 0.6992626190185547, 21.784881591796875, 7.722648620605469, 11.976593017578125, 20.394393920898438, 1.5463485717773438, 8.814781188964844, 11.5523681640625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000647.npy"}
{"epoch": 0.9780801209372638, "step": 648, "batch_size": 64, "mean": 4.785521507263184, "std": 6.801364421844482, "min": -14.327709197998047, "p10": -2.1633678436279298, "median": 4.72245979309082, "p90": 13.644800567626955, "max": 19.422317504882812, "pos_frac": 0.78125, "sample": [11.811195373535156, 10.067924499511719, 5.359405517578125, -14.327709197998047, 17.23564910888672, -0.10321044921875, -12.36492919921875, 4.8520050048828125, 5.8006591796875, 4.699626922607422, 1.9149665832519531, 6.676738739013672, 2.5485992431640625, 0.2556610107421875, -3.4810104370117188, -6.635858535766602, 9.03720474243164, 0.43598175048828125, 19.422317504882812, 9.63568115234375, 18.371967315673828, 2.090564727783203, 7.625457763671875, 13.808357238769531, 9.44708251953125, 1.7436256408691406, 0.9547805786132812, 14.397392272949219, 4.745292663574219, 14.439483642578125, 3.0598068237304688, -0.3590564727783203, 11.81689453125, 13.263168334960938, 0.7240638732910156, -5.586235046386719, 3.4990081787109375, 3.5844497680664062, 5.422386169433594, 11.140975952148438, 8.089706420898438, 8.512378692626953, 16.601459503173828, 0.3397789001464844, -2.1652297973632812, -0.902099609375, 9.549530029296875, 7.952350616455078, 4.67431640625, 6.162494659423828, 7.807498931884766, 8.481117248535156, 8.98935317993164, -8.1943359375, -0.51104736328125, 3.527740478515625, 10.897918701171875, -0.036579132080078125, 1.7386703491210938, -2.1590232849121094, 2.6067962646484375, -1.486001968383789, 5.012847900390625, 3.753347396850586], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000648.npy"}
{"epoch": 0.9795918367346939, "step": 649, "batch_size": 64, "mean": 5.14467716217041, "std": 7.090489387512207, "min": -10.61676025390625, "p10": -4.43806266784668, "median": 6.9079132080078125, "p90": 15.11437683105469, "max": 20.353225708007812, "pos_frac": 0.765625, "sample": [15.362411499023438, 9.522867202758789, -0.6971244812011719, 8.158447265625, 9.163644790649414, 11.596092224121094, 8.400833129882812, 7.457807540893555, 1.86126708984375, 14.535629272460938, 2.3339710235595703, 15.887680053710938, 6.943939208984375, -7.969493865966797, 9.046527862548828, 4.152843475341797, 2.917644500732422, 9.750160217285156, 7.023616790771484, 8.135444641113281, 17.3042049407959, -1.1910057067871094, -3.8378524780273438, -5.613380432128906, -10.61676025390625, 11.433429718017578, 8.58636474609375, 1.506317138671875, -6.470075607299805, -3.9890594482421875, 9.330730438232422, 1.2694854736328125, 13.144989013671875, 1.1408500671386719, 7.822727203369141, 7.047088623046875, 5.164283752441406, 8.49197006225586, 12.611106872558594, 6.87188720703125, 1.9485015869140625, 16.857990264892578, 8.401321411132812, -2.8859291076660156, 0.2035369873046875, 15.502321243286133, 1.3496685028076172, 1.5300521850585938, 3.752044677734375, 1.5810089111328125, -4.566837310791016, 16.994468688964844, -5.742116928100586, 1.152069091796875, 20.353225708007812, -4.1375885009765625, -5.686138153076172, -0.173492431640625, 9.276725769042969, 10.969871520996094, 11.044471740722656, 9.855674743652344, 0.5639114379882812, -2.4769287109375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000649.npy"}
{"epoch": 0.981103552532124, "step": 650, "batch_size": 64, "mean": 4.575124740600586, "std": 7.529107570648193, "min": -11.1302490234375, "p10": -5.259366798400879, "median": 4.4854736328125, "p90": 15.338664054870607, "max": 21.045808792114258, "pos_frac": 0.75, "sample": [-4.369556427001953, 14.28424072265625, -7.100860595703125, 16.18450927734375, 0.7172012329101562, 15.519203186035156, 4.8802032470703125, 1.6148529052734375, 1.371002197265625, 1.5482597351074219, 6.778221130371094, 10.673892974853516, -5.7064971923828125, 4.598365783691406, 0.2169170379638672, 12.512481689453125, 14.91740608215332, 5.69230842590332, 11.874168395996094, 1.8796062469482422, 6.525226593017578, 5.70526123046875, -1.9722881317138672, -5.25758171081543, 16.43025779724121, 12.579521179199219, 21.045808792114258, 12.603458404541016, 6.195686340332031, -6.6020660400390625, 0.8525505065917969, -11.1302490234375, 0.6813545227050781, 11.928718566894531, -9.595245361328125, 1.642486572265625, 17.8585262298584, 7.896800994873047, 0.03484344482421875, -5.2601318359375, 4.372581481933594, 11.630990982055664, -6.660102844238281, 11.969467163085938, -3.0024566650390625, -0.44806671142578125, 15.915481567382812, 10.238292694091797, 4.27862548828125, 0.8096179962158203, 5.947881698608398, 0.2845611572265625, 6.575542449951172, 0.589996337890625, -0.9765415191650391, 11.807748794555664, -3.1676483154296875, 6.5225982666015625, 8.161653518676758, 17.001724243164062, -4.3934326171875, 1.3338050842285156, 4.760189056396484, -0.993408203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000650.npy"}
{"epoch": 0.982615268329554, "step": 651, "batch_size": 64, "mean": 4.506626129150391, "std": 6.576936721801758, "min": -10.334831237792969, "p10": -2.0997108459472655, "median": 3.3781795501708984, "p90": 15.32511940002442, "max": 21.049102783203125, "pos_frac": 0.75, "sample": [0.8379135131835938, 1.4760208129882812, 4.352142333984375, 2.1010284423828125, 6.669685363769531, -10.334831237792969, -2.1840553283691406, 5.206144332885742, -1.3496322631835938, 10.882490158081055, 1.8690338134765625, 1.4975738525390625, -0.3396148681640625, -2.131500244140625, 9.69644546508789, -6.7503204345703125, 0.23601341247558594, 15.928787231445312, 3.4247512817382812, 6.842155456542969, 6.386360168457031, -0.2005767822265625, -0.10100555419921875, 0.4217681884765625, 1.779052734375, 1.270782470703125, 6.765388488769531, 8.023040771484375, -1.7082061767578125, 16.149749755859375, 6.151897430419922, 10.434120178222656, 6.3569183349609375, -1.0610885620117188, 5.6923065185546875, 0.3859424591064453, 7.330120086669922, 7.1161651611328125, -3.863861083984375, 5.1782684326171875, 16.933944702148438, 6.4919281005859375, 13.916561126708984, 2.334716796875, -1.4277381896972656, 20.143753051757812, 21.049102783203125, 16.981109619140625, -2.0255355834960938, 12.330081939697266, 3.24554443359375, -1.4488067626953125, 0.23722076416015625, 8.934043884277344, 6.339385986328125, 5.7731170654296875, 9.861091613769531, 3.3316078186035156, 7.1923065185546875, -8.011474609375, 1.4623336791992188, -3.0844497680664062, 16.213279724121094, 1.21356201171875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000651.npy"}
{"epoch": 0.9841269841269841, "step": 652, "batch_size": 64, "mean": 4.446678161621094, "std": 6.575527191162109, "min": -9.116043090820312, "p10": -3.743074035644531, "median": 4.299367904663086, "p90": 14.133871841430665, "max": 17.957901000976562, "pos_frac": 0.6875, "sample": [-0.9249229431152344, 12.993249893188477, 14.893516540527344, 5.808124542236328, 4.5149078369140625, 13.965694427490234, 9.579004287719727, 8.334598541259766, 17.957901000976562, 0.9500198364257812, 11.428977966308594, -7.6385955810546875, -0.2018871307373047, 5.117990493774414, 5.478994369506836, 6.6507110595703125, 2.70111083984375, 11.423164367675781, 4.465354919433594, -0.24707412719726562, -4.194629669189453, 6.837646484375, 4.6514739990234375, 0.023496627807617188, 1.0948486328125, -0.7925872802734375, -5.5863037109375, -0.783050537109375, 14.205947875976562, 7.8164825439453125, 4.195953369140625, -2.0254669189453125, 4.402782440185547, 3.418609619140625, 10.410820007324219, -1.0211105346679688, 15.835220336914062, 2.977874755859375, -2.7756309509277344, -9.116043090820312, 0.28399658203125, 17.718307495117188, 3.520610809326172, 5.089132308959961, -0.9236907958984375, -0.022003173828125, 13.147102355957031, 6.4239654541015625, -0.4325695037841797, 10.0125732421875, 16.2059326171875, 1.63543701171875, 8.96600341796875, 2.6193313598632812, -4.259876251220703, 6.6409149169921875, -3.90069580078125, 2.2966270446777344, -1.767547607421875, 17.126558303833008, 4.570671081542969, -3.3752899169921875, -4.4615325927734375, 10.646270751953125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000652.npy"}
{"epoch": 0.9856386999244142, "step": 653, "batch_size": 64, "mean": 4.73805570602417, "std": 7.0281171798706055, "min": -10.273054122924805, "p10": -2.3175670623779294, "median": 3.2376394271850586, "p90": 15.543437194824223, "max": 21.982986450195312, "pos_frac": 0.75, "sample": [7.690889358520508, 17.534942626953125, -0.46576690673828125, 0.43221282958984375, 4.548135757446289, 1.9437522888183594, 8.16861343383789, 2.3310699462890625, 19.329986572265625, -0.4378204345703125, -2.3992652893066406, 1.2328720092773438, 13.328460693359375, 1.8277053833007812, 19.445755004882812, 11.059722900390625, 6.324256896972656, 16.824188232421875, 1.6457328796386719, 16.028167724609375, 10.433250427246094, 16.760467529296875, 2.003816604614258, -0.9026451110839844, 1.4622783660888672, -1.1256599426269531, 10.126640319824219, 0.45674896240234375, 0.06341361999511719, 0.142181396484375, 21.982986450195312, -3.89166259765625, -2.1269378662109375, 2.527799606323242, 7.003337860107422, -3.435302734375, 6.138294219970703, 4.036537170410156, 5.3126220703125, 0.5858039855957031, 1.5796890258789062, -9.91748046875, 5.83551025390625, 3.947479248046875, 12.152053833007812, -10.273054122924805, 14.412399291992188, -1.0198974609375, 14.145126342773438, 4.4033660888671875, 1.4481430053710938, 12.210697174072266, -7.0672607421875, -0.28271484375, -1.466939926147461, -0.22966575622558594, 10.048530578613281, 5.284324645996094, 1.4252700805664062, 6.863433837890625, 4.754539489746094, 5.412858963012695, -2.4113311767578125, 8.032913208007812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000653.npy"}
{"epoch": 0.9871504157218443, "step": 654, "batch_size": 64, "mean": 4.607398986816406, "std": 7.053544998168945, "min": -9.695571899414062, "p10": -2.933746147155761, "median": 3.392183303833008, "p90": 15.767136192321784, "max": 24.27813720703125, "pos_frac": 0.75, "sample": [3.0106201171875, 12.920639038085938, -6.459686279296875, 7.013425827026367, 2.35479736328125, -6.700225830078125, 10.864204406738281, 2.6257247924804688, 4.489646911621094, 1.0006942749023438, 2.3480224609375, -4.728141784667969, 16.45032501220703, 3.9987869262695312, -0.41436004638671875, 4.453268051147461, 4.307579040527344, -5.272789001464844, 14.173028945922852, -0.20491790771484375, 1.67425537109375, 3.505779266357422, 12.216484069824219, 21.062362670898438, -0.33731842041015625, 1.5746231079101562, -0.07089805603027344, -1.5132369995117188, -0.12058258056640625, 3.472393035888672, 1.3578529357910156, 6.256011962890625, 24.27813720703125, 3.721773147583008, 6.664459228515625, 11.463722229003906, 6.399314880371094, -2.4988937377929688, 2.8400726318359375, 0.0726318359375, 10.317209243774414, -9.695571899414062, 6.7827606201171875, -3.1201114654541016, 2.307893753051758, -0.9473114013671875, 18.61571502685547, 17.453229904174805, 3.3119735717773438, 4.411811828613281, 1.5570068359375, 19.722320556640625, 2.155557632446289, 4.298515319824219, 19.975582122802734, -3.2095298767089844, 5.197406768798828, 3.6880531311035156, -1.8490447998046875, 10.753814697265625, 6.662708282470703, 1.9814682006835938, 2.1531219482421875, 4.099395751953125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000654.npy"}
{"epoch": 0.9886621315192744, "step": 655, "batch_size": 64, "mean": 5.344045639038086, "std": 5.858038902282715, "min": -5.968025207519531, "p10": -1.4389060974121093, "median": 4.280174255371094, "p90": 12.715650749206546, "max": 26.165267944335938, "pos_frac": 0.828125, "sample": [9.834951400756836, 1.5756072998046875, -1.2374420166015625, 3.0691566467285156, 1.5947647094726562, 13.060531616210938, 10.0, 7.421436309814453, 4.025794982910156, 2.0542144775390625, 0.9080963134765625, 5.114402770996094, 3.0939559936523438, -0.37641143798828125, -3.2820358276367188, -2.0358047485351562, 4.576272964477539, 8.154670715332031, 17.615829467773438, 1.6014766693115234, 15.450565338134766, -2.4713516235351562, 11.910928726196289, 7.724517822265625, 4.457891464233398, -0.8224716186523438, 3.3519821166992188, -5.092578887939453, 13.716018676757812, 2.327423095703125, 8.027257919311523, 3.6545639038085938, 10.048629760742188, 2.7510757446289062, 7.9398345947265625, 1.3471183776855469, 2.4701881408691406, 10.707168579101562, 7.4699554443359375, 26.165267944335938, 2.021026611328125, -1.421661376953125, -1.4462966918945312, 13.345733642578125, 4.102457046508789, -5.968025207519531, 5.276069641113281, 10.039627075195312, 13.953653335571289, 8.74151611328125, 0.8785057067871094, 2.5475997924804688, 8.96319580078125, 3.722257614135742, 3.2915878295898438, 8.609588623046875, 11.298473358154297, 4.680931091308594, 10.376754760742188, 1.26544189453125, -4.423199653625488, 9.26511001586914, 6.570037841796875, 8.425086975097656], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000655.npy"}
{"epoch": 0.9901738473167044, "step": 656, "batch_size": 64, "mean": 5.719700336456299, "std": 6.963493347167969, "min": -8.958839416503906, "p10": -2.2816905975341792, "median": 4.147302627563477, "p90": 15.859647369384767, "max": 21.50316619873047, "pos_frac": 0.828125, "sample": [15.554576873779297, 2.4446640014648438, 15.95733642578125, 7.110118865966797, -0.9546165466308594, 1.475982666015625, 2.327136993408203, 0.8513908386230469, 6.71356201171875, -1.8320884704589844, 2.756793975830078, 16.143898010253906, 0.7934494018554688, -1.8776092529296875, 1.3761367797851562, 14.301895141601562, -6.1361541748046875, -1.9059677124023438, 6.371208190917969, 11.024574279785156, 21.50316619873047, 5.080955505371094, -8.958839416503906, 6.869770050048828, 13.847000122070312, -2.849292755126953, 3.5660324096679688, 12.77972412109375, -4.496063232421875, 2.9712867736816406, 19.794158935546875, 1.0085105895996094, 3.6994762420654297, 4.915489196777344, 4.138336181640625, 13.724288940429688, 6.9094390869140625, 4.156269073486328, 15.631706237792969, 8.140762329101562, 0.2092132568359375, -2.942584991455078, 6.4100494384765625, 15.126228332519531, 5.387447357177734, 10.444786071777344, 2.7147274017333984, 11.724250793457031, 2.060821533203125, 8.852012634277344, 16.642974853515625, -2.4427146911621094, 5.3414306640625, 20.1419677734375, 7.5504150390625, 2.161773681640625, 0.44589996337890625, -4.315277099609375, 0.9952011108398438, 3.3623504638671875, 0.13584136962890625, 3.2450408935546875, 10.907211303710938, 16.973297119140625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000656.npy"}
{"epoch": 0.9916855631141346, "step": 657, "batch_size": 64, "mean": 5.956955909729004, "std": 7.46238374710083, "min": -11.91043472290039, "p10": -2.518032455444336, "median": 6.740997314453125, "p90": 16.26165466308594, "max": 21.959300994873047, "pos_frac": 0.75, "sample": [16.845916748046875, 7.87152099609375, -0.6100273132324219, 12.960357666015625, 7.757265090942383, 3.5961360931396484, 18.948486328125, 17.518756866455078, 7.557960510253906, -11.91043472290039, -2.8942489624023438, -2.596050262451172, 9.516429901123047, 0.2934112548828125, 12.089218139648438, 3.1250877380371094, 8.675430297851562, 20.25042152404785, -5.1776275634765625, 10.6741943359375, -1.568511962890625, 9.061601638793945, -7.235107421875, 2.1517601013183594, 8.179624557495117, 14.648231506347656, 3.22589111328125, 2.6648902893066406, -0.6610107421875, 4.323921203613281, 8.071426391601562, 10.009685516357422, 8.524940490722656, 14.069465637207031, 9.22622299194336, 7.912239074707031, 5.924034118652344, 18.09149169921875, 14.89837646484375, 1.7550697326660156, 11.910797119140625, 1.9269866943359375, -0.1187744140625, 14.064483642578125, -0.49310302734375, -0.5870361328125, 21.959300994873047, 1.3119659423828125, -2.3359909057617188, 3.8505706787109375, 2.5938644409179688, -9.0098876953125, 11.229812622070312, -0.14046669006347656, 2.1044654846191406, 1.6596908569335938, -5.715522766113281, 7.8373870849609375, 8.661785125732422, 19.0889892578125, -0.46450042724609375, 10.260784149169922, 1.4854507446289062, 12.397695541381836], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000657.npy"}
{"epoch": 0.9931972789115646, "step": 658, "batch_size": 64, "mean": 4.452823638916016, "std": 7.959387302398682, "min": -11.948028564453125, "p10": -4.321629524230957, "median": 3.0897064208984375, "p90": 15.330417060852051, "max": 20.796066284179688, "pos_frac": 0.703125, "sample": [7.498775482177734, 5.3622589111328125, 11.822219848632812, -5.631317138671875, 2.185455322265625, 1.276632308959961, -10.532312393188477, 1.1216888427734375, 7.080636978149414, 10.0379638671875, 1.574462890625, 4.947967529296875, -10.234952926635742, 13.97762680053711, 8.839275360107422, 2.1785011291503906, -4.0597076416015625, 7.433929443359375, 18.98504638671875, -1.0376815795898438, -1.5624961853027344, 0.051422119140625, -2.8338470458984375, 10.028205871582031, 13.1318359375, 15.068099975585938, -6.341033935546875, 9.26356315612793, 9.854782104492188, 1.1187591552734375, 0.5353126525878906, -0.4424629211425781, 4.264934539794922, 12.166397094726562, -0.26428985595703125, 10.60699462890625, 15.422683715820312, -4.433881759643555, 1.4125328063964844, 4.1870880126953125, 20.422386169433594, 1.1918506622314453, 6.0084075927734375, 15.923870086669922, -1.2590751647949219, 8.406122207641602, -0.5303878784179688, 15.32562255859375, 15.33247184753418, 6.377887725830078, 1.5913772583007812, -2.668436050415039, -3.1210784912109375, -11.552900314331055, 3.99395751953125, 8.085792541503906, -3.314128875732422, -11.948028564453125, 20.796066284179688, -2.8018836975097656, 18.510009765625, 1.86712646484375, 14.047035217285156, 0.23560523986816406], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000658.npy"}
{"epoch": 0.9947089947089947, "step": 659, "batch_size": 64, "mean": 3.8894009590148926, "std": 7.140342712402344, "min": -14.40261459350586, "p10": -5.022989654541016, "median": 3.579486846923828, "p90": 12.688568115234377, "max": 19.637435913085938, "pos_frac": 0.734375, "sample": [3.3349380493164062, 9.875774383544922, 15.371566772460938, 7.245706558227539, 1.0111885070800781, 15.545806884765625, 3.5849609375, 9.508123397827148, 4.171405792236328, 17.86862564086914, 10.520355224609375, -2.8199310302734375, 0.8789749145507812, -0.2856178283691406, 1.1183948516845703, 3.6298065185546875, 9.568260192871094, -8.594669342041016, 3.1083221435546875, -5.65814208984375, 12.867691040039062, -1.2970256805419922, -0.43846893310546875, 6.633796691894531, 1.2317333221435547, -0.9810066223144531, 2.0848960876464844, 4.314800262451172, -0.4773235321044922, 6.5472259521484375, 4.1908721923828125, 1.2569351196289062, 12.270614624023438, 4.2003936767578125, 3.3115005493164062, 0.8051071166992188, 16.458885192871094, 8.159576416015625, 18.876842498779297, 4.884151458740234, 12.131072998046875, -5.0296478271484375, -3.7365798950195312, 9.055351257324219, -1.6600627899169922, 0.2975921630859375, 5.9729156494140625, 9.259605407714844, 6.434394836425781, -5.095848083496094, 3.5162429809570312, 7.913565635681152, -13.273216247558594, -5.007453918457031, 10.686996459960938, 5.460453033447266, 0.18975067138671875, -14.40261459350586, 19.637435913085938, 3.5740127563476562, -3.7617626190185547, 8.698381423950195, 0.288299560546875, -6.112274169921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000659.npy"}
{"epoch": 0.9962207105064248, "step": 660, "batch_size": 64, "mean": 5.384605407714844, "std": 6.330997467041016, "min": -7.8674774169921875, "p10": -1.9624092102050772, "median": 4.5010223388671875, "p90": 14.348161315917972, "max": 20.350006103515625, "pos_frac": 0.8125, "sample": [16.95477294921875, 3.8077392578125, 2.6610794067382812, -0.6851577758789062, 6.617286682128906, -2.815593719482422, 1.6333770751953125, 9.310009002685547, -4.463115692138672, -7.8674774169921875, 12.215259552001953, 20.350006103515625, 3.4833106994628906, 5.199522018432617, 4.53466796875, 17.070175170898438, 13.416519165039062, 6.176486968994141, 2.850748062133789, -2.3558502197265625, 4.4613189697265625, -6.46112060546875, 14.7474365234375, 0.26317596435546875, 5.2212066650390625, 17.38945770263672, -5.451175689697266, 8.441139221191406, 6.0471954345703125, -2.618377685546875, 2.3983802795410156, 9.693832397460938, 17.64764404296875, 3.9451675415039062, 0.4715862274169922, -0.7813873291015625, 3.143310546875, 4.665142059326172, 8.845739364624023, 0.38385009765625, 3.1472549438476562, 7.747575759887695, 7.125740051269531, 7.373481750488281, 11.906801223754883, 10.109725952148438, 0.548583984375, 1.8412399291992188, 8.884279251098633, 11.098686218261719, 4.467376708984375, 19.072784423828125, 2.0195236206054688, -0.19650840759277344, 13.285362243652344, 2.053577423095703, 6.171998977661133, 9.255783081054688, 3.6466445922851562, -0.6500988006591797, 1.8588485717773438, 4.622802734375, -1.0443801879882812, 9.720359802246094], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000660.npy"}
{"epoch": 0.9977324263038548, "step": 661, "batch_size": 64, "mean": 4.449113845825195, "std": 7.42781400680542, "min": -14.693126678466797, "p10": -4.273970031738281, "median": 3.976555824279785, "p90": 13.947264099121096, "max": 18.974123001098633, "pos_frac": 0.71875, "sample": [2.401947021484375, -3.177898406982422, -8.749748229980469, -4.381359100341797, 10.16843032836914, 3.6705589294433594, 13.133216857910156, -1.29461669921875, -13.937423706054688, 6.733518600463867, -4.847999572753906, 2.5509567260742188, -0.00678253173828125, 16.344562530517578, 10.710075378417969, 14.160293579101562, 18.974123001098633, 4.8415679931640625, 18.248519897460938, -0.00891876220703125, -14.693126678466797, 4.588043212890625, 15.459470748901367, 0.8271026611328125, -1.3088150024414062, 2.0857925415039062, 2.551973342895508, -4.023395538330078, 0.4493274688720703, 3.501678466796875, 8.03854751586914, 6.307430267333984, -8.150991439819336, 13.098091125488281, 13.4501953125, 1.7474536895751953, 5.760948181152344, 15.053443908691406, 1.0445060729980469, 10.89306640625, 7.296905517578125, 1.9587554931640625, -0.5282135009765625, 13.250263214111328, 11.745315551757812, 7.089210510253906, 13.154777526855469, 3.682771682739258, 1.9332447052001953, 2.2269954681396484, -5.144569396972656, 9.970352172851562, -3.7714996337890625, -1.317007064819336, 6.331333160400391, 10.645942687988281, 7.5381927490234375, 5.39862060546875, 4.2703399658203125, 16.005569458007812, 4.636993408203125, -1.8270416259765625, -2.102764129638672, 10.08502197265625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85/margin_logs/step_0000661.npy"}