From 6d01dcb8746d93af42faed414e6b39413d2abed4 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Sun, 10 May 2026 11:52:05 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: jackf857/llama-3-8b-base-margin-dpo-hh-harmless-beta0.01 Source: Original Platform --- .gitattributes | 36 + README.md | 76 + all_results.json | 22 + config.json | 29 + eval_results.json | 16 + generation_config.json | 9 + margin_logs/margins.jsonl | 661 ++ margin_logs/step_0000001.npy | 3 + margin_logs/step_0000002.npy | 3 + margin_logs/step_0000003.npy | 3 + margin_logs/step_0000004.npy | 3 + margin_logs/step_0000005.npy | 3 + margin_logs/step_0000006.npy | 3 + margin_logs/step_0000007.npy | 3 + margin_logs/step_0000008.npy | 3 + margin_logs/step_0000009.npy | 3 + margin_logs/step_0000010.npy | 3 + margin_logs/step_0000011.npy | 3 + margin_logs/step_0000012.npy | 3 + margin_logs/step_0000013.npy | 3 + margin_logs/step_0000014.npy | 3 + margin_logs/step_0000015.npy | 3 + margin_logs/step_0000016.npy | 3 + margin_logs/step_0000017.npy | 3 + margin_logs/step_0000018.npy | 3 + margin_logs/step_0000019.npy | 3 + margin_logs/step_0000020.npy | 3 + margin_logs/step_0000021.npy | 3 + margin_logs/step_0000022.npy | 3 + margin_logs/step_0000023.npy | 3 + margin_logs/step_0000024.npy | 3 + margin_logs/step_0000025.npy | 3 + margin_logs/step_0000026.npy | 3 + margin_logs/step_0000027.npy | 3 + margin_logs/step_0000028.npy | 3 + margin_logs/step_0000029.npy | 3 + margin_logs/step_0000030.npy | 3 + margin_logs/step_0000031.npy | 3 + margin_logs/step_0000032.npy | 3 + margin_logs/step_0000033.npy | 3 + margin_logs/step_0000034.npy | 3 + margin_logs/step_0000035.npy | 3 + margin_logs/step_0000036.npy | 3 + margin_logs/step_0000037.npy | 3 + margin_logs/step_0000038.npy | 3 + margin_logs/step_0000039.npy | 3 + margin_logs/step_0000040.npy | 3 + margin_logs/step_0000041.npy | 3 + margin_logs/step_0000042.npy | 3 + margin_logs/step_0000043.npy | 3 + margin_logs/step_0000044.npy | 3 + margin_logs/step_0000045.npy | 3 + margin_logs/step_0000046.npy | 3 + margin_logs/step_0000047.npy | 3 + margin_logs/step_0000048.npy | 3 + margin_logs/step_0000049.npy | 3 + margin_logs/step_0000050.npy | 3 + margin_logs/step_0000051.npy | 3 + margin_logs/step_0000052.npy | 3 + margin_logs/step_0000053.npy | 3 + margin_logs/step_0000054.npy | 3 + margin_logs/step_0000055.npy | 3 + margin_logs/step_0000056.npy | 3 + margin_logs/step_0000057.npy | 3 + margin_logs/step_0000058.npy | 3 + margin_logs/step_0000059.npy | 3 + margin_logs/step_0000060.npy | 3 + margin_logs/step_0000061.npy | 3 + margin_logs/step_0000062.npy | 3 + margin_logs/step_0000063.npy | 3 + margin_logs/step_0000064.npy | 3 + margin_logs/step_0000065.npy | 3 + margin_logs/step_0000066.npy | 3 + margin_logs/step_0000067.npy | 3 + margin_logs/step_0000068.npy | 3 + margin_logs/step_0000069.npy | 3 + margin_logs/step_0000070.npy | 3 + margin_logs/step_0000071.npy | 3 + margin_logs/step_0000072.npy | 3 + margin_logs/step_0000073.npy | 3 + margin_logs/step_0000074.npy | 3 + margin_logs/step_0000075.npy | 3 + margin_logs/step_0000076.npy | 3 + margin_logs/step_0000077.npy | 3 + margin_logs/step_0000078.npy | 3 + margin_logs/step_0000079.npy | 3 + margin_logs/step_0000080.npy | 3 + margin_logs/step_0000081.npy | 3 + margin_logs/step_0000082.npy | 3 + margin_logs/step_0000083.npy | 3 + margin_logs/step_0000084.npy | 3 + margin_logs/step_0000085.npy | 3 + margin_logs/step_0000086.npy | 3 + margin_logs/step_0000087.npy | 3 + margin_logs/step_0000088.npy | 3 + margin_logs/step_0000089.npy | 3 + margin_logs/step_0000090.npy | 3 + margin_logs/step_0000091.npy | 3 + margin_logs/step_0000092.npy | 3 + margin_logs/step_0000093.npy | 3 + margin_logs/step_0000094.npy | 3 + margin_logs/step_0000095.npy | 3 + margin_logs/step_0000096.npy | 3 + margin_logs/step_0000097.npy | 3 + margin_logs/step_0000098.npy | 3 + margin_logs/step_0000099.npy | 3 + margin_logs/step_0000100.npy | 3 + margin_logs/step_0000101.npy | 3 + margin_logs/step_0000102.npy | 3 + margin_logs/step_0000103.npy | 3 + margin_logs/step_0000104.npy | 3 + margin_logs/step_0000105.npy | 3 + margin_logs/step_0000106.npy | 3 + margin_logs/step_0000107.npy | 3 + margin_logs/step_0000108.npy | 3 + margin_logs/step_0000109.npy | 3 + margin_logs/step_0000110.npy | 3 + margin_logs/step_0000111.npy | 3 + margin_logs/step_0000112.npy | 3 + margin_logs/step_0000113.npy | 3 + margin_logs/step_0000114.npy | 3 + margin_logs/step_0000115.npy | 3 + margin_logs/step_0000116.npy | 3 + margin_logs/step_0000117.npy | 3 + margin_logs/step_0000118.npy | 3 + margin_logs/step_0000119.npy | 3 + margin_logs/step_0000120.npy | 3 + margin_logs/step_0000121.npy | 3 + margin_logs/step_0000122.npy | 3 + margin_logs/step_0000123.npy | 3 + margin_logs/step_0000124.npy | 3 + margin_logs/step_0000125.npy | 3 + margin_logs/step_0000126.npy | 3 + margin_logs/step_0000127.npy | 3 + margin_logs/step_0000128.npy | 3 + margin_logs/step_0000129.npy | 3 + margin_logs/step_0000130.npy | 3 + margin_logs/step_0000131.npy | 3 + margin_logs/step_0000132.npy | 3 + margin_logs/step_0000133.npy | 3 + margin_logs/step_0000134.npy | 3 + margin_logs/step_0000135.npy | 3 + margin_logs/step_0000136.npy | 3 + margin_logs/step_0000137.npy | 3 + margin_logs/step_0000138.npy | 3 + margin_logs/step_0000139.npy | 3 + margin_logs/step_0000140.npy | 3 + margin_logs/step_0000141.npy | 3 + margin_logs/step_0000142.npy | 3 + margin_logs/step_0000143.npy | 3 + margin_logs/step_0000144.npy | 3 + margin_logs/step_0000145.npy | 3 + margin_logs/step_0000146.npy | 3 + margin_logs/step_0000147.npy | 3 + margin_logs/step_0000148.npy | 3 + margin_logs/step_0000149.npy | 3 + margin_logs/step_0000150.npy | 3 + margin_logs/step_0000151.npy | 3 + margin_logs/step_0000152.npy | 3 + margin_logs/step_0000153.npy | 3 + margin_logs/step_0000154.npy | 3 + margin_logs/step_0000155.npy | 3 + margin_logs/step_0000156.npy | 3 + margin_logs/step_0000157.npy | 3 + margin_logs/step_0000158.npy | 3 + margin_logs/step_0000159.npy | 3 + margin_logs/step_0000160.npy | 3 + margin_logs/step_0000161.npy | 3 + margin_logs/step_0000162.npy | 3 + margin_logs/step_0000163.npy | 3 + margin_logs/step_0000164.npy | 3 + margin_logs/step_0000165.npy | 3 + margin_logs/step_0000166.npy | 3 + margin_logs/step_0000167.npy | 3 + margin_logs/step_0000168.npy | 3 + margin_logs/step_0000169.npy | 3 + margin_logs/step_0000170.npy | 3 + margin_logs/step_0000171.npy | 3 + margin_logs/step_0000172.npy | 3 + margin_logs/step_0000173.npy | 3 + margin_logs/step_0000174.npy | 3 + margin_logs/step_0000175.npy | 3 + margin_logs/step_0000176.npy | 3 + margin_logs/step_0000177.npy | 3 + margin_logs/step_0000178.npy | 3 + margin_logs/step_0000179.npy | 3 + margin_logs/step_0000180.npy | 3 + margin_logs/step_0000181.npy | 3 + margin_logs/step_0000182.npy | 3 + margin_logs/step_0000183.npy | 3 + margin_logs/step_0000184.npy | 3 + margin_logs/step_0000185.npy | 3 + margin_logs/step_0000186.npy | 3 + margin_logs/step_0000187.npy | 3 + margin_logs/step_0000188.npy | 3 + margin_logs/step_0000189.npy | 3 + margin_logs/step_0000190.npy | 3 + margin_logs/step_0000191.npy | 3 + margin_logs/step_0000192.npy | 3 + margin_logs/step_0000193.npy | 3 + margin_logs/step_0000194.npy | 3 + margin_logs/step_0000195.npy | 3 + margin_logs/step_0000196.npy | 3 + margin_logs/step_0000197.npy | 3 + margin_logs/step_0000198.npy | 3 + margin_logs/step_0000199.npy | 3 + margin_logs/step_0000200.npy | 3 + margin_logs/step_0000201.npy | 3 + margin_logs/step_0000202.npy | 3 + margin_logs/step_0000203.npy | 3 + margin_logs/step_0000204.npy | 3 + margin_logs/step_0000205.npy | 3 + margin_logs/step_0000206.npy | 3 + margin_logs/step_0000207.npy | 3 + margin_logs/step_0000208.npy | 3 + margin_logs/step_0000209.npy | 3 + margin_logs/step_0000210.npy | 3 + margin_logs/step_0000211.npy | 3 + margin_logs/step_0000212.npy | 3 + margin_logs/step_0000213.npy | 3 + margin_logs/step_0000214.npy | 3 + margin_logs/step_0000215.npy | 3 + margin_logs/step_0000216.npy | 3 + margin_logs/step_0000217.npy | 3 + margin_logs/step_0000218.npy | 3 + margin_logs/step_0000219.npy | 3 + margin_logs/step_0000220.npy | 3 + margin_logs/step_0000221.npy | 3 + margin_logs/step_0000222.npy | 3 + margin_logs/step_0000223.npy | 3 + margin_logs/step_0000224.npy | 3 + margin_logs/step_0000225.npy | 3 + margin_logs/step_0000226.npy | 3 + margin_logs/step_0000227.npy | 3 + margin_logs/step_0000228.npy | 3 + margin_logs/step_0000229.npy | 3 + margin_logs/step_0000230.npy | 3 + margin_logs/step_0000231.npy | 3 + margin_logs/step_0000232.npy | 3 + margin_logs/step_0000233.npy | 3 + margin_logs/step_0000234.npy | 3 + margin_logs/step_0000235.npy | 3 + margin_logs/step_0000236.npy | 3 + margin_logs/step_0000237.npy | 3 + margin_logs/step_0000238.npy | 3 + margin_logs/step_0000239.npy | 3 + margin_logs/step_0000240.npy | 3 + margin_logs/step_0000241.npy | 3 + margin_logs/step_0000242.npy | 3 + margin_logs/step_0000243.npy | 3 + margin_logs/step_0000244.npy | 3 + margin_logs/step_0000245.npy | 3 + margin_logs/step_0000246.npy | 3 + margin_logs/step_0000247.npy | 3 + margin_logs/step_0000248.npy | 3 + margin_logs/step_0000249.npy | 3 + margin_logs/step_0000250.npy | 3 + margin_logs/step_0000251.npy | 3 + margin_logs/step_0000252.npy | 3 + margin_logs/step_0000253.npy | 3 + margin_logs/step_0000254.npy | 3 + margin_logs/step_0000255.npy | 3 + margin_logs/step_0000256.npy | 3 + margin_logs/step_0000257.npy | 3 + margin_logs/step_0000258.npy | 3 + margin_logs/step_0000259.npy | 3 + margin_logs/step_0000260.npy | 3 + margin_logs/step_0000261.npy | 3 + margin_logs/step_0000262.npy | 3 + margin_logs/step_0000263.npy | 3 + margin_logs/step_0000264.npy | 3 + margin_logs/step_0000265.npy | 3 + margin_logs/step_0000266.npy | 3 + margin_logs/step_0000267.npy | 3 + margin_logs/step_0000268.npy | 3 + margin_logs/step_0000269.npy | 3 + margin_logs/step_0000270.npy | 3 + margin_logs/step_0000271.npy | 3 + margin_logs/step_0000272.npy | 3 + margin_logs/step_0000273.npy | 3 + margin_logs/step_0000274.npy | 3 + margin_logs/step_0000275.npy | 3 + margin_logs/step_0000276.npy | 3 + margin_logs/step_0000277.npy | 3 + margin_logs/step_0000278.npy | 3 + margin_logs/step_0000279.npy | 3 + margin_logs/step_0000280.npy | 3 + margin_logs/step_0000281.npy | 3 + margin_logs/step_0000282.npy | 3 + margin_logs/step_0000283.npy | 3 + margin_logs/step_0000284.npy | 3 + margin_logs/step_0000285.npy | 3 + margin_logs/step_0000286.npy | 3 + margin_logs/step_0000287.npy | 3 + margin_logs/step_0000288.npy | 3 + margin_logs/step_0000289.npy | 3 + margin_logs/step_0000290.npy | 3 + margin_logs/step_0000291.npy | 3 + margin_logs/step_0000292.npy | 3 + margin_logs/step_0000293.npy | 3 + margin_logs/step_0000294.npy | 3 + margin_logs/step_0000295.npy | 3 + margin_logs/step_0000296.npy | 3 + margin_logs/step_0000297.npy | 3 + margin_logs/step_0000298.npy | 3 + margin_logs/step_0000299.npy | 3 + margin_logs/step_0000300.npy | 3 + margin_logs/step_0000301.npy | 3 + margin_logs/step_0000302.npy | 3 + margin_logs/step_0000303.npy | 3 + margin_logs/step_0000304.npy | 3 + margin_logs/step_0000305.npy | 3 + margin_logs/step_0000306.npy | 3 + margin_logs/step_0000307.npy | 3 + margin_logs/step_0000308.npy | 3 + margin_logs/step_0000309.npy | 3 + margin_logs/step_0000310.npy | 3 + margin_logs/step_0000311.npy | 3 + margin_logs/step_0000312.npy | 3 + margin_logs/step_0000313.npy | 3 + margin_logs/step_0000314.npy | 3 + margin_logs/step_0000315.npy | 3 + margin_logs/step_0000316.npy | 3 + margin_logs/step_0000317.npy | 3 + margin_logs/step_0000318.npy | 3 + margin_logs/step_0000319.npy | 3 + margin_logs/step_0000320.npy | 3 + margin_logs/step_0000321.npy | 3 + margin_logs/step_0000322.npy | 3 + margin_logs/step_0000323.npy | 3 + margin_logs/step_0000324.npy | 3 + margin_logs/step_0000325.npy | 3 + margin_logs/step_0000326.npy | 3 + margin_logs/step_0000327.npy | 3 + margin_logs/step_0000328.npy | 3 + margin_logs/step_0000329.npy | 3 + margin_logs/step_0000330.npy | 3 + margin_logs/step_0000331.npy | 3 + margin_logs/step_0000332.npy | 3 + margin_logs/step_0000333.npy | 3 + margin_logs/step_0000334.npy | 3 + margin_logs/step_0000335.npy | 3 + margin_logs/step_0000336.npy | 3 + margin_logs/step_0000337.npy | 3 + margin_logs/step_0000338.npy | 3 + margin_logs/step_0000339.npy | 3 + margin_logs/step_0000340.npy | 3 + margin_logs/step_0000341.npy | 3 + margin_logs/step_0000342.npy | 3 + margin_logs/step_0000343.npy | 3 + margin_logs/step_0000344.npy | 3 + margin_logs/step_0000345.npy | 3 + margin_logs/step_0000346.npy | 3 + margin_logs/step_0000347.npy | 3 + margin_logs/step_0000348.npy | 3 + margin_logs/step_0000349.npy | 3 + margin_logs/step_0000350.npy | 3 + margin_logs/step_0000351.npy | 3 + margin_logs/step_0000352.npy | 3 + margin_logs/step_0000353.npy | 3 + margin_logs/step_0000354.npy | 3 + margin_logs/step_0000355.npy | 3 + margin_logs/step_0000356.npy | 3 + margin_logs/step_0000357.npy | 3 + margin_logs/step_0000358.npy | 3 + margin_logs/step_0000359.npy | 3 + margin_logs/step_0000360.npy | 3 + margin_logs/step_0000361.npy | 3 + margin_logs/step_0000362.npy | 3 + margin_logs/step_0000363.npy | 3 + margin_logs/step_0000364.npy | 3 + margin_logs/step_0000365.npy | 3 + margin_logs/step_0000366.npy | 3 + margin_logs/step_0000367.npy | 3 + margin_logs/step_0000368.npy | 3 + margin_logs/step_0000369.npy | 3 + margin_logs/step_0000370.npy | 3 + margin_logs/step_0000371.npy | 3 + margin_logs/step_0000372.npy | 3 + margin_logs/step_0000373.npy | 3 + margin_logs/step_0000374.npy | 3 + margin_logs/step_0000375.npy | 3 + margin_logs/step_0000376.npy | 3 + margin_logs/step_0000377.npy | 3 + margin_logs/step_0000378.npy | 3 + margin_logs/step_0000379.npy | 3 + margin_logs/step_0000380.npy | 3 + margin_logs/step_0000381.npy | 3 + margin_logs/step_0000382.npy | 3 + margin_logs/step_0000383.npy | 3 + margin_logs/step_0000384.npy | 3 + margin_logs/step_0000385.npy | 3 + margin_logs/step_0000386.npy | 3 + margin_logs/step_0000387.npy | 3 + margin_logs/step_0000388.npy | 3 + margin_logs/step_0000389.npy | 3 + margin_logs/step_0000390.npy | 3 + margin_logs/step_0000391.npy | 3 + margin_logs/step_0000392.npy | 3 + margin_logs/step_0000393.npy | 3 + margin_logs/step_0000394.npy | 3 + margin_logs/step_0000395.npy | 3 + margin_logs/step_0000396.npy | 3 + margin_logs/step_0000397.npy | 3 + margin_logs/step_0000398.npy | 3 + margin_logs/step_0000399.npy | 3 + margin_logs/step_0000400.npy | 3 + margin_logs/step_0000401.npy | 3 + margin_logs/step_0000402.npy | 3 + margin_logs/step_0000403.npy | 3 + margin_logs/step_0000404.npy | 3 + margin_logs/step_0000405.npy | 3 + margin_logs/step_0000406.npy | 3 + margin_logs/step_0000407.npy | 3 + margin_logs/step_0000408.npy | 3 + margin_logs/step_0000409.npy | 3 + margin_logs/step_0000410.npy | 3 + margin_logs/step_0000411.npy | 3 + margin_logs/step_0000412.npy | 3 + margin_logs/step_0000413.npy | 3 + margin_logs/step_0000414.npy | 3 + margin_logs/step_0000415.npy | 3 + margin_logs/step_0000416.npy | 3 + margin_logs/step_0000417.npy | 3 + margin_logs/step_0000418.npy | 3 + margin_logs/step_0000419.npy | 3 + margin_logs/step_0000420.npy | 3 + margin_logs/step_0000421.npy | 3 + margin_logs/step_0000422.npy | 3 + margin_logs/step_0000423.npy | 3 + margin_logs/step_0000424.npy | 3 + margin_logs/step_0000425.npy | 3 + margin_logs/step_0000426.npy | 3 + margin_logs/step_0000427.npy | 3 + margin_logs/step_0000428.npy | 3 + margin_logs/step_0000429.npy | 3 + margin_logs/step_0000430.npy | 3 + margin_logs/step_0000431.npy | 3 + margin_logs/step_0000432.npy | 3 + margin_logs/step_0000433.npy | 3 + margin_logs/step_0000434.npy | 3 + margin_logs/step_0000435.npy | 3 + margin_logs/step_0000436.npy | 3 + margin_logs/step_0000437.npy | 3 + margin_logs/step_0000438.npy | 3 + margin_logs/step_0000439.npy | 3 + margin_logs/step_0000440.npy | 3 + margin_logs/step_0000441.npy | 3 + margin_logs/step_0000442.npy | 3 + margin_logs/step_0000443.npy | 3 + margin_logs/step_0000444.npy | 3 + margin_logs/step_0000445.npy | 3 + margin_logs/step_0000446.npy | 3 + margin_logs/step_0000447.npy | 3 + margin_logs/step_0000448.npy | 3 + margin_logs/step_0000449.npy | 3 + margin_logs/step_0000450.npy | 3 + margin_logs/step_0000451.npy | 3 + margin_logs/step_0000452.npy | 3 + margin_logs/step_0000453.npy | 3 + margin_logs/step_0000454.npy | 3 + margin_logs/step_0000455.npy | 3 + margin_logs/step_0000456.npy | 3 + margin_logs/step_0000457.npy | 3 + margin_logs/step_0000458.npy | 3 + margin_logs/step_0000459.npy | 3 + margin_logs/step_0000460.npy | 3 + margin_logs/step_0000461.npy | 3 + margin_logs/step_0000462.npy | 3 + margin_logs/step_0000463.npy | 3 + margin_logs/step_0000464.npy | 3 + margin_logs/step_0000465.npy | 3 + margin_logs/step_0000466.npy | 3 + margin_logs/step_0000467.npy | 3 + margin_logs/step_0000468.npy | 3 + margin_logs/step_0000469.npy | 3 + margin_logs/step_0000470.npy | 3 + margin_logs/step_0000471.npy | 3 + margin_logs/step_0000472.npy | 3 + margin_logs/step_0000473.npy | 3 + margin_logs/step_0000474.npy | 3 + margin_logs/step_0000475.npy | 3 + margin_logs/step_0000476.npy | 3 + margin_logs/step_0000477.npy | 3 + margin_logs/step_0000478.npy | 3 + margin_logs/step_0000479.npy | 3 + margin_logs/step_0000480.npy | 3 + margin_logs/step_0000481.npy | 3 + margin_logs/step_0000482.npy | 3 + margin_logs/step_0000483.npy | 3 + margin_logs/step_0000484.npy | 3 + margin_logs/step_0000485.npy | 3 + margin_logs/step_0000486.npy | 3 + margin_logs/step_0000487.npy | 3 + margin_logs/step_0000488.npy | 3 + margin_logs/step_0000489.npy | 3 + margin_logs/step_0000490.npy | 3 + margin_logs/step_0000491.npy | 3 + margin_logs/step_0000492.npy | 3 + margin_logs/step_0000493.npy | 3 + margin_logs/step_0000494.npy | 3 + margin_logs/step_0000495.npy | 3 + margin_logs/step_0000496.npy | 3 + margin_logs/step_0000497.npy | 3 + margin_logs/step_0000498.npy | 3 + margin_logs/step_0000499.npy | 3 + margin_logs/step_0000500.npy | 3 + margin_logs/step_0000501.npy | 3 + margin_logs/step_0000502.npy | 3 + margin_logs/step_0000503.npy | 3 + margin_logs/step_0000504.npy | 3 + margin_logs/step_0000505.npy | 3 + margin_logs/step_0000506.npy | 3 + margin_logs/step_0000507.npy | 3 + margin_logs/step_0000508.npy | 3 + margin_logs/step_0000509.npy | 3 + margin_logs/step_0000510.npy | 3 + margin_logs/step_0000511.npy | 3 + margin_logs/step_0000512.npy | 3 + margin_logs/step_0000513.npy | 3 + margin_logs/step_0000514.npy | 3 + margin_logs/step_0000515.npy | 3 + margin_logs/step_0000516.npy | 3 + margin_logs/step_0000517.npy | 3 + margin_logs/step_0000518.npy | 3 + margin_logs/step_0000519.npy | 3 + margin_logs/step_0000520.npy | 3 + margin_logs/step_0000521.npy | 3 + margin_logs/step_0000522.npy | 3 + margin_logs/step_0000523.npy | 3 + margin_logs/step_0000524.npy | 3 + margin_logs/step_0000525.npy | 3 + margin_logs/step_0000526.npy | 3 + margin_logs/step_0000527.npy | 3 + margin_logs/step_0000528.npy | 3 + margin_logs/step_0000529.npy | 3 + margin_logs/step_0000530.npy | 3 + margin_logs/step_0000531.npy | 3 + margin_logs/step_0000532.npy | 3 + margin_logs/step_0000533.npy | 3 + margin_logs/step_0000534.npy | 3 + margin_logs/step_0000535.npy | 3 + margin_logs/step_0000536.npy | 3 + margin_logs/step_0000537.npy | 3 + margin_logs/step_0000538.npy | 3 + margin_logs/step_0000539.npy | 3 + margin_logs/step_0000540.npy | 3 + margin_logs/step_0000541.npy | 3 + margin_logs/step_0000542.npy | 3 + margin_logs/step_0000543.npy | 3 + margin_logs/step_0000544.npy | 3 + margin_logs/step_0000545.npy | 3 + margin_logs/step_0000546.npy | 3 + margin_logs/step_0000547.npy | 3 + margin_logs/step_0000548.npy | 3 + margin_logs/step_0000549.npy | 3 + margin_logs/step_0000550.npy | 3 + margin_logs/step_0000551.npy | 3 + margin_logs/step_0000552.npy | 3 + margin_logs/step_0000553.npy | 3 + margin_logs/step_0000554.npy | 3 + margin_logs/step_0000555.npy | 3 + margin_logs/step_0000556.npy | 3 + margin_logs/step_0000557.npy | 3 + margin_logs/step_0000558.npy | 3 + margin_logs/step_0000559.npy | 3 + margin_logs/step_0000560.npy | 3 + margin_logs/step_0000561.npy | 3 + margin_logs/step_0000562.npy | 3 + margin_logs/step_0000563.npy | 3 + margin_logs/step_0000564.npy | 3 + margin_logs/step_0000565.npy | 3 + margin_logs/step_0000566.npy | 3 + margin_logs/step_0000567.npy | 3 + margin_logs/step_0000568.npy | 3 + margin_logs/step_0000569.npy | 3 + margin_logs/step_0000570.npy | 3 + margin_logs/step_0000571.npy | 3 + margin_logs/step_0000572.npy | 3 + margin_logs/step_0000573.npy | 3 + margin_logs/step_0000574.npy | 3 + margin_logs/step_0000575.npy | 3 + margin_logs/step_0000576.npy | 3 + margin_logs/step_0000577.npy | 3 + margin_logs/step_0000578.npy | 3 + margin_logs/step_0000579.npy | 3 + margin_logs/step_0000580.npy | 3 + margin_logs/step_0000581.npy | 3 + margin_logs/step_0000582.npy | 3 + margin_logs/step_0000583.npy | 3 + margin_logs/step_0000584.npy | 3 + margin_logs/step_0000585.npy | 3 + margin_logs/step_0000586.npy | 3 + margin_logs/step_0000587.npy | 3 + margin_logs/step_0000588.npy | 3 + margin_logs/step_0000589.npy | 3 + margin_logs/step_0000590.npy | 3 + margin_logs/step_0000591.npy | 3 + margin_logs/step_0000592.npy | 3 + margin_logs/step_0000593.npy | 3 + margin_logs/step_0000594.npy | 3 + margin_logs/step_0000595.npy | 3 + margin_logs/step_0000596.npy | 3 + margin_logs/step_0000597.npy | 3 + margin_logs/step_0000598.npy | 3 + margin_logs/step_0000599.npy | 3 + margin_logs/step_0000600.npy | 3 + margin_logs/step_0000601.npy | 3 + margin_logs/step_0000602.npy | 3 + margin_logs/step_0000603.npy | 3 + margin_logs/step_0000604.npy | 3 + margin_logs/step_0000605.npy | 3 + margin_logs/step_0000606.npy | 3 + margin_logs/step_0000607.npy | 3 + margin_logs/step_0000608.npy | 3 + margin_logs/step_0000609.npy | 3 + margin_logs/step_0000610.npy | 3 + margin_logs/step_0000611.npy | 3 + margin_logs/step_0000612.npy | 3 + margin_logs/step_0000613.npy | 3 + margin_logs/step_0000614.npy | 3 + margin_logs/step_0000615.npy | 3 + margin_logs/step_0000616.npy | 3 + margin_logs/step_0000617.npy | 3 + margin_logs/step_0000618.npy | 3 + margin_logs/step_0000619.npy | 3 + margin_logs/step_0000620.npy | 3 + margin_logs/step_0000621.npy | 3 + margin_logs/step_0000622.npy | 3 + margin_logs/step_0000623.npy | 3 + margin_logs/step_0000624.npy | 3 + margin_logs/step_0000625.npy | 3 + margin_logs/step_0000626.npy | 3 + margin_logs/step_0000627.npy | 3 + margin_logs/step_0000628.npy | 3 + margin_logs/step_0000629.npy | 3 + margin_logs/step_0000630.npy | 3 + margin_logs/step_0000631.npy | 3 + margin_logs/step_0000632.npy | 3 + margin_logs/step_0000633.npy | 3 + margin_logs/step_0000634.npy | 3 + margin_logs/step_0000635.npy | 3 + margin_logs/step_0000636.npy | 3 + margin_logs/step_0000637.npy | 3 + margin_logs/step_0000638.npy | 3 + margin_logs/step_0000639.npy | 3 + margin_logs/step_0000640.npy | 3 + margin_logs/step_0000641.npy | 3 + margin_logs/step_0000642.npy | 3 + margin_logs/step_0000643.npy | 3 + margin_logs/step_0000644.npy | 3 + margin_logs/step_0000645.npy | 3 + margin_logs/step_0000646.npy | 3 + margin_logs/step_0000647.npy | 3 + margin_logs/step_0000648.npy | 3 + margin_logs/step_0000649.npy | 3 + margin_logs/step_0000650.npy | 3 + margin_logs/step_0000651.npy | 3 + margin_logs/step_0000652.npy | 3 + margin_logs/step_0000653.npy | 3 + margin_logs/step_0000654.npy | 3 + margin_logs/step_0000655.npy | 3 + margin_logs/step_0000656.npy | 3 + margin_logs/step_0000657.npy | 3 + margin_logs/step_0000658.npy | 3 + margin_logs/step_0000659.npy | 3 + margin_logs/step_0000660.npy | 3 + margin_logs/step_0000661.npy | 3 + model-00001-of-00007.safetensors | 3 + model-00002-of-00007.safetensors | 3 + model-00003-of-00007.safetensors | 3 + model-00004-of-00007.safetensors | 3 + model-00005-of-00007.safetensors | 3 + model-00006-of-00007.safetensors | 3 + model-00007-of-00007.safetensors | 3 + model.safetensors.index.json | 298 + special_tokens_map.json | 23 + tokenizer.json | 3 + tokenizer_config.json | 2064 ++++++ train_results.json | 9 + trainer_state.json | 9990 ++++++++++++++++++++++++++++++ 681 files changed, 15240 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 all_results.json create mode 100644 config.json create mode 100644 eval_results.json create mode 100644 generation_config.json create mode 100644 margin_logs/margins.jsonl create mode 100644 margin_logs/step_0000001.npy create mode 100644 margin_logs/step_0000002.npy create mode 100644 margin_logs/step_0000003.npy create mode 100644 margin_logs/step_0000004.npy create mode 100644 margin_logs/step_0000005.npy create mode 100644 margin_logs/step_0000006.npy create mode 100644 margin_logs/step_0000007.npy create mode 100644 margin_logs/step_0000008.npy create mode 100644 margin_logs/step_0000009.npy create mode 100644 margin_logs/step_0000010.npy create mode 100644 margin_logs/step_0000011.npy create mode 100644 margin_logs/step_0000012.npy create mode 100644 margin_logs/step_0000013.npy create mode 100644 margin_logs/step_0000014.npy create mode 100644 margin_logs/step_0000015.npy create mode 100644 margin_logs/step_0000016.npy create mode 100644 margin_logs/step_0000017.npy create mode 100644 margin_logs/step_0000018.npy create mode 100644 margin_logs/step_0000019.npy create mode 100644 margin_logs/step_0000020.npy create mode 100644 margin_logs/step_0000021.npy create mode 100644 margin_logs/step_0000022.npy create mode 100644 margin_logs/step_0000023.npy create mode 100644 margin_logs/step_0000024.npy create mode 100644 margin_logs/step_0000025.npy create mode 100644 margin_logs/step_0000026.npy create mode 100644 margin_logs/step_0000027.npy create mode 100644 margin_logs/step_0000028.npy create mode 100644 margin_logs/step_0000029.npy create mode 100644 margin_logs/step_0000030.npy create mode 100644 margin_logs/step_0000031.npy create mode 100644 margin_logs/step_0000032.npy create mode 100644 margin_logs/step_0000033.npy create mode 100644 margin_logs/step_0000034.npy create mode 100644 margin_logs/step_0000035.npy create mode 100644 margin_logs/step_0000036.npy create mode 100644 margin_logs/step_0000037.npy create mode 100644 margin_logs/step_0000038.npy create mode 100644 margin_logs/step_0000039.npy create mode 100644 margin_logs/step_0000040.npy create mode 100644 margin_logs/step_0000041.npy create mode 100644 margin_logs/step_0000042.npy create mode 100644 margin_logs/step_0000043.npy create mode 100644 margin_logs/step_0000044.npy create mode 100644 margin_logs/step_0000045.npy create mode 100644 margin_logs/step_0000046.npy create mode 100644 margin_logs/step_0000047.npy create mode 100644 margin_logs/step_0000048.npy create mode 100644 margin_logs/step_0000049.npy create mode 100644 margin_logs/step_0000050.npy create mode 100644 margin_logs/step_0000051.npy create mode 100644 margin_logs/step_0000052.npy create mode 100644 margin_logs/step_0000053.npy create mode 100644 margin_logs/step_0000054.npy create mode 100644 margin_logs/step_0000055.npy create mode 100644 margin_logs/step_0000056.npy create mode 100644 margin_logs/step_0000057.npy create mode 100644 margin_logs/step_0000058.npy create mode 100644 margin_logs/step_0000059.npy create mode 100644 margin_logs/step_0000060.npy create mode 100644 margin_logs/step_0000061.npy create mode 100644 margin_logs/step_0000062.npy create mode 100644 margin_logs/step_0000063.npy create mode 100644 margin_logs/step_0000064.npy create mode 100644 margin_logs/step_0000065.npy create mode 100644 margin_logs/step_0000066.npy create mode 100644 margin_logs/step_0000067.npy create mode 100644 margin_logs/step_0000068.npy create mode 100644 margin_logs/step_0000069.npy create mode 100644 margin_logs/step_0000070.npy create mode 100644 margin_logs/step_0000071.npy create mode 100644 margin_logs/step_0000072.npy create mode 100644 margin_logs/step_0000073.npy create mode 100644 margin_logs/step_0000074.npy create mode 100644 margin_logs/step_0000075.npy create mode 100644 margin_logs/step_0000076.npy create mode 100644 margin_logs/step_0000077.npy create mode 100644 margin_logs/step_0000078.npy create mode 100644 margin_logs/step_0000079.npy create mode 100644 margin_logs/step_0000080.npy create mode 100644 margin_logs/step_0000081.npy create mode 100644 margin_logs/step_0000082.npy create mode 100644 margin_logs/step_0000083.npy create mode 100644 margin_logs/step_0000084.npy create mode 100644 margin_logs/step_0000085.npy create mode 100644 margin_logs/step_0000086.npy create mode 100644 margin_logs/step_0000087.npy create mode 100644 margin_logs/step_0000088.npy create mode 100644 margin_logs/step_0000089.npy create mode 100644 margin_logs/step_0000090.npy create mode 100644 margin_logs/step_0000091.npy create mode 100644 margin_logs/step_0000092.npy create mode 100644 margin_logs/step_0000093.npy create mode 100644 margin_logs/step_0000094.npy create mode 100644 margin_logs/step_0000095.npy create mode 100644 margin_logs/step_0000096.npy create mode 100644 margin_logs/step_0000097.npy create mode 100644 margin_logs/step_0000098.npy create mode 100644 margin_logs/step_0000099.npy create mode 100644 margin_logs/step_0000100.npy create mode 100644 margin_logs/step_0000101.npy create mode 100644 margin_logs/step_0000102.npy create mode 100644 margin_logs/step_0000103.npy create mode 100644 margin_logs/step_0000104.npy create mode 100644 margin_logs/step_0000105.npy create mode 100644 margin_logs/step_0000106.npy create mode 100644 margin_logs/step_0000107.npy create mode 100644 margin_logs/step_0000108.npy create mode 100644 margin_logs/step_0000109.npy create mode 100644 margin_logs/step_0000110.npy create mode 100644 margin_logs/step_0000111.npy create mode 100644 margin_logs/step_0000112.npy create mode 100644 margin_logs/step_0000113.npy create mode 100644 margin_logs/step_0000114.npy create mode 100644 margin_logs/step_0000115.npy create mode 100644 margin_logs/step_0000116.npy create mode 100644 margin_logs/step_0000117.npy create mode 100644 margin_logs/step_0000118.npy create mode 100644 margin_logs/step_0000119.npy create mode 100644 margin_logs/step_0000120.npy create mode 100644 margin_logs/step_0000121.npy create mode 100644 margin_logs/step_0000122.npy create mode 100644 margin_logs/step_0000123.npy create mode 100644 margin_logs/step_0000124.npy create mode 100644 margin_logs/step_0000125.npy create mode 100644 margin_logs/step_0000126.npy create mode 100644 margin_logs/step_0000127.npy create mode 100644 margin_logs/step_0000128.npy create mode 100644 margin_logs/step_0000129.npy create mode 100644 margin_logs/step_0000130.npy create mode 100644 margin_logs/step_0000131.npy create mode 100644 margin_logs/step_0000132.npy create mode 100644 margin_logs/step_0000133.npy create mode 100644 margin_logs/step_0000134.npy create mode 100644 margin_logs/step_0000135.npy create mode 100644 margin_logs/step_0000136.npy create mode 100644 margin_logs/step_0000137.npy create mode 100644 margin_logs/step_0000138.npy create mode 100644 margin_logs/step_0000139.npy create mode 100644 margin_logs/step_0000140.npy create mode 100644 margin_logs/step_0000141.npy create mode 100644 margin_logs/step_0000142.npy create mode 100644 margin_logs/step_0000143.npy create mode 100644 margin_logs/step_0000144.npy create mode 100644 margin_logs/step_0000145.npy create mode 100644 margin_logs/step_0000146.npy create mode 100644 margin_logs/step_0000147.npy create mode 100644 margin_logs/step_0000148.npy create mode 100644 margin_logs/step_0000149.npy create mode 100644 margin_logs/step_0000150.npy create mode 100644 margin_logs/step_0000151.npy create mode 100644 margin_logs/step_0000152.npy create mode 100644 margin_logs/step_0000153.npy create mode 100644 margin_logs/step_0000154.npy create mode 100644 margin_logs/step_0000155.npy create mode 100644 margin_logs/step_0000156.npy create mode 100644 margin_logs/step_0000157.npy create mode 100644 margin_logs/step_0000158.npy create mode 100644 margin_logs/step_0000159.npy create mode 100644 margin_logs/step_0000160.npy create mode 100644 margin_logs/step_0000161.npy create mode 100644 margin_logs/step_0000162.npy create mode 100644 margin_logs/step_0000163.npy create mode 100644 margin_logs/step_0000164.npy create mode 100644 margin_logs/step_0000165.npy create mode 100644 margin_logs/step_0000166.npy create mode 100644 margin_logs/step_0000167.npy create mode 100644 margin_logs/step_0000168.npy create mode 100644 margin_logs/step_0000169.npy create mode 100644 margin_logs/step_0000170.npy create mode 100644 margin_logs/step_0000171.npy create mode 100644 margin_logs/step_0000172.npy create mode 100644 margin_logs/step_0000173.npy create mode 100644 margin_logs/step_0000174.npy create mode 100644 margin_logs/step_0000175.npy create mode 100644 margin_logs/step_0000176.npy create mode 100644 margin_logs/step_0000177.npy create mode 100644 margin_logs/step_0000178.npy create mode 100644 margin_logs/step_0000179.npy create mode 100644 margin_logs/step_0000180.npy create mode 100644 margin_logs/step_0000181.npy create mode 100644 margin_logs/step_0000182.npy create mode 100644 margin_logs/step_0000183.npy create mode 100644 margin_logs/step_0000184.npy create mode 100644 margin_logs/step_0000185.npy create mode 100644 margin_logs/step_0000186.npy create mode 100644 margin_logs/step_0000187.npy create mode 100644 margin_logs/step_0000188.npy create mode 100644 margin_logs/step_0000189.npy create mode 100644 margin_logs/step_0000190.npy create mode 100644 margin_logs/step_0000191.npy create mode 100644 margin_logs/step_0000192.npy create mode 100644 margin_logs/step_0000193.npy create mode 100644 margin_logs/step_0000194.npy create mode 100644 margin_logs/step_0000195.npy create mode 100644 margin_logs/step_0000196.npy create mode 100644 margin_logs/step_0000197.npy create mode 100644 margin_logs/step_0000198.npy create mode 100644 margin_logs/step_0000199.npy create mode 100644 margin_logs/step_0000200.npy create mode 100644 margin_logs/step_0000201.npy create mode 100644 margin_logs/step_0000202.npy create mode 100644 margin_logs/step_0000203.npy create mode 100644 margin_logs/step_0000204.npy create mode 100644 margin_logs/step_0000205.npy create mode 100644 margin_logs/step_0000206.npy create mode 100644 margin_logs/step_0000207.npy create mode 100644 margin_logs/step_0000208.npy create mode 100644 margin_logs/step_0000209.npy create mode 100644 margin_logs/step_0000210.npy create mode 100644 margin_logs/step_0000211.npy create mode 100644 margin_logs/step_0000212.npy create mode 100644 margin_logs/step_0000213.npy create mode 100644 margin_logs/step_0000214.npy create mode 100644 margin_logs/step_0000215.npy create mode 100644 margin_logs/step_0000216.npy create mode 100644 margin_logs/step_0000217.npy create mode 100644 margin_logs/step_0000218.npy create mode 100644 margin_logs/step_0000219.npy create mode 100644 margin_logs/step_0000220.npy create mode 100644 margin_logs/step_0000221.npy create mode 100644 margin_logs/step_0000222.npy create mode 100644 margin_logs/step_0000223.npy create mode 100644 margin_logs/step_0000224.npy create mode 100644 margin_logs/step_0000225.npy create mode 100644 margin_logs/step_0000226.npy create mode 100644 margin_logs/step_0000227.npy create mode 100644 margin_logs/step_0000228.npy create mode 100644 margin_logs/step_0000229.npy create mode 100644 margin_logs/step_0000230.npy create mode 100644 margin_logs/step_0000231.npy create mode 100644 margin_logs/step_0000232.npy create mode 100644 margin_logs/step_0000233.npy create mode 100644 margin_logs/step_0000234.npy create mode 100644 margin_logs/step_0000235.npy create mode 100644 margin_logs/step_0000236.npy create mode 100644 margin_logs/step_0000237.npy create mode 100644 margin_logs/step_0000238.npy create mode 100644 margin_logs/step_0000239.npy create mode 100644 margin_logs/step_0000240.npy create mode 100644 margin_logs/step_0000241.npy create mode 100644 margin_logs/step_0000242.npy create mode 100644 margin_logs/step_0000243.npy create mode 100644 margin_logs/step_0000244.npy create mode 100644 margin_logs/step_0000245.npy create mode 100644 margin_logs/step_0000246.npy create mode 100644 margin_logs/step_0000247.npy create mode 100644 margin_logs/step_0000248.npy create mode 100644 margin_logs/step_0000249.npy create mode 100644 margin_logs/step_0000250.npy create mode 100644 margin_logs/step_0000251.npy create mode 100644 margin_logs/step_0000252.npy create mode 100644 margin_logs/step_0000253.npy create mode 100644 margin_logs/step_0000254.npy create mode 100644 margin_logs/step_0000255.npy create mode 100644 margin_logs/step_0000256.npy create mode 100644 margin_logs/step_0000257.npy create mode 100644 margin_logs/step_0000258.npy create mode 100644 margin_logs/step_0000259.npy create mode 100644 margin_logs/step_0000260.npy create mode 100644 margin_logs/step_0000261.npy create mode 100644 margin_logs/step_0000262.npy create mode 100644 margin_logs/step_0000263.npy create mode 100644 margin_logs/step_0000264.npy create mode 100644 margin_logs/step_0000265.npy create mode 100644 margin_logs/step_0000266.npy create mode 100644 margin_logs/step_0000267.npy create mode 100644 margin_logs/step_0000268.npy create mode 100644 margin_logs/step_0000269.npy create mode 100644 margin_logs/step_0000270.npy create mode 100644 margin_logs/step_0000271.npy create mode 100644 margin_logs/step_0000272.npy create mode 100644 margin_logs/step_0000273.npy create mode 100644 margin_logs/step_0000274.npy create mode 100644 margin_logs/step_0000275.npy create mode 100644 margin_logs/step_0000276.npy create mode 100644 margin_logs/step_0000277.npy create mode 100644 margin_logs/step_0000278.npy create mode 100644 margin_logs/step_0000279.npy create mode 100644 margin_logs/step_0000280.npy create mode 100644 margin_logs/step_0000281.npy create mode 100644 margin_logs/step_0000282.npy create mode 100644 margin_logs/step_0000283.npy create mode 100644 margin_logs/step_0000284.npy create mode 100644 margin_logs/step_0000285.npy create mode 100644 margin_logs/step_0000286.npy create mode 100644 margin_logs/step_0000287.npy create mode 100644 margin_logs/step_0000288.npy create mode 100644 margin_logs/step_0000289.npy create mode 100644 margin_logs/step_0000290.npy create mode 100644 margin_logs/step_0000291.npy create mode 100644 margin_logs/step_0000292.npy create mode 100644 margin_logs/step_0000293.npy create mode 100644 margin_logs/step_0000294.npy create mode 100644 margin_logs/step_0000295.npy create mode 100644 margin_logs/step_0000296.npy create mode 100644 margin_logs/step_0000297.npy create mode 100644 margin_logs/step_0000298.npy create mode 100644 margin_logs/step_0000299.npy create mode 100644 margin_logs/step_0000300.npy create mode 100644 margin_logs/step_0000301.npy create mode 100644 margin_logs/step_0000302.npy create mode 100644 margin_logs/step_0000303.npy create mode 100644 margin_logs/step_0000304.npy create mode 100644 margin_logs/step_0000305.npy create mode 100644 margin_logs/step_0000306.npy create mode 100644 margin_logs/step_0000307.npy create mode 100644 margin_logs/step_0000308.npy create mode 100644 margin_logs/step_0000309.npy create mode 100644 margin_logs/step_0000310.npy create mode 100644 margin_logs/step_0000311.npy create mode 100644 margin_logs/step_0000312.npy create mode 100644 margin_logs/step_0000313.npy create mode 100644 margin_logs/step_0000314.npy create mode 100644 margin_logs/step_0000315.npy create mode 100644 margin_logs/step_0000316.npy create mode 100644 margin_logs/step_0000317.npy create mode 100644 margin_logs/step_0000318.npy create mode 100644 margin_logs/step_0000319.npy create mode 100644 margin_logs/step_0000320.npy create mode 100644 margin_logs/step_0000321.npy create mode 100644 margin_logs/step_0000322.npy create mode 100644 margin_logs/step_0000323.npy create mode 100644 margin_logs/step_0000324.npy create mode 100644 margin_logs/step_0000325.npy create mode 100644 margin_logs/step_0000326.npy create mode 100644 margin_logs/step_0000327.npy create mode 100644 margin_logs/step_0000328.npy create mode 100644 margin_logs/step_0000329.npy create mode 100644 margin_logs/step_0000330.npy create mode 100644 margin_logs/step_0000331.npy create mode 100644 margin_logs/step_0000332.npy create mode 100644 margin_logs/step_0000333.npy create mode 100644 margin_logs/step_0000334.npy create mode 100644 margin_logs/step_0000335.npy create mode 100644 margin_logs/step_0000336.npy create mode 100644 margin_logs/step_0000337.npy create mode 100644 margin_logs/step_0000338.npy create mode 100644 margin_logs/step_0000339.npy create mode 100644 margin_logs/step_0000340.npy create mode 100644 margin_logs/step_0000341.npy create mode 100644 margin_logs/step_0000342.npy create mode 100644 margin_logs/step_0000343.npy create mode 100644 margin_logs/step_0000344.npy create mode 100644 margin_logs/step_0000345.npy create mode 100644 margin_logs/step_0000346.npy create mode 100644 margin_logs/step_0000347.npy create mode 100644 margin_logs/step_0000348.npy create mode 100644 margin_logs/step_0000349.npy create mode 100644 margin_logs/step_0000350.npy create mode 100644 margin_logs/step_0000351.npy create mode 100644 margin_logs/step_0000352.npy create mode 100644 margin_logs/step_0000353.npy create mode 100644 margin_logs/step_0000354.npy create mode 100644 margin_logs/step_0000355.npy create mode 100644 margin_logs/step_0000356.npy create mode 100644 margin_logs/step_0000357.npy create mode 100644 margin_logs/step_0000358.npy create mode 100644 margin_logs/step_0000359.npy create mode 100644 margin_logs/step_0000360.npy create mode 100644 margin_logs/step_0000361.npy create mode 100644 margin_logs/step_0000362.npy create mode 100644 margin_logs/step_0000363.npy create mode 100644 margin_logs/step_0000364.npy create mode 100644 margin_logs/step_0000365.npy create mode 100644 margin_logs/step_0000366.npy create mode 100644 margin_logs/step_0000367.npy create mode 100644 margin_logs/step_0000368.npy create mode 100644 margin_logs/step_0000369.npy create mode 100644 margin_logs/step_0000370.npy create mode 100644 margin_logs/step_0000371.npy create mode 100644 margin_logs/step_0000372.npy create mode 100644 margin_logs/step_0000373.npy create mode 100644 margin_logs/step_0000374.npy create mode 100644 margin_logs/step_0000375.npy create mode 100644 margin_logs/step_0000376.npy create mode 100644 margin_logs/step_0000377.npy create mode 100644 margin_logs/step_0000378.npy create mode 100644 margin_logs/step_0000379.npy create mode 100644 margin_logs/step_0000380.npy create mode 100644 margin_logs/step_0000381.npy create mode 100644 margin_logs/step_0000382.npy create mode 100644 margin_logs/step_0000383.npy create mode 100644 margin_logs/step_0000384.npy create mode 100644 margin_logs/step_0000385.npy create mode 100644 margin_logs/step_0000386.npy create mode 100644 margin_logs/step_0000387.npy create mode 100644 margin_logs/step_0000388.npy create mode 100644 margin_logs/step_0000389.npy create mode 100644 margin_logs/step_0000390.npy create mode 100644 margin_logs/step_0000391.npy create mode 100644 margin_logs/step_0000392.npy create mode 100644 margin_logs/step_0000393.npy create mode 100644 margin_logs/step_0000394.npy create mode 100644 margin_logs/step_0000395.npy create mode 100644 margin_logs/step_0000396.npy create mode 100644 margin_logs/step_0000397.npy create mode 100644 margin_logs/step_0000398.npy create mode 100644 margin_logs/step_0000399.npy create mode 100644 margin_logs/step_0000400.npy create mode 100644 margin_logs/step_0000401.npy create mode 100644 margin_logs/step_0000402.npy create mode 100644 margin_logs/step_0000403.npy create mode 100644 margin_logs/step_0000404.npy create mode 100644 margin_logs/step_0000405.npy create mode 100644 margin_logs/step_0000406.npy create mode 100644 margin_logs/step_0000407.npy create mode 100644 margin_logs/step_0000408.npy create mode 100644 margin_logs/step_0000409.npy create mode 100644 margin_logs/step_0000410.npy create mode 100644 margin_logs/step_0000411.npy create mode 100644 margin_logs/step_0000412.npy create mode 100644 margin_logs/step_0000413.npy create mode 100644 margin_logs/step_0000414.npy create mode 100644 margin_logs/step_0000415.npy create mode 100644 margin_logs/step_0000416.npy create mode 100644 margin_logs/step_0000417.npy create mode 100644 margin_logs/step_0000418.npy create mode 100644 margin_logs/step_0000419.npy create mode 100644 margin_logs/step_0000420.npy create mode 100644 margin_logs/step_0000421.npy create mode 100644 margin_logs/step_0000422.npy create mode 100644 margin_logs/step_0000423.npy create mode 100644 margin_logs/step_0000424.npy create mode 100644 margin_logs/step_0000425.npy create mode 100644 margin_logs/step_0000426.npy create mode 100644 margin_logs/step_0000427.npy create mode 100644 margin_logs/step_0000428.npy create mode 100644 margin_logs/step_0000429.npy create mode 100644 margin_logs/step_0000430.npy create mode 100644 margin_logs/step_0000431.npy create mode 100644 margin_logs/step_0000432.npy create mode 100644 margin_logs/step_0000433.npy create mode 100644 margin_logs/step_0000434.npy create mode 100644 margin_logs/step_0000435.npy create mode 100644 margin_logs/step_0000436.npy create mode 100644 margin_logs/step_0000437.npy create mode 100644 margin_logs/step_0000438.npy create mode 100644 margin_logs/step_0000439.npy create mode 100644 margin_logs/step_0000440.npy create mode 100644 margin_logs/step_0000441.npy create mode 100644 margin_logs/step_0000442.npy create mode 100644 margin_logs/step_0000443.npy create mode 100644 margin_logs/step_0000444.npy create mode 100644 margin_logs/step_0000445.npy create mode 100644 margin_logs/step_0000446.npy create mode 100644 margin_logs/step_0000447.npy create mode 100644 margin_logs/step_0000448.npy create mode 100644 margin_logs/step_0000449.npy create mode 100644 margin_logs/step_0000450.npy create mode 100644 margin_logs/step_0000451.npy create mode 100644 margin_logs/step_0000452.npy create mode 100644 margin_logs/step_0000453.npy create mode 100644 margin_logs/step_0000454.npy create mode 100644 margin_logs/step_0000455.npy create mode 100644 margin_logs/step_0000456.npy create mode 100644 margin_logs/step_0000457.npy create mode 100644 margin_logs/step_0000458.npy create mode 100644 margin_logs/step_0000459.npy create mode 100644 margin_logs/step_0000460.npy create mode 100644 margin_logs/step_0000461.npy create mode 100644 margin_logs/step_0000462.npy create mode 100644 margin_logs/step_0000463.npy create mode 100644 margin_logs/step_0000464.npy create mode 100644 margin_logs/step_0000465.npy create mode 100644 margin_logs/step_0000466.npy create mode 100644 margin_logs/step_0000467.npy create mode 100644 margin_logs/step_0000468.npy create mode 100644 margin_logs/step_0000469.npy create mode 100644 margin_logs/step_0000470.npy create mode 100644 margin_logs/step_0000471.npy create mode 100644 margin_logs/step_0000472.npy create mode 100644 margin_logs/step_0000473.npy create mode 100644 margin_logs/step_0000474.npy create mode 100644 margin_logs/step_0000475.npy create mode 100644 margin_logs/step_0000476.npy create mode 100644 margin_logs/step_0000477.npy create mode 100644 margin_logs/step_0000478.npy create mode 100644 margin_logs/step_0000479.npy create mode 100644 margin_logs/step_0000480.npy create mode 100644 margin_logs/step_0000481.npy create mode 100644 margin_logs/step_0000482.npy create mode 100644 margin_logs/step_0000483.npy create mode 100644 margin_logs/step_0000484.npy create mode 100644 margin_logs/step_0000485.npy create mode 100644 margin_logs/step_0000486.npy create mode 100644 margin_logs/step_0000487.npy create mode 100644 margin_logs/step_0000488.npy create mode 100644 margin_logs/step_0000489.npy create mode 100644 margin_logs/step_0000490.npy create mode 100644 margin_logs/step_0000491.npy create mode 100644 margin_logs/step_0000492.npy create mode 100644 margin_logs/step_0000493.npy create mode 100644 margin_logs/step_0000494.npy create mode 100644 margin_logs/step_0000495.npy create mode 100644 margin_logs/step_0000496.npy create mode 100644 margin_logs/step_0000497.npy create mode 100644 margin_logs/step_0000498.npy create mode 100644 margin_logs/step_0000499.npy create mode 100644 margin_logs/step_0000500.npy create mode 100644 margin_logs/step_0000501.npy create mode 100644 margin_logs/step_0000502.npy create mode 100644 margin_logs/step_0000503.npy create mode 100644 margin_logs/step_0000504.npy create mode 100644 margin_logs/step_0000505.npy create mode 100644 margin_logs/step_0000506.npy create mode 100644 margin_logs/step_0000507.npy create mode 100644 margin_logs/step_0000508.npy create mode 100644 margin_logs/step_0000509.npy create mode 100644 margin_logs/step_0000510.npy create mode 100644 margin_logs/step_0000511.npy create mode 100644 margin_logs/step_0000512.npy create mode 100644 margin_logs/step_0000513.npy create mode 100644 margin_logs/step_0000514.npy create mode 100644 margin_logs/step_0000515.npy create mode 100644 margin_logs/step_0000516.npy create mode 100644 margin_logs/step_0000517.npy create mode 100644 margin_logs/step_0000518.npy create mode 100644 margin_logs/step_0000519.npy create mode 100644 margin_logs/step_0000520.npy create mode 100644 margin_logs/step_0000521.npy create mode 100644 margin_logs/step_0000522.npy create mode 100644 margin_logs/step_0000523.npy create mode 100644 margin_logs/step_0000524.npy create mode 100644 margin_logs/step_0000525.npy create mode 100644 margin_logs/step_0000526.npy create mode 100644 margin_logs/step_0000527.npy create mode 100644 margin_logs/step_0000528.npy create mode 100644 margin_logs/step_0000529.npy create mode 100644 margin_logs/step_0000530.npy create mode 100644 margin_logs/step_0000531.npy create mode 100644 margin_logs/step_0000532.npy create mode 100644 margin_logs/step_0000533.npy create mode 100644 margin_logs/step_0000534.npy create mode 100644 margin_logs/step_0000535.npy create mode 100644 margin_logs/step_0000536.npy create mode 100644 margin_logs/step_0000537.npy create mode 100644 margin_logs/step_0000538.npy create mode 100644 margin_logs/step_0000539.npy create mode 100644 margin_logs/step_0000540.npy create mode 100644 margin_logs/step_0000541.npy create mode 100644 margin_logs/step_0000542.npy create mode 100644 margin_logs/step_0000543.npy create mode 100644 margin_logs/step_0000544.npy create mode 100644 margin_logs/step_0000545.npy create mode 100644 margin_logs/step_0000546.npy create mode 100644 margin_logs/step_0000547.npy create mode 100644 margin_logs/step_0000548.npy create mode 100644 margin_logs/step_0000549.npy create mode 100644 margin_logs/step_0000550.npy create mode 100644 margin_logs/step_0000551.npy create mode 100644 margin_logs/step_0000552.npy create mode 100644 margin_logs/step_0000553.npy create mode 100644 margin_logs/step_0000554.npy create mode 100644 margin_logs/step_0000555.npy create mode 100644 margin_logs/step_0000556.npy create mode 100644 margin_logs/step_0000557.npy create mode 100644 margin_logs/step_0000558.npy create mode 100644 margin_logs/step_0000559.npy create mode 100644 margin_logs/step_0000560.npy create mode 100644 margin_logs/step_0000561.npy create mode 100644 margin_logs/step_0000562.npy create mode 100644 margin_logs/step_0000563.npy create mode 100644 margin_logs/step_0000564.npy create mode 100644 margin_logs/step_0000565.npy create mode 100644 margin_logs/step_0000566.npy create mode 100644 margin_logs/step_0000567.npy create mode 100644 margin_logs/step_0000568.npy create mode 100644 margin_logs/step_0000569.npy create mode 100644 margin_logs/step_0000570.npy create mode 100644 margin_logs/step_0000571.npy create mode 100644 margin_logs/step_0000572.npy create mode 100644 margin_logs/step_0000573.npy create mode 100644 margin_logs/step_0000574.npy create mode 100644 margin_logs/step_0000575.npy create mode 100644 margin_logs/step_0000576.npy create mode 100644 margin_logs/step_0000577.npy create mode 100644 margin_logs/step_0000578.npy create mode 100644 margin_logs/step_0000579.npy create mode 100644 margin_logs/step_0000580.npy create mode 100644 margin_logs/step_0000581.npy create mode 100644 margin_logs/step_0000582.npy create mode 100644 margin_logs/step_0000583.npy create mode 100644 margin_logs/step_0000584.npy create mode 100644 margin_logs/step_0000585.npy create mode 100644 margin_logs/step_0000586.npy create mode 100644 margin_logs/step_0000587.npy create mode 100644 margin_logs/step_0000588.npy create mode 100644 margin_logs/step_0000589.npy create mode 100644 margin_logs/step_0000590.npy create mode 100644 margin_logs/step_0000591.npy create mode 100644 margin_logs/step_0000592.npy create mode 100644 margin_logs/step_0000593.npy create mode 100644 margin_logs/step_0000594.npy create mode 100644 margin_logs/step_0000595.npy create mode 100644 margin_logs/step_0000596.npy create mode 100644 margin_logs/step_0000597.npy create mode 100644 margin_logs/step_0000598.npy create mode 100644 margin_logs/step_0000599.npy create mode 100644 margin_logs/step_0000600.npy create mode 100644 margin_logs/step_0000601.npy create mode 100644 margin_logs/step_0000602.npy create mode 100644 margin_logs/step_0000603.npy create mode 100644 margin_logs/step_0000604.npy create mode 100644 margin_logs/step_0000605.npy create mode 100644 margin_logs/step_0000606.npy create mode 100644 margin_logs/step_0000607.npy create mode 100644 margin_logs/step_0000608.npy create mode 100644 margin_logs/step_0000609.npy create mode 100644 margin_logs/step_0000610.npy create mode 100644 margin_logs/step_0000611.npy create mode 100644 margin_logs/step_0000612.npy create mode 100644 margin_logs/step_0000613.npy create mode 100644 margin_logs/step_0000614.npy create mode 100644 margin_logs/step_0000615.npy create mode 100644 margin_logs/step_0000616.npy create mode 100644 margin_logs/step_0000617.npy create mode 100644 margin_logs/step_0000618.npy create mode 100644 margin_logs/step_0000619.npy create mode 100644 margin_logs/step_0000620.npy create mode 100644 margin_logs/step_0000621.npy create mode 100644 margin_logs/step_0000622.npy create mode 100644 margin_logs/step_0000623.npy create mode 100644 margin_logs/step_0000624.npy create mode 100644 margin_logs/step_0000625.npy create mode 100644 margin_logs/step_0000626.npy create mode 100644 margin_logs/step_0000627.npy create mode 100644 margin_logs/step_0000628.npy create mode 100644 margin_logs/step_0000629.npy create mode 100644 margin_logs/step_0000630.npy create mode 100644 margin_logs/step_0000631.npy create mode 100644 margin_logs/step_0000632.npy create mode 100644 margin_logs/step_0000633.npy create mode 100644 margin_logs/step_0000634.npy create mode 100644 margin_logs/step_0000635.npy create mode 100644 margin_logs/step_0000636.npy create mode 100644 margin_logs/step_0000637.npy create mode 100644 margin_logs/step_0000638.npy create mode 100644 margin_logs/step_0000639.npy create mode 100644 margin_logs/step_0000640.npy create mode 100644 margin_logs/step_0000641.npy create mode 100644 margin_logs/step_0000642.npy create mode 100644 margin_logs/step_0000643.npy create mode 100644 margin_logs/step_0000644.npy create mode 100644 margin_logs/step_0000645.npy create mode 100644 margin_logs/step_0000646.npy create mode 100644 margin_logs/step_0000647.npy create mode 100644 margin_logs/step_0000648.npy create mode 100644 margin_logs/step_0000649.npy create mode 100644 margin_logs/step_0000650.npy create mode 100644 margin_logs/step_0000651.npy create mode 100644 margin_logs/step_0000652.npy create mode 100644 margin_logs/step_0000653.npy create mode 100644 margin_logs/step_0000654.npy create mode 100644 margin_logs/step_0000655.npy create mode 100644 margin_logs/step_0000656.npy create mode 100644 margin_logs/step_0000657.npy create mode 100644 margin_logs/step_0000658.npy create mode 100644 margin_logs/step_0000659.npy create mode 100644 margin_logs/step_0000660.npy create mode 100644 margin_logs/step_0000661.npy create mode 100644 model-00001-of-00007.safetensors create mode 100644 model-00002-of-00007.safetensors create mode 100644 model-00003-of-00007.safetensors create mode 100644 model-00004-of-00007.safetensors create mode 100644 model-00005-of-00007.safetensors create mode 100644 model-00006-of-00007.safetensors create mode 100644 model-00007-of-00007.safetensors create mode 100644 model.safetensors.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 train_results.json create mode 100644 trainer_state.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..568e1db --- /dev/null +++ b/README.md @@ -0,0 +1,76 @@ +--- +library_name: transformers +base_model: W-61/llama-3-8b-base-sft-hh-harmless-4xh200 +tags: +- alignment-handbook +- margin-dpo +- generated_from_trainer +datasets: +- Anthropic/hh-rlhf +model-index: +- name: llama-3-8b-base-margin-dpo-hh-harmless + results: [] +--- + + + +# llama-3-8b-base-margin-dpo-hh-harmless + +This model is a fine-tuned version of [W-61/llama-3-8b-base-sft-hh-harmless-4xh200](https://huggingface.co/W-61/llama-3-8b-base-sft-hh-harmless-4xh200) on the Anthropic/hh-rlhf dataset. +It achieves the following results on the evaluation set: +- Loss: 0.5348 +- Margin Dpo/margin Mean: 60.1785 +- Margin Dpo/margin Std: 94.6210 +- Logps/chosen: -211.5150 +- Logps/rejected: -274.1422 +- Logps/ref Chosen: -75.3065 +- Logps/ref Rejected: -77.7551 +- Logits/chosen: 0.8960 +- Logits/rejected: 0.8635 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-07 +- train_batch_size: 8 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 64 +- total_eval_batch_size: 32 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 1 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | Margin Dpo/margin Mean | Margin Dpo/margin Std | Logps/chosen | Logps/rejected | Logps/ref Chosen | Logps/ref Rejected | Logits/chosen | Logits/rejected | +|:-------------:|:------:|:----:|:---------------:|:----------------------:|:---------------------:|:------------:|:--------------:|:----------------:|:------------------:|:-------------:|:---------------:| +| 1.1015 | 0.4535 | 300 | 0.5574 | 48.6165 | 83.3105 | -200.8800 | -251.9452 | -75.3065 | -77.7551 | 0.9108 | 0.8757 | +| 1.1391 | 0.9070 | 600 | 0.5348 | 60.1785 | 94.6210 | -211.5150 | -274.1422 | -75.3065 | -77.7551 | 0.8960 | 0.8635 | + + +### Framework versions + +- Transformers 4.51.0 +- Pytorch 2.3.1+cu121 +- Datasets 2.21.0 +- Tokenizers 0.21.4 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..77b1160 --- /dev/null +++ b/all_results.json @@ -0,0 +1,22 @@ +{ + "epoch": 0.999244142101285, + "eval_logits/chosen": 0.900075376033783, + "eval_logits/rejected": 0.867337167263031, + "eval_logps/chosen": -211.55142211914062, + "eval_logps/ref_chosen": -75.30646514892578, + "eval_logps/ref_rejected": -77.75511932373047, + "eval_logps/rejected": -274.3049621582031, + "eval_loss": 0.5347095727920532, + "eval_margin_dpo/margin_mean": 60.304866790771484, + "eval_margin_dpo/margin_std": 94.75297546386719, + "eval_runtime": 37.4264, + "eval_samples": 2303, + "eval_samples_per_second": 61.534, + "eval_steps_per_second": 1.924, + "total_flos": 0.0, + "train_loss": 1.1443162902220294, + "train_runtime": 1702.0491, + "train_samples": 42336, + "train_samples_per_second": 24.874, + "train_steps_per_second": 0.388 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..5092b09 --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.51.0", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/eval_results.json b/eval_results.json new file mode 100644 index 0000000..e766abd --- /dev/null +++ b/eval_results.json @@ -0,0 +1,16 @@ +{ + "epoch": 0.999244142101285, + "eval_logits/chosen": 0.900075376033783, + "eval_logits/rejected": 0.867337167263031, + "eval_logps/chosen": -211.55142211914062, + "eval_logps/ref_chosen": -75.30646514892578, + "eval_logps/ref_rejected": -77.75511932373047, + "eval_logps/rejected": -274.3049621582031, + "eval_loss": 0.5347095727920532, + "eval_margin_dpo/margin_mean": 60.304866790771484, + "eval_margin_dpo/margin_std": 94.75297546386719, + "eval_runtime": 37.4264, + "eval_samples": 2303, + "eval_samples_per_second": 61.534, + "eval_steps_per_second": 1.924 +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..76247c9 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.51.0" +} diff --git a/margin_logs/margins.jsonl b/margin_logs/margins.jsonl new file mode 100644 index 0000000..824d40d --- /dev/null +++ b/margin_logs/margins.jsonl @@ -0,0 +1,661 @@ +{"epoch": 0.0, "step": 1, "batch_size": 64, "mean": -0.0013527870178222656, "std": 0.2564818859100342, "min": -0.736083984375, "p10": -0.3432229995727539, "median": 0.038166046142578125, "p90": 0.29227676391601565, "max": 0.645111083984375, "pos_frac": 0.578125, "sample": [0.1120758056640625, 0.12518310546875, 0.31621551513671875, 0.13765716552734375, -0.12592506408691406, 0.23141098022460938, -0.21887779235839844, 0.21950721740722656, 0.04480743408203125, 0.020877838134765625, 0.0570220947265625, 0.058269500732421875, -0.4338226318359375, -0.030628204345703125, 0.645111083984375, -0.395477294921875, 0.09050941467285156, 0.0007190704345703125, -0.34615325927734375, 0.016077041625976562, -0.33638572692871094, 0.293853759765625, 0.17610931396484375, 0.22386932373046875, 0.21470260620117188, -0.08536529541015625, 0.0907745361328125, -0.03816986083984375, 0.39190101623535156, 0.16336441040039062, 0.08024787902832031, -0.031158447265625, 0.08477020263671875, 0.002460479736328125, -0.242034912109375, 0.07232666015625, -0.60186767578125, 0.20531463623046875, 0.155731201171875, -0.14299774169921875, -0.25698089599609375, 0.12331962585449219, -0.26497650146484375, 0.15140533447265625, -0.0920257568359375, -0.18599319458007812, 0.19028091430664062, 0.2496490478515625, 0.42162322998046875, 0.17873382568359375, -0.1525421142578125, -0.4972076416015625, 0.32010650634765625, -0.10365867614746094, -0.233795166015625, -0.19828224182128906, -0.4018898010253906, -0.13407135009765625, -0.09596633911132812, 0.031524658203125, 0.28859710693359375, -0.192962646484375, -0.736083984375, 0.3026123046875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000001.npy"} +{"epoch": 0.0015117157974300832, "step": 2, "batch_size": 64, "mean": 0.03744968771934509, "std": 0.2875921130180359, "min": -0.7604827880859375, "p10": -0.2812448501586914, "median": 0.03963661193847656, "p90": 0.3654294967651367, "max": 0.8134727478027344, "pos_frac": 0.5625, "sample": [0.30594635009765625, -0.24289894104003906, -0.11509323120117188, -0.13417816162109375, 0.06942558288574219, 0.36568641662597656, -0.14640045166015625, 0.1497650146484375, 0.30261993408203125, 0.10124588012695312, 0.13028717041015625, -0.0031890869140625, 0.0361480712890625, 0.5662612915039062, 0.09694290161132812, -0.01091766357421875, 0.1128997802734375, 0.0411834716796875, -0.21860504150390625, -0.1236419677734375, -0.08812713623046875, 0.10360527038574219, 0.1790008544921875, -0.5114288330078125, 0.3056755065917969, -0.14553451538085938, 0.28168487548828125, 0.26990509033203125, 0.1686878204345703, 0.038089752197265625, 0.19541168212890625, -0.10783576965332031, -0.2644004821777344, -0.19707489013671875, -0.140472412109375, 0.1349811553955078, 0.19672012329101562, -0.0714111328125, 0.53369140625, 0.1271820068359375, 0.8134727478027344, 0.2990264892578125, -0.7604827880859375, -0.08274078369140625, 0.05890846252441406, 0.029361724853515625, 0.4510040283203125, -0.1599273681640625, -0.29346656799316406, 0.10005569458007812, -0.27509117126464844, -0.1937713623046875, 0.19167327880859375, 0.28173065185546875, -0.09406471252441406, -0.3380699157714844, -0.29186248779296875, 0.36483001708984375, 0.009979248046875, 0.44391632080078125, -0.126708984375, -0.6550216674804688, 0.6160736083984375, -0.28388214111328125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000002.npy"} +{"epoch": 0.0030234315948601664, "step": 3, "batch_size": 64, "mean": -0.02706262469291687, "std": 0.3446815311908722, "min": -0.8575935363769531, "p10": -0.43239173889160154, "median": -0.013734817504882812, "p90": 0.408241081237793, "max": 0.7957000732421875, "pos_frac": 0.484375, "sample": [0.14762496948242188, -0.015893936157226562, -0.4559745788574219, -0.697296142578125, 0.3914794921875, 0.2433319091796875, -0.19615554809570312, 0.015438079833984375, 0.581146240234375, 0.01549530029296875, -0.26671600341796875, -0.1466064453125, -0.275848388671875, 0.056865692138671875, 0.192718505859375, -0.0487518310546875, -0.26129150390625, 0.014984130859375, 0.30069923400878906, -0.8566207885742188, -0.7052154541015625, -0.192291259765625, -0.0136260986328125, 0.10274887084960938, -0.19922637939453125, 0.3939323425292969, 0.4426422119140625, 0.621337890625, -0.07215118408203125, -0.0175933837890625, 0.27059364318847656, -0.4307975769042969, -0.036224365234375, 0.01605224609375, -0.08233833312988281, -0.27430152893066406, 0.01399993896484375, 0.0142059326171875, -0.08812713623046875, 0.6147537231445312, 0.374237060546875, -0.21138763427734375, 0.229827880859375, 0.7957000732421875, 0.4366798400878906, -0.2652626037597656, 0.078460693359375, 0.41437339782714844, 0.11383056640625, -0.6709671020507812, -0.8575935363769531, 0.1324005126953125, -0.013843536376953125, 0.03302955627441406, -0.18176651000976562, 0.00586700439453125, 0.1981964111328125, -0.18857574462890625, -0.0420989990234375, -0.433074951171875, -0.29685211181640625, 0.18144989013671875, -0.40134429931640625, -0.28029632568359375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000003.npy"} +{"epoch": 0.0045351473922902496, "step": 4, "batch_size": 64, "mean": -0.007019072771072388, "std": 0.2986670434474945, "min": -0.5875244140625, "p10": -0.3667743682861328, "median": -0.016908645629882812, "p90": 0.36061515808105477, "max": 0.7291336059570312, "pos_frac": 0.484375, "sample": [-0.328521728515625, 0.7200164794921875, 0.0731048583984375, -0.0040435791015625, -0.0699462890625, -0.2946929931640625, 0.0059185028076171875, 0.24393081665039062, 0.1821441650390625, 0.7291336059570312, -0.18677520751953125, 0.3713645935058594, 0.46734619140625, -0.2777862548828125, -0.2972526550292969, -0.428741455078125, 0.45037841796875, -0.237457275390625, 0.3110198974609375, -0.33592987060546875, 0.09278106689453125, -0.11517715454101562, 0.041473388671875, -0.137908935546875, -0.26422882080078125, -0.138214111328125, 0.18069076538085938, 0.2823467254638672, 0.14845657348632812, 0.0137786865234375, 0.31146240234375, 0.13599395751953125, 0.04340362548828125, -0.16207122802734375, -0.56890869140625, 0.6466827392578125, -0.238677978515625, -0.19162750244140625, -0.455230712890625, -0.4306793212890625, 0.07817840576171875, -0.05598258972167969, 0.1644134521484375, -0.3799934387207031, -0.15818023681640625, -0.1722259521484375, -0.14470481872558594, 0.017526626586914062, -0.032989501953125, -0.029773712158203125, 0.33553314208984375, -0.4234771728515625, -0.11665725708007812, 0.040607452392578125, -0.5875244140625, 0.304443359375, 0.0194091796875, 0.5542335510253906, -0.15218353271484375, -0.15783309936523438, -0.2893218994140625, 0.09674072265625, 0.1428375244140625, 0.21014785766601562], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000004.npy"} +{"epoch": 0.006046863189720333, "step": 5, "batch_size": 64, "mean": 0.0004509389400482178, "std": 0.33185315132141113, "min": -1.0544281005859375, "p10": -0.3405084609985351, "median": 0.03404998779296875, "p90": 0.2703605651855469, "max": 1.088134765625, "pos_frac": 0.578125, "sample": [-0.19089889526367188, 0.00637054443359375, 0.119476318359375, 0.14837646484375, -0.4174957275390625, 0.1625518798828125, 0.18166732788085938, 0.160980224609375, 0.43889617919921875, 0.2558746337890625, 0.07808685302734375, 0.266510009765625, 0.011409759521484375, 0.24654197692871094, -0.6805534362792969, -0.054901123046875, 0.26181793212890625, -0.10700225830078125, -1.0544281005859375, -0.13399124145507812, -0.02956390380859375, 0.07042503356933594, 0.14293289184570312, -0.091583251953125, -0.982666015625, 0.697021484375, -0.25603485107421875, -0.15493202209472656, 0.1404876708984375, -0.09030532836914062, 0.3459014892578125, 0.2750701904296875, -0.7388229370117188, 0.22727012634277344, -0.026092529296875, -0.220245361328125, -0.2645854949951172, 0.014499664306640625, 0.12965965270996094, 0.03710174560546875, -0.013813018798828125, -0.373046875, 0.00984954833984375, 0.1828155517578125, 0.29988861083984375, -0.04519462585449219, 0.1583404541015625, 0.215850830078125, 0.27201080322265625, 0.03099822998046875, 0.11893081665039062, 0.08972930908203125, 0.04695892333984375, 1.088134765625, 0.052703857421875, 0.09865570068359375, -0.5457420349121094, -0.08452987670898438, -0.05063438415527344, -0.14504432678222656, -0.1829700469970703, -0.1717681884765625, -0.17935752868652344, 0.23126602172851562], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000005.npy"} +{"epoch": 0.007558578987150416, "step": 6, "batch_size": 64, "mean": -0.07278254628181458, "std": 0.32753172516822815, "min": -0.8779296875, "p10": -0.4224882125854492, "median": -0.09411811828613281, "p90": 0.29323444366455087, "max": 0.936767578125, "pos_frac": 0.359375, "sample": [-0.39754486083984375, -0.06403732299804688, -0.1007843017578125, 0.17398834228515625, -0.2674674987792969, -0.021478652954101562, -0.17603492736816406, -0.2545509338378906, -0.19429779052734375, -0.118682861328125, 0.10923385620117188, -0.425323486328125, -0.6517448425292969, -0.22415924072265625, 0.569671630859375, -0.8779296875, 0.2687225341796875, 0.571197509765625, 0.07745361328125, -0.45178985595703125, -0.3032989501953125, -0.09978485107421875, -0.03862571716308594, 0.2706336975097656, -0.1270751953125, -0.2432861328125, -0.019012451171875, 0.145721435546875, -0.41587257385253906, 0.4837169647216797, 0.09996604919433594, 0.22754669189453125, -0.0668182373046875, 0.24387359619140625, -0.3297271728515625, -0.178802490234375, -0.44724273681640625, -0.35831451416015625, -0.3880958557128906, 0.02870941162109375, -0.07257843017578125, -0.2409839630126953, -0.09039688110351562, 0.044857025146484375, -0.161895751953125, 0.45211029052734375, 0.936767578125, 0.27411651611328125, -0.3943977355957031, 0.029468536376953125, 0.15431976318359375, 0.40001678466796875, -0.588836669921875, -0.33201026916503906, -0.21604156494140625, 0.083526611328125, -0.06256103515625, -0.09893798828125, -0.813720703125, -0.2781982421875, -0.09783935546875, 0.30142784118652344, -0.05513572692871094, 0.14018821716308594], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000006.npy"} +{"epoch": 0.009070294784580499, "step": 7, "batch_size": 64, "mean": -0.014032810926437378, "std": 0.2737120985984802, "min": -0.5912322998046875, "p10": -0.367327880859375, "median": -0.0020475387573242188, "p90": 0.3407440185546875, "max": 0.7162704467773438, "pos_frac": 0.453125, "sample": [-0.030429840087890625, -0.4172859191894531, -0.41265869140625, -0.016450881958007812, 0.05595588684082031, 0.1972179412841797, -0.1763324737548828, -0.0903167724609375, 0.36717987060546875, 0.7162704467773438, 0.1137847900390625, 0.11435508728027344, 0.051197052001953125, 0.13887786865234375, -0.0018978118896484375, -0.31720733642578125, 0.2994041442871094, -0.00910186767578125, 0.0906829833984375, -0.459808349609375, 0.47783470153808594, 0.39994049072265625, -0.020294189453125, 0.3137969970703125, 0.1997833251953125, -0.368408203125, -0.43369293212890625, -0.00139617919921875, 0.115447998046875, 0.1522235870361328, -0.36480712890625, 0.5878219604492188, -0.33290863037109375, 0.16619110107421875, -0.27696990966796875, 0.34487152099609375, -0.07723617553710938, 0.01689910888671875, 0.07933616638183594, -0.2959728240966797, -0.26609039306640625, -0.3137779235839844, -0.002197265625, -0.1734771728515625, -0.4299468994140625, 0.25688934326171875, -0.5912322998046875, -0.07060432434082031, 0.3430328369140625, 0.2506256103515625, 0.030725479125976562, -0.09586715698242188, 0.17388916015625, -0.23558425903320312, -0.072967529296875, -0.3023834228515625, -0.27913665771484375, -0.18144989013671875, 0.1211395263671875, 0.3354034423828125, -0.0005035400390625, -0.100921630859375, 0.02854156494140625, -0.21810340881347656], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000007.npy"} +{"epoch": 0.010582010582010581, "step": 8, "batch_size": 64, "mean": -0.007560908794403076, "std": 0.2669219970703125, "min": -0.5561065673828125, "p10": -0.34194374084472656, "median": -0.027618408203125, "p90": 0.36644973754882815, "max": 0.6739578247070312, "pos_frac": 0.40625, "sample": [-0.0048694610595703125, -0.1565704345703125, -0.509490966796875, -0.04717254638671875, -0.5561065673828125, 0.0374755859375, -0.00545501708984375, 0.44992828369140625, -0.1876659393310547, 0.1771087646484375, -0.1973114013671875, 0.42600250244140625, -0.12958335876464844, -0.2839775085449219, 0.36727142333984375, 0.6739578247070312, 0.411163330078125, 0.04642295837402344, -0.1110687255859375, -0.0114593505859375, 0.012889862060546875, -0.3507843017578125, -0.5282821655273438, -0.2315673828125, -0.20064163208007812, -0.19512557983398438, 0.10354995727539062, -0.05286407470703125, 0.21923446655273438, -0.08149337768554688, 0.364532470703125, -0.0108795166015625, -0.09438514709472656, 0.03603363037109375, 0.0770111083984375, 0.0947265625, -0.02642822265625, 0.18836212158203125, 0.5451507568359375, -0.3834342956542969, 0.2935791015625, -0.22661209106445312, -0.068511962890625, -0.07680511474609375, 0.1577911376953125, -0.01497650146484375, 0.2579174041748047, -0.3155937194824219, -0.3901824951171875, -0.02880859375, -0.11685562133789062, 0.45489501953125, -0.4241600036621094, -0.15615463256835938, 0.155975341796875, 0.2581672668457031, 0.06581497192382812, -0.11273193359375, 0.2654228210449219, -0.3213157653808594, -0.10963821411132812, 0.34711456298828125, -0.0914764404296875, -0.16095733642578125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000008.npy"} +{"epoch": 0.012093726379440665, "step": 9, "batch_size": 64, "mean": 0.00360676646232605, "std": 0.33720654249191284, "min": -1.0557861328125, "p10": -0.4605937957763671, "median": 0.01922607421875, "p90": 0.33740444183349616, "max": 0.8075027465820312, "pos_frac": 0.5625, "sample": [0.24054718017578125, -0.37979888916015625, -0.11317825317382812, 0.0189666748046875, -0.2051525115966797, -0.5683135986328125, 0.22642135620117188, -0.3798789978027344, -0.3368072509765625, -1.0557861328125, 0.0194854736328125, -0.06376457214355469, 0.8075027465820312, 0.015605926513671875, 0.067474365234375, 0.004180908203125, -0.6521682739257812, -0.038604736328125, 0.002208709716796875, 0.31670570373535156, 0.059051513671875, 0.13554000854492188, 0.43206787109375, 0.17363739013671875, -0.17594146728515625, 0.20375823974609375, 0.19673728942871094, 0.13177490234375, 0.04344749450683594, -0.23775863647460938, -0.49518585205078125, -0.7006378173828125, 0.7113113403320312, 0.27783203125, -0.06490516662597656, -0.23614883422851562, 0.1600189208984375, -0.23134613037109375, -0.72088623046875, 0.085662841796875, 0.2088470458984375, -0.5134296417236328, -0.0295257568359375, -0.0164337158203125, -0.0046596527099609375, 0.14000701904296875, 0.5427398681640625, 0.1670684814453125, 0.06237983703613281, -0.0244140625, 0.21481704711914062, -0.023529052734375, 0.08557319641113281, 0.036407470703125, 0.5387954711914062, 0.665008544921875, -0.16896629333496094, 0.0884552001953125, 0.34627532958984375, 0.2710762023925781, -0.015533447265625, -0.0088958740234375, -0.0316619873046875, 0.02675628662109375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000009.npy"} +{"epoch": 0.013605442176870748, "step": 10, "batch_size": 64, "mean": -0.0313323438167572, "std": 0.40687647461891174, "min": -1.027374267578125, "p10": -0.47310485839843747, "median": -0.0535125732421875, "p90": 0.38631591796875003, "max": 1.247711181640625, "pos_frac": 0.40625, "sample": [-0.30517578125, -0.3930797576904297, 0.31974029541015625, -0.1620330810546875, -0.09853363037109375, -0.7093658447265625, -0.3825035095214844, -0.27629852294921875, -0.3654632568359375, -0.05693817138671875, -0.2551116943359375, 0.18732833862304688, -0.5862808227539062, 0.3349609375, 0.173858642578125, -0.13510894775390625, 0.04115104675292969, -0.5187530517578125, 0.73394775390625, -0.046642303466796875, -0.12277412414550781, -0.0308837890625, 0.29254150390625, 0.3818511962890625, -0.14603614807128906, -0.4970550537109375, -0.35129547119140625, 0.10540771484375, 0.0641021728515625, -0.09766387939453125, -0.02828216552734375, 0.29332733154296875, -0.24881744384765625, -0.05008697509765625, 0.14102745056152344, 0.3882293701171875, -0.7496452331542969, 0.06853103637695312, 0.35897064208984375, 0.31731605529785156, 0.24442481994628906, -0.30562782287597656, 0.5745086669921875, -0.361968994140625, -0.2177734375, -0.38576507568359375, -0.4172210693359375, -0.04769134521484375, 1.247711181640625, -0.1743927001953125, -0.7537994384765625, 0.5172386169433594, -0.036563873291015625, 0.0825347900390625, -0.11162185668945312, 0.19881820678710938, 1.1808624267578125, 0.08620452880859375, -0.2003936767578125, -0.127655029296875, 0.14905738830566406, -0.1385345458984375, 0.4312896728515625, -1.027374267578125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000010.npy"} +{"epoch": 0.015117157974300832, "step": 11, "batch_size": 64, "mean": 0.022221386432647705, "std": 0.3169333040714264, "min": -0.9056472778320312, "p10": -0.3103645324707031, "median": -0.004576683044433594, "p90": 0.4049591064453125, "max": 1.0320587158203125, "pos_frac": 0.5, "sample": [0.20845794677734375, 0.10430526733398438, -0.2865028381347656, 0.4077606201171875, -0.4453849792480469, 0.17185592651367188, 0.016916275024414062, 0.010274887084960938, -0.115203857421875, -0.3668212890625, -0.13169097900390625, 0.4821434020996094, -0.13994598388671875, 0.2095489501953125, -0.19581985473632812, 0.3890495300292969, 0.6784477233886719, 1.0320587158203125, -0.3916778564453125, 0.15306854248046875, -0.10852432250976562, 0.0052490234375, -0.10103607177734375, -0.28882598876953125, -0.12545394897460938, 0.1724395751953125, -0.119659423828125, -0.0638275146484375, -0.016149520874023438, -0.13327789306640625, 0.10980415344238281, 0.12547683715820312, -0.014402389526367188, 0.5356597900390625, -0.028041839599609375, 0.031982421875, -0.2555274963378906, -0.1302032470703125, 0.11809539794921875, -0.6097335815429688, -0.263885498046875, 0.07085037231445312, -0.38332366943359375, 0.21094512939453125, 0.3984222412109375, -0.9056472778320312, 0.068756103515625, 0.16245079040527344, 0.17767333984375, 0.8022308349609375, -0.026531219482421875, 0.034149169921875, -0.20511627197265625, -0.12894821166992188, -0.3195953369140625, -0.08990478515625, -0.017862319946289062, 0.19133567810058594, -0.16764068603515625, -0.078887939453125, 0.08344650268554688, 0.54644775390625, 0.320831298828125, 0.047088623046875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000011.npy"} +{"epoch": 0.016628873771730914, "step": 12, "batch_size": 64, "mean": 0.011939138174057007, "std": 0.29164984822273254, "min": -0.6508007049560547, "p10": -0.3679134368896484, "median": 0.0032062530517578125, "p90": 0.36173515319824223, "max": 0.835601806640625, "pos_frac": 0.5, "sample": [0.12910842895507812, -0.39501190185546875, 0.011745452880859375, 0.29802894592285156, -0.15013504028320312, -0.13299560546875, 0.3675270080566406, -0.2258758544921875, 0.1334686279296875, -0.071563720703125, -0.4534454345703125, -0.5585594177246094, -0.2105560302734375, 0.677490234375, 0.09377479553222656, -0.17704010009765625, 0.032470703125, -0.00821685791015625, -0.05089569091796875, 0.06563186645507812, -0.3316535949707031, 0.409759521484375, -0.296051025390625, 0.4525604248046875, 0.024770736694335938, -0.0981292724609375, 0.015058517456054688, -0.0929718017578125, 0.08528900146484375, 0.2979106903076172, -0.42066192626953125, -0.383453369140625, -0.14488983154296875, -0.1669158935546875, 0.09911727905273438, 0.12027740478515625, -0.38498687744140625, 0.27739715576171875, 0.2804603576660156, -0.2845172882080078, 0.11456298828125, 0.07435035705566406, -0.0196380615234375, 0.07681465148925781, 0.21244430541992188, 0.5084686279296875, 0.3482208251953125, -0.00533294677734375, 0.5078201293945312, 0.21851730346679688, -0.06513595581054688, 0.2452068328857422, 0.2759208679199219, -0.125213623046875, 0.835601806640625, -0.10860443115234375, -0.6508007049560547, 0.27475738525390625, -0.24457931518554688, -0.18406105041503906, -0.0668487548828125, -0.24291610717773438, 0.22700881958007812, -0.27577972412109375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000012.npy"} +{"epoch": 0.018140589569160998, "step": 13, "batch_size": 64, "mean": 0.03117358684539795, "std": 0.30930477380752563, "min": -1.05718994140625, "p10": -0.29412384033203126, "median": 0.03803253173828125, "p90": 0.43209991455078134, "max": 0.642730712890625, "pos_frac": 0.578125, "sample": [-1.05718994140625, 0.36025238037109375, -0.1612396240234375, 0.45758056640625, 0.642730712890625, -0.8389663696289062, 0.34891510009765625, 0.0483245849609375, -0.2371826171875, -0.113037109375, -0.007373809814453125, 0.1553783416748047, 0.48712158203125, 0.416046142578125, 0.058498382568359375, 0.44751930236816406, 0.34326171875, 0.06509780883789062, -0.1593017578125, 0.08002471923828125, -0.3005828857421875, 0.08205413818359375, -0.12957763671875, -0.1607818603515625, 0.24068832397460938, -0.01314544677734375, 0.041412353515625, -0.09195518493652344, 0.21656036376953125, -0.3537139892578125, -0.07411384582519531, -0.279052734375, 0.24671173095703125, -0.27362823486328125, -0.25873565673828125, 0.5086574554443359, 0.02388763427734375, -0.1938934326171875, 0.32485008239746094, -0.2001190185546875, 0.0064334869384765625, -0.004058837890625, -0.31996917724609375, 2.86102294921875e-05, -0.4491729736328125, 0.18904876708984375, -0.07866668701171875, 0.0346527099609375, 0.08853912353515625, -0.30318450927734375, 0.29167938232421875, 0.40203094482421875, -0.2709827423095703, 0.2448406219482422, 0.2652130126953125, -0.1930999755859375, -0.082763671875, 0.12954330444335938, 0.5241241455078125, 0.4389801025390625, 0.09029388427734375, 0.233673095703125, 0.05214881896972656, 0.013795852661132812], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000013.npy"} +{"epoch": 0.019652305366591082, "step": 14, "batch_size": 64, "mean": 0.03497576713562012, "std": 0.2729725241661072, "min": -0.532989501953125, "p10": -0.3234214782714844, "median": 0.012725830078125, "p90": 0.33532047271728527, "max": 0.794036865234375, "pos_frac": 0.546875, "sample": [-0.16667938232421875, -0.532989501953125, -0.167572021484375, 0.42926788330078125, 0.19231224060058594, 0.3484325408935547, -0.19769287109375, 0.0897216796875, -0.10731315612792969, -0.013011932373046875, 0.127685546875, -0.034912109375, -0.21465492248535156, -0.3354949951171875, -0.02645111083984375, -0.03395843505859375, -0.1336956024169922, 0.07704925537109375, -0.2317047119140625, 0.009334564208984375, -0.266021728515625, -0.3574981689453125, -0.022174835205078125, 0.0214080810546875, -0.22751617431640625, 0.00823974609375, -0.33013916015625, -0.11524200439453125, 0.2534332275390625, 0.6907196044921875, 0.10667037963867188, 0.6371002197265625, 0.058696746826171875, 0.794036865234375, -0.11749649047851562, 0.19012451171875, 0.2851142883300781, 0.1588287353515625, 0.00377655029296875, 0.10735321044921875, 0.15936279296875, 0.5510749816894531, 0.016117095947265625, 0.603515625, 0.11523628234863281, 0.019008636474609375, 0.30472564697265625, 0.27091217041015625, 0.24777603149414062, 0.1793994903564453, -0.032390594482421875, -0.30774688720703125, 0.08507347106933594, -0.10248565673828125, 0.12953948974609375, -0.19586944580078125, -0.420562744140625, -0.036792755126953125, -0.3961467742919922, -0.3330059051513672, 0.13326263427734375, 0.17053985595703125, -0.13974761962890625, 0.26056671142578125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000014.npy"} +{"epoch": 0.021164021164021163, "step": 15, "batch_size": 64, "mean": -0.038463592529296875, "std": 0.3646548092365265, "min": -1.1634521484375, "p10": -0.5244338989257813, "median": 0.019306182861328125, "p90": 0.3524065017700196, "max": 0.7220001220703125, "pos_frac": 0.53125, "sample": [-0.18024444580078125, -0.3603057861328125, -0.11577796936035156, -0.025537490844726562, -0.09783363342285156, -0.52081298828125, -0.5141143798828125, -0.16335296630859375, -0.7314453125, -1.1634521484375, 0.31569671630859375, -0.13597869873046875, -0.63385009765625, -0.14391136169433594, 0.0774688720703125, 0.0969696044921875, 0.48424530029296875, -0.0020751953125, 0.39368247985839844, -0.49706459045410156, 0.04973602294921875, 0.18431854248046875, -0.004062652587890625, 0.13748550415039062, 0.0041656494140625, 0.01592254638671875, 0.05743408203125, -0.5259857177734375, -0.0669403076171875, -0.3448333740234375, -0.19409561157226562, -0.26088714599609375, -0.5970687866210938, -0.20672607421875, 0.5390052795410156, 0.0915679931640625, 0.31711578369140625, 0.044342041015625, 0.10509872436523438, 0.050018310546875, 0.10821533203125, -0.10176849365234375, -0.02901458740234375, 0.7220001220703125, 0.33611106872558594, -0.9283905029296875, 0.3593902587890625, 0.0320587158203125, 0.14995765686035156, 0.0226898193359375, -0.32479095458984375, 0.5367259979248047, 0.15587234497070312, 0.285797119140625, -0.1516246795654297, 0.135498046875, 0.134613037109375, 0.16777801513671875, 0.2623882293701172, -0.26911163330078125, 0.65802001953125, -0.6722259521484375, 0.21935272216796875, 0.2508716583251953], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000015.npy"} +{"epoch": 0.022675736961451247, "step": 16, "batch_size": 64, "mean": 0.016503140330314636, "std": 0.24475596845149994, "min": -0.45635986328125, "p10": -0.27330169677734373, "median": 0.012271881103515625, "p90": 0.30383930206298837, "max": 0.72589111328125, "pos_frac": 0.53125, "sample": [-0.21613311767578125, 0.13186264038085938, -0.44295501708984375, -0.09219551086425781, 0.3818702697753906, 0.0310211181640625, -0.016462326049804688, -0.19184112548828125, 0.65289306640625, -0.1347980499267578, 0.07903671264648438, 0.14214324951171875, 0.01844024658203125, -0.03113555908203125, 0.21177291870117188, -0.2578887939453125, 0.31165504455566406, -0.03713798522949219, 0.1803131103515625, 0.0416259765625, 0.1071014404296875, -0.14934158325195312, -0.06591796875, 0.4275398254394531, 0.16754722595214844, -0.3018035888671875, -0.08208847045898438, -0.151092529296875, -0.05962944030761719, 0.21622467041015625, 0.33353424072265625, 0.22055435180664062, -0.4468536376953125, 0.0146636962890625, 0.4761962890625, -0.09452056884765625, 0.217864990234375, 0.0189666748046875, -0.3240394592285156, 0.0129547119140625, -0.43273162841796875, 0.09182929992675781, 0.1448516845703125, 0.2856025695800781, 0.72589111328125, -0.20475196838378906, 0.179962158203125, -0.10131072998046875, 0.1053924560546875, -0.21626853942871094, 0.09760665893554688, -0.07442283630371094, -0.0005178451538085938, 0.01158905029296875, -0.20116806030273438, 0.07664108276367188, -0.2799072265625, -0.45635986328125, -0.0837554931640625, 0.18361663818359375, -0.16097068786621094, 0.0055789947509765625, 0.284637451171875, -0.22478103637695312], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000016.npy"} +{"epoch": 0.02418745275888133, "step": 17, "batch_size": 64, "mean": 0.03283247351646423, "std": 0.3127223551273346, "min": -0.871246337890625, "p10": -0.33483085632324217, "median": 0.023339271545410156, "p90": 0.387236785888672, "max": 0.896392822265625, "pos_frac": 0.59375, "sample": [0.01763153076171875, 0.085418701171875, -0.08115196228027344, 0.12957382202148438, 0.896392822265625, 0.12240409851074219, 0.39748382568359375, 0.09506034851074219, 0.16104507446289062, -0.6419296264648438, -0.181854248046875, -0.3565940856933594, -0.10286712646484375, 0.20259475708007812, -0.11071586608886719, 0.4930229187011719, 0.06976318359375, -0.060268402099609375, -0.0007266998291015625, -0.238372802734375, 0.3633270263671875, 0.10893440246582031, 0.008636474609375, 0.29615020751953125, 0.0282135009765625, -0.06970024108886719, -0.11171722412109375, -0.28404998779296875, 0.023233413696289062, 0.02344512939453125, -0.13840103149414062, -0.2434234619140625, -0.100860595703125, -0.12353134155273438, 0.53887939453125, 0.7333984375, -0.04449462890625, 0.7281417846679688, -0.5074005126953125, -0.871246337890625, 0.234405517578125, -0.13390350341796875, 0.14256858825683594, 0.05817413330078125, 0.16843414306640625, 0.5628814697265625, -0.5621185302734375, 0.27740478515625, 0.2274761199951172, -0.145904541015625, -0.054912567138671875, -0.53985595703125, -0.38126373291015625, -0.013208389282226562, 0.28704071044921875, 0.06256484985351562, 0.03808021545410156, 0.21653366088867188, 0.1541290283203125, 0.022441864013671875, 0.010501861572265625, 0.014404296875, 0.07726478576660156, 0.12469482421875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000017.npy"} +{"epoch": 0.025699168556311415, "step": 18, "batch_size": 64, "mean": 0.05475503206253052, "std": 0.23016396164894104, "min": -0.5016937255859375, "p10": -0.23275070190429686, "median": 0.04494762420654297, "p90": 0.3194284439086914, "max": 0.5848388671875, "pos_frac": 0.640625, "sample": [0.04001617431640625, 0.10540008544921875, 0.1851329803466797, -0.04723930358886719, 0.03225898742675781, 0.25550079345703125, 0.00359344482421875, 0.257843017578125, 0.5848388671875, 0.1190948486328125, 0.16579627990722656, 0.369049072265625, -0.22955703735351562, -0.230224609375, -0.11217498779296875, 0.48388671875, 0.019527435302734375, -0.46958160400390625, 0.2730903625488281, 0.0055999755859375, 0.3173789978027344, 0.2248077392578125, -0.05698394775390625, 0.17437744140625, -0.2033557891845703, 0.08414649963378906, -0.2342376708984375, -0.006378173828125, 0.3662376403808594, 0.5411300659179688, -0.04950141906738281, 0.2328948974609375, -0.5016937255859375, 0.0464324951171875, -0.2040691375732422, -0.01422882080078125, -0.2967185974121094, 0.32030677795410156, 0.07512664794921875, 0.2554473876953125, 0.17772674560546875, 0.26575469970703125, -0.047821044921875, -0.104949951171875, 0.014783859252929688, 0.431396484375, -0.3511466979980469, 0.1338176727294922, -0.23383331298828125, -0.32831573486328125, -0.055576324462890625, 0.018148422241210938, -0.12343025207519531, 0.0272369384765625, 0.1943511962890625, 0.04346275329589844, 0.11938095092773438, 0.11771011352539062, 0.273956298828125, -0.1656951904296875, -0.19725799560546875, 0.051116943359375, 0.20852279663085938, 0.15201187133789062], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000018.npy"} +{"epoch": 0.027210884353741496, "step": 19, "batch_size": 64, "mean": 0.028590813279151917, "std": 0.32338249683380127, "min": -1.048583984375, "p10": -0.3340309143066406, "median": 0.07567214965820312, "p90": 0.3595436096191407, "max": 0.718292236328125, "pos_frac": 0.5625, "sample": [0.5651397705078125, -0.28166961669921875, -0.2801799774169922, 0.01806640625, 0.18434906005859375, 0.4688987731933594, -0.5843582153320312, -0.3486175537109375, -0.6936492919921875, 0.171661376953125, 0.2203216552734375, -0.122283935546875, 0.570831298828125, 0.2188854217529297, 0.14940643310546875, -0.01605987548828125, -0.019256591796875, 0.01955413818359375, -0.04453277587890625, 0.09373283386230469, -0.46346282958984375, -0.1872100830078125, 0.235076904296875, -0.1744842529296875, 0.2263641357421875, 0.10347747802734375, 0.08173370361328125, 0.20325469970703125, -0.40334320068359375, -0.2807426452636719, 0.718292236328125, -0.12633895874023438, -0.2571868896484375, 0.19190597534179688, -1.048583984375, -0.21142578125, -0.29999542236328125, -0.38788414001464844, 0.29230499267578125, 0.187469482421875, 0.09754753112792969, -0.21129417419433594, 0.3030242919921875, 0.5635452270507812, 0.27082061767578125, 0.1279430389404297, -0.13877105712890625, 0.2812652587890625, -0.1714191436767578, 0.2180938720703125, 0.662261962890625, 0.2166728973388672, 0.24234771728515625, -0.0699625015258789, 0.1877899169921875, -0.053546905517578125, 0.069610595703125, -0.2959136962890625, 0.24561309814453125, 0.3728485107421875, -0.14397048950195312, -0.02974700927734375, 0.0670928955078125, 0.32849884033203125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000019.npy"} +{"epoch": 0.02872260015117158, "step": 20, "batch_size": 64, "mean": -0.0170963853597641, "std": 0.26566895842552185, "min": -0.8018112182617188, "p10": -0.3248939514160156, "median": -0.03464508056640625, "p90": 0.3202270507812501, "max": 0.6984024047851562, "pos_frac": 0.453125, "sample": [0.117034912109375, -0.10189437866210938, -0.19876861572265625, -0.1939544677734375, -0.17608261108398438, -0.44120025634765625, -0.190338134765625, 0.37646484375, 0.15550994873046875, 0.12622451782226562, 0.2672309875488281, 0.21655654907226562, -0.25803375244140625, 0.09928321838378906, -0.1475372314453125, 0.09812164306640625, -0.09302520751953125, -0.10136985778808594, 0.359771728515625, -0.8018112182617188, -0.01198577880859375, -0.10717391967773438, -0.27576446533203125, 0.36771392822265625, 0.08922958374023438, -0.34249114990234375, 0.18224334716796875, -0.18227577209472656, 0.5702590942382812, 0.1062612533569336, -0.337158203125, -0.06844329833984375, -0.11011505126953125, -0.1539134979248047, -0.19745635986328125, 0.20792007446289062, -0.05730438232421875, 0.020355224609375, -0.11952972412109375, 0.22411346435546875, -0.12994003295898438, 0.08442306518554688, 0.021450042724609375, -0.302490234375, 0.29868316650390625, -0.173248291015625, 0.012493133544921875, -0.654998779296875, 0.27742767333984375, 0.1746826171875, -0.33449554443359375, 0.6984024047851562, 0.21448898315429688, 0.32946014404296875, -0.0116424560546875, -0.07702255249023438, 0.332672119140625, 0.0770263671875, -0.21659088134765625, -0.06482696533203125, -0.008340835571289062, -0.4310722351074219, -0.17309951782226562, 0.04572296142578125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000020.npy"} +{"epoch": 0.030234315948601664, "step": 21, "batch_size": 64, "mean": -0.04383578896522522, "std": 0.318909615278244, "min": -1.13543701171875, "p10": -0.3735179901123047, "median": -0.06836318969726562, "p90": 0.32851181030273446, "max": 0.9480323791503906, "pos_frac": 0.4375, "sample": [-0.08039665222167969, 0.14558792114257812, -0.11584281921386719, 0.3815765380859375, 0.9480323791503906, 0.0038299560546875, 0.0226593017578125, 0.0794830322265625, -0.07463836669921875, -0.2254619598388672, 0.02181243896484375, -0.0620880126953125, -0.4070281982421875, 0.0740814208984375, -0.0370025634765625, 0.028148651123046875, -0.4672698974609375, 0.9081039428710938, -0.20841598510742188, -0.16376495361328125, 0.023761749267578125, 0.1558055877685547, -0.06016349792480469, -0.5052566528320312, -0.275543212890625, -0.468597412109375, -0.4208354949951172, 0.336090087890625, 0.0291290283203125, -0.2063922882080078, -1.13543701171875, 0.03626441955566406, 0.4825439453125, -0.21037864685058594, -0.3575897216796875, 0.31082916259765625, 0.003021240234375, 0.14499664306640625, -0.31395721435546875, -0.22105026245117188, -0.0413665771484375, -0.11751556396484375, 0.138031005859375, 0.19320297241210938, -0.22539520263671875, -0.2268962860107422, 0.17032623291015625, -0.25870513916015625, -0.182891845703125, -0.16051864624023438, 0.23174285888671875, 0.412750244140625, 0.061229705810546875, 0.26854705810546875, -0.08825302124023438, -0.16754150390625, -0.34427642822265625, 0.0410003662109375, -0.08548164367675781, 0.4773101806640625, -0.1871185302734375, -0.317535400390625, -0.13443756103515625, -0.3803443908691406], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000021.npy"} +{"epoch": 0.031746031746031744, "step": 22, "batch_size": 64, "mean": -0.028929293155670166, "std": 0.30786848068237305, "min": -1.0690460205078125, "p10": -0.41842956542968746, "median": -0.00148773193359375, "p90": 0.3234346389770508, "max": 0.5515613555908203, "pos_frac": 0.484375, "sample": [-0.5232696533203125, -0.38909912109375, -0.4360847473144531, 0.09600830078125, 0.16941261291503906, 0.12505340576171875, 0.13837242126464844, 0.0637664794921875, 0.06906509399414062, 0.16715049743652344, 0.40462493896484375, 0.04431915283203125, -0.008905410766601562, 0.0035858154296875, 0.3176841735839844, 0.0657958984375, -0.09278106689453125, -0.09083938598632812, -0.18238067626953125, -0.07537078857421875, -0.071441650390625, 0.451202392578125, -0.0398712158203125, 0.0073070526123046875, 0.0599517822265625, -1.0690460205078125, -0.001434326171875, -0.2526531219482422, -0.08718681335449219, 0.020969390869140625, -0.7560882568359375, 0.1643829345703125, -0.238372802734375, -0.01214599609375, -0.10495948791503906, -0.20927047729492188, -0.12266159057617188, 0.00226593017578125, -0.8785324096679688, 0.16832733154296875, 0.5515613555908203, 0.4022369384765625, -0.35878753662109375, 0.3113250732421875, -0.17209243774414062, 0.2904205322265625, -0.004955291748046875, 0.074371337890625, 0.02240753173828125, -0.430999755859375, -0.038562774658203125, -0.0015411376953125, -0.0138092041015625, 0.5188522338867188, 0.4158935546875, 0.13453292846679688, -0.09185218811035156, 0.15651512145996094, 0.26641845703125, -0.22319412231445312, -0.0977630615234375, -0.5881500244140625, 0.3258991241455078, -0.197052001953125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000022.npy"} +{"epoch": 0.03325774754346183, "step": 23, "batch_size": 64, "mean": -0.02451947331428528, "std": 0.2753342092037201, "min": -0.8352203369140625, "p10": -0.34796466827392575, "median": 0.038120269775390625, "p90": 0.2817146301269532, "max": 0.6925010681152344, "pos_frac": 0.5625, "sample": [-0.4350433349609375, 0.231658935546875, 0.4305419921875, 0.0260009765625, 0.00063323974609375, 0.05040931701660156, -0.10211944580078125, 0.3065338134765625, 0.0302276611328125, 0.125244140625, 0.0975494384765625, -0.021663665771484375, 0.06581878662109375, 0.0604705810546875, -0.11151885986328125, -0.2689208984375, -0.674224853515625, 0.35040283203125, -0.25567626953125, 0.21685028076171875, 0.06448745727539062, 0.08034133911132812, 0.20665740966796875, -0.4725799560546875, 0.07228469848632812, 0.2095489501953125, 0.06018829345703125, -0.08239555358886719, 0.2650146484375, 0.063568115234375, -0.8352203369140625, 0.06660842895507812, -0.11444854736328125, 0.17729568481445312, 0.6925010681152344, -0.2521858215332031, 0.04168701171875, 0.436309814453125, -0.038089752197265625, -0.7291030883789062, -0.1419525146484375, -0.37884521484375, -0.16912460327148438, -0.0806884765625, 0.28887176513671875, 0.13800048828125, -0.2680168151855469, 0.3471221923828125, 0.03455352783203125, -0.1790008544921875, -0.3617095947265625, -0.31589317321777344, -0.11602973937988281, -0.20650291442871094, -0.2915992736816406, 0.2114391326904297, 0.14994049072265625, -0.15547943115234375, -0.25274658203125, -0.186767578125, 0.08233642578125, 0.13383102416992188, 0.06305122375488281, 0.050319671630859375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000023.npy"} +{"epoch": 0.03476946334089191, "step": 24, "batch_size": 64, "mean": -0.027493715286254883, "std": 0.21561187505722046, "min": -0.5091667175292969, "p10": -0.29457836151123046, "median": -0.024881362915039062, "p90": 0.21575660705566407, "max": 0.6712265014648438, "pos_frac": 0.46875, "sample": [-0.023151397705078125, -0.1772918701171875, -0.05010223388671875, 0.06817054748535156, 0.00586700439453125, -0.19039535522460938, 0.260345458984375, 0.16139984130859375, 0.007694244384765625, 0.155731201171875, -0.029500961303710938, 0.0044231414794921875, -0.25806236267089844, 0.0159454345703125, 0.6712265014648438, 0.1292877197265625, -0.09463882446289062, -0.20375633239746094, 0.16588592529296875, -0.0016841888427734375, 0.26013946533203125, 0.09674835205078125, -0.14730072021484375, -0.3674812316894531, 0.46379852294921875, -0.2897148132324219, -0.026611328125, -0.13101577758789062, -0.135894775390625, 0.0261688232421875, 0.21759796142578125, -0.29653358459472656, -0.5091667175292969, -0.14693641662597656, 0.15006256103515625, -0.46173095703125, -0.42852020263671875, -0.17936134338378906, -0.211944580078125, -0.14739990234375, 0.1917724609375, 0.21146011352539062, 0.1272296905517578, 0.0508575439453125, 0.173675537109375, 0.23223876953125, 0.02576446533203125, -0.1003875732421875, -0.11974525451660156, 0.1359710693359375, 0.09329795837402344, -0.02972412109375, -0.29001617431640625, -0.08205413818359375, -0.32183837890625, 0.23775863647460938, -0.05251502990722656, -0.364013671875, -0.24509429931640625, 0.012401580810546875, 0.05506134033203125, 0.105865478515625, -0.111480712890625, -0.04837989807128906], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000024.npy"} +{"epoch": 0.036281179138321996, "step": 25, "batch_size": 64, "mean": 0.021612167358398438, "std": 0.3683222532272339, "min": -0.7617073059082031, "p10": -0.45455245971679686, "median": 0.0300750732421875, "p90": 0.38952407836914066, "max": 1.25018310546875, "pos_frac": 0.53125, "sample": [0.196807861328125, -0.16020965576171875, -0.010587692260742188, -0.20919036865234375, 0.15891456604003906, 0.039333343505859375, 0.8001708984375, -0.06191253662109375, -0.480499267578125, 0.06072235107421875, 0.2861328125, -0.08220100402832031, -0.601898193359375, -0.2547760009765625, -0.22625732421875, 0.035427093505859375, -0.48993682861328125, 0.15845870971679688, 0.11175155639648438, -0.20245361328125, -0.2852020263671875, 0.09668540954589844, 0.02161407470703125, 1.25018310546875, -0.13499069213867188, -0.10732460021972656, 0.024723052978515625, 1.01129150390625, -0.7617073059082031, 0.23028945922851562, 0.10218048095703125, 0.14318466186523438, 0.07883453369140625, 0.380706787109375, 0.39330291748046875, 0.09912681579589844, 0.712493896484375, -0.2306804656982422, -0.1925487518310547, 0.4965057373046875, -0.526824951171875, -0.12365341186523438, -0.057826995849609375, 0.0660400390625, -0.1062164306640625, 0.22821426391601562, -0.0992279052734375, -0.0037841796875, -0.42169189453125, -0.734161376953125, -0.027801513671875, -0.026065826416015625, 0.21038055419921875, -0.282501220703125, 0.14078521728515625, 0.03883171081542969, 0.1324615478515625, 0.18431854248046875, 0.0954437255859375, -0.21783065795898438, -0.46863555908203125, 0.057586669921875, 0.09009742736816406, 0.838775634765625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000025.npy"} +{"epoch": 0.03779289493575208, "step": 26, "batch_size": 64, "mean": -0.040790557861328125, "std": 0.3422093689441681, "min": -1.0486297607421875, "p10": -0.48813400268554685, "median": -0.05940437316894531, "p90": 0.32676658630371097, "max": 1.11749267578125, "pos_frac": 0.4375, "sample": [0.23264694213867188, -0.5821990966796875, -0.5074539184570312, 0.04364776611328125, 0.17321014404296875, 0.31449127197265625, -0.1542816162109375, -0.3583183288574219, -0.058666229248046875, -0.02196502685546875, 0.3320274353027344, 1.11749267578125, -0.161224365234375, -0.5861663818359375, -0.7813873291015625, -0.1146240234375, -0.12418365478515625, 0.1873035430908203, 0.3914909362792969, -0.01819610595703125, -0.1414947509765625, -0.19576454162597656, -0.09326171875, -0.29610443115234375, -0.09999465942382812, -0.164794921875, 0.2791709899902344, -0.27600860595703125, 0.15234756469726562, -0.1744384765625, 0.07364845275878906, 0.4674835205078125, 0.11183929443359375, 0.3015403747558594, 0.12985992431640625, -0.0575408935546875, 0.16046905517578125, 0.06940269470214844, -0.06014251708984375, -1.0486297607421875, 0.14332008361816406, -0.5396194458007812, -0.18415260314941406, -0.1117706298828125, 0.6890106201171875, -0.44305419921875, -0.3876495361328125, 0.39715003967285156, -0.096527099609375, 0.011205673217773438, 0.13311004638671875, -0.5878219604492188, -0.10908126831054688, 0.2979583740234375, -0.11341476440429688, 0.0681610107421875, -0.34307098388671875, -0.17620086669921875, 0.35041046142578125, -0.15147972106933594, -0.21515274047851562, 0.05941200256347656, 0.232421875, 0.005008697509765625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000026.npy"} +{"epoch": 0.039304610733182165, "step": 27, "batch_size": 64, "mean": -0.007200300693511963, "std": 0.3243751525878906, "min": -0.6388778686523438, "p10": -0.40545196533203126, "median": -0.020030975341796875, "p90": 0.36526947021484374, "max": 0.995391845703125, "pos_frac": 0.453125, "sample": [-0.092529296875, -0.018560409545898438, -0.40079498291015625, -0.021501541137695312, 0.08286666870117188, 0.44437408447265625, 0.059375762939453125, -0.6388778686523438, 0.01837921142578125, -0.4122161865234375, -0.170745849609375, -0.3697509765625, -0.034832000732421875, 0.22540283203125, -0.00542449951171875, -0.029018402099609375, 0.3136329650878906, -0.25479888916015625, -0.2642669677734375, -0.40744781494140625, 0.45426177978515625, -0.04779052734375, 0.995391845703125, 0.2998199462890625, -0.0003662109375, -0.1141510009765625, 0.004302978515625, -0.07546615600585938, 0.331634521484375, -0.1370868682861328, -0.41510009765625, 0.07876396179199219, 0.04343605041503906, -0.30625152587890625, 0.28046607971191406, -0.3212127685546875, -0.11642074584960938, 0.9384841918945312, 0.34459686279296875, 0.0057373046875, -0.236541748046875, -0.49974822998046875, -0.11194801330566406, 0.04309844970703125, 0.11629867553710938, -0.07813262939453125, 0.36533355712890625, 0.36511993408203125, -0.49147796630859375, -0.12365913391113281, -0.361968994140625, -0.1219482421875, 0.09326744079589844, 0.6258087158203125, 0.08030891418457031, -0.1906871795654297, 0.206695556640625, -0.10059928894042969, 0.0072784423828125, -0.39916229248046875, 0.02105712890625, -0.6048431396484375, 0.3006629943847656, 0.36865234375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000027.npy"} +{"epoch": 0.04081632653061224, "step": 28, "batch_size": 64, "mean": -0.008278310298919678, "std": 0.331570029258728, "min": -0.8742218017578125, "p10": -0.4453151702880859, "median": 0.04610252380371094, "p90": 0.3825851440429688, "max": 0.807373046875, "pos_frac": 0.578125, "sample": [0.31024932861328125, -0.8742218017578125, 0.555877685546875, -0.081298828125, -0.5041351318359375, -0.10566329956054688, 0.1324615478515625, 0.045543670654296875, 0.12361335754394531, 0.807373046875, -0.3312339782714844, 0.12604522705078125, -0.6854019165039062, 0.01556396484375, -0.823089599609375, -0.2736968994140625, -0.33473968505859375, -0.11864852905273438, 0.17010879516601562, 0.3163299560546875, 0.1519622802734375, 0.19899368286132812, 0.0072765350341796875, -0.12580108642578125, 0.1551227569580078, 0.06534194946289062, -0.23567581176757812, 0.05859375, -0.45949554443359375, -0.12741851806640625, -0.486602783203125, -0.4122276306152344, 0.3742218017578125, 0.044677734375, 0.4841461181640625, 0.30957794189453125, -0.04267120361328125, 0.026638031005859375, -0.00074005126953125, -0.2428436279296875, 0.10239601135253906, 0.38616943359375, -0.34192657470703125, 0.5878772735595703, 0.046661376953125, 0.051361083984375, 0.13228797912597656, -0.35594940185546875, 0.07091331481933594, -0.002422332763671875, -0.5185089111328125, 0.5577011108398438, 0.0850830078125, -0.02198028564453125, 0.44073486328125, 0.10052490234375, 0.15845870971679688, -0.387115478515625, -0.233551025390625, 0.12795257568359375, 0.0867919921875, 0.28508949279785156, 0.13221359252929688, -0.23468780517578125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000028.npy"} +{"epoch": 0.042328042328042326, "step": 29, "batch_size": 64, "mean": 0.05616277456283569, "std": 0.36369413137435913, "min": -0.752349853515625, "p10": -0.4436737060546874, "median": 0.04289531707763672, "p90": 0.63367919921875, "max": 0.81951904296875, "pos_frac": 0.546875, "sample": [0.428009033203125, -0.51995849609375, 0.6904258728027344, -0.0535736083984375, 0.6801300048828125, 0.08231353759765625, 0.1206207275390625, 0.04230308532714844, -0.527099609375, -0.5347747802734375, -0.00603485107421875, 0.16583633422851562, 0.11582183837890625, 0.13589096069335938, 0.7400588989257812, 0.6163711547851562, -0.752349853515625, -0.5205421447753906, -0.0124664306640625, -0.02970123291015625, 0.013065338134765625, -0.47479248046875, 0.5225791931152344, -0.09376335144042969, -0.33603668212890625, 0.3653106689453125, -0.17216110229492188, -0.330413818359375, 0.15765380859375, 0.2282428741455078, 0.13713455200195312, -0.371063232421875, 0.189117431640625, 0.13068580627441406, 0.699066162109375, 0.19603347778320312, 0.1836090087890625, 0.32561492919921875, 0.10437774658203125, 0.81951904296875, -0.2763519287109375, 0.14493179321289062, -0.1191253662109375, 0.043487548828125, -0.06025505065917969, -0.0157623291015625, -0.35564422607421875, -0.0438385009765625, -0.31801605224609375, -0.17359352111816406, 0.636627197265625, 0.06304168701171875, 0.7070159912109375, 0.176361083984375, -0.085479736328125, -0.12303924560546875, -0.5370559692382812, -0.00492095947265625, -0.330078125, 0.626800537109375, 0.3842620849609375, -0.1194000244140625, 0.197509765625, 0.021881103515625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000029.npy"} +{"epoch": 0.04383975812547241, "step": 30, "batch_size": 64, "mean": 0.09423834085464478, "std": 0.3643344044685364, "min": -0.4705924987792969, "p10": -0.2707786560058594, "median": 0.03786754608154297, "p90": 0.43837966918945315, "max": 1.4501419067382812, "pos_frac": 0.53125, "sample": [-0.22343063354492188, 0.04985237121582031, -0.27532196044921875, 0.068939208984375, -0.11560630798339844, -0.009429931640625, -0.0414581298828125, 0.0019378662109375, -0.14933013916015625, -0.1132965087890625, -0.11151885986328125, 0.23944091796875, -0.214324951171875, 0.31000518798828125, 0.18749237060546875, 0.4434814453125, -0.13544082641601562, 1.4501419067382812, -0.06805419921875, 0.4264678955078125, 0.327911376953125, 0.3002510070800781, -0.17510032653808594, 0.14135360717773438, 0.6457138061523438, -0.2601776123046875, 0.025882720947265625, -0.42789459228515625, 0.13671875, 0.3733940124511719, 0.3420867919921875, -0.03909111022949219, 0.05419921875, 0.646392822265625, 1.1636962890625, -0.16590118408203125, 0.19367218017578125, 0.2281494140625, 0.7019805908203125, -0.24826622009277344, 0.09769439697265625, 0.1459503173828125, 1.072418212890625, -0.42652130126953125, -0.06626510620117188, 0.26227569580078125, -0.30975341796875, -0.17975997924804688, 0.06013679504394531, -0.37603759765625, 0.42647552490234375, -0.34824371337890625, 0.34525108337402344, -0.05413818359375, -0.0181121826171875, 0.09865951538085938, -0.12265777587890625, 0.053131103515625, -0.16015625, -0.0027561187744140625, 0.11019134521484375, -0.4705924987792969, -0.04685211181640625, 0.2553977966308594], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000030.npy"} +{"epoch": 0.045351473922902494, "step": 31, "batch_size": 64, "mean": 0.05173113942146301, "std": 0.3067670166492462, "min": -0.889923095703125, "p10": -0.23914566040039062, "median": 0.033789634704589844, "p90": 0.4080062866210938, "max": 0.9306640625, "pos_frac": 0.53125, "sample": [0.3834381103515625, 0.0887908935546875, 0.0541534423828125, 0.3763275146484375, -0.2401275634765625, -0.67901611328125, 0.040920257568359375, 0.1755523681640625, 0.16359710693359375, 0.36060333251953125, 0.16250991821289062, 0.1963653564453125, 0.09416770935058594, 0.48919677734375, 0.22780609130859375, 0.405731201171875, 0.39121055603027344, -0.01178741455078125, 0.337646484375, -0.21849441528320312, -0.05108642578125, 0.026659011840820312, -0.010986328125, 0.05652809143066406, 0.27864837646484375, -0.032318115234375, 0.1392059326171875, 0.6286163330078125, -0.17304229736328125, -0.07390594482421875, -0.1414642333984375, -0.080810546875, -0.14344024658203125, -0.14916038513183594, -0.02567291259765625, 0.07364654541015625, -0.2034149169921875, -0.402740478515625, -0.26744651794433594, -0.05218505859375, 0.4089813232421875, 0.00011444091796875, -0.4046974182128906, 0.44976806640625, 0.9306640625, -0.15503311157226562, -0.23685455322265625, -0.12861251831054688, 0.08625030517578125, 0.28127288818359375, -0.889923095703125, -0.2335662841796875, 0.512054443359375, -0.022979736328125, 0.217071533203125, 0.06240081787109375, 0.28154754638671875, -0.11993408203125, -0.11712455749511719, -0.02182769775390625, 0.6238861083984375, 0.06666946411132812, -0.016754150390625, -0.4568023681640625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000031.npy"} +{"epoch": 0.04686318972033258, "step": 32, "batch_size": 64, "mean": -0.038973838090896606, "std": 0.303048312664032, "min": -0.7988357543945312, "p10": -0.36792297363281246, "median": -0.031322479248046875, "p90": 0.34068374633789067, "max": 0.6704483032226562, "pos_frac": 0.421875, "sample": [-0.11432647705078125, -0.3440399169921875, -0.3781585693359375, -0.0931854248046875, -0.0356292724609375, -0.02701568603515625, 0.13299560546875, -0.07796478271484375, 0.37750244140625, -0.062591552734375, 0.31827545166015625, 0.44113922119140625, -0.17543411254882812, -0.0089263916015625, 0.1511096954345703, -0.1797332763671875, 0.4048309326171875, 0.21364593505859375, -0.5226821899414062, 0.054851531982421875, -0.11501693725585938, 0.19131088256835938, 0.05598640441894531, 0.293182373046875, 0.46126556396484375, 0.0068645477294921875, 0.138397216796875, 0.3184051513671875, -0.2080078125, 0.19029998779296875, 0.11054039001464844, 0.3315773010253906, -0.021253585815429688, -0.2370758056640625, -0.2031097412109375, 0.6704483032226562, 0.043487548828125, -0.6253089904785156, 0.03485107421875, -0.0026092529296875, 0.26844024658203125, 0.3346061706542969, -0.24074935913085938, -0.17354202270507812, -0.2298870086669922, -0.27754974365234375, -0.13753509521484375, -0.25531005859375, -0.7988357543945312, -0.4716968536376953, -0.7027587890625, 0.3432884216308594, -0.34151649475097656, -0.02005767822265625, -0.21074295043945312, -0.1602191925048828, -0.34278106689453125, 0.371185302734375, -0.13751220703125, 0.2088623046875, -0.3069496154785156, 0.12416839599609375, -0.193389892578125, -0.652740478515625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000032.npy"} +{"epoch": 0.04837490551776266, "step": 33, "batch_size": 64, "mean": -0.001662135124206543, "std": 0.3591341972351074, "min": -1.2842178344726562, "p10": -0.42285766601562497, "median": -0.004364013671875, "p90": 0.38634567260742203, "max": 0.9859237670898438, "pos_frac": 0.5, "sample": [-0.099822998046875, -0.4541015625, 0.078094482421875, -0.447052001953125, -0.18473052978515625, 0.1811065673828125, -0.413330078125, 0.15154647827148438, 0.6184234619140625, 0.30321502685546875, -0.42694091796875, 0.11383628845214844, -0.20055866241455078, -0.07058143615722656, 0.7941741943359375, -0.8040580749511719, -0.67205810546875, 0.20125198364257812, -0.17542266845703125, 0.402191162109375, -0.04915618896484375, 0.9859237670898438, 0.2952728271484375, 0.144500732421875, -1.2842178344726562, 0.001861572265625, 0.12494659423828125, 0.34937286376953125, -0.1364593505859375, 0.2335205078125, 0.21865081787109375, -0.12281417846679688, 0.2500190734863281, -0.09030723571777344, -0.11877250671386719, -0.0493621826171875, 0.0845947265625, -0.14521026611328125, -0.10614395141601562, -0.2921772003173828, 0.2287139892578125, -0.3739128112792969, -0.2026348114013672, -0.01448822021484375, -0.44659423828125, -0.010589599609375, 0.42059326171875, -0.08214378356933594, 0.02860736846923828, 0.112945556640625, -0.3392066955566406, 0.5219345092773438, 0.18277740478515625, 0.1953582763671875, -0.16070175170898438, -0.30295562744140625, 0.258636474609375, 0.41930389404296875, 0.0576171875, -0.2920684814453125, 0.010944366455078125, 0.3217277526855469, 0.1883258819580078, -0.017791748046875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000033.npy"} +{"epoch": 0.049886621315192746, "step": 34, "batch_size": 64, "mean": 0.08535227179527283, "std": 0.33712127804756165, "min": -0.9809646606445312, "p10": -0.35966625213623044, "median": 0.09466934204101562, "p90": 0.49484996795654307, "max": 0.8438796997070312, "pos_frac": 0.59375, "sample": [-0.39830780029296875, 0.42067718505859375, 0.1928558349609375, -0.22777366638183594, 0.37720489501953125, -0.03090667724609375, -0.019872665405273438, -0.00337982177734375, 0.06312179565429688, 0.06196784973144531, 0.30118560791015625, -0.3266754150390625, -0.10723876953125, -0.105377197265625, -0.498870849609375, -0.074249267578125, 0.8438796997070312, 0.16141510009765625, 0.13447952270507812, 0.17833518981933594, -0.1045684814453125, 0.15283584594726562, 0.21967506408691406, 0.195159912109375, -0.11008453369140625, 0.238800048828125, 0.5229263305664062, 0.5946998596191406, 0.25914764404296875, -0.3344440460205078, -0.37047576904296875, 0.3021202087402344, -0.12475204467773438, 0.07184028625488281, -0.17051315307617188, 0.7625732421875, 0.42122650146484375, 0.3088111877441406, 0.14931488037109375, -0.04990386962890625, -0.05315399169921875, 0.2754783630371094, 0.08559226989746094, 0.27626800537109375, 0.4702606201171875, 0.459808349609375, -0.0816192626953125, 0.31679534912109375, 0.3916587829589844, 0.5053882598876953, 0.10374641418457031, -0.9809646606445312, 0.6385116577148438, 0.58270263671875, 0.05941009521484375, -0.4698905944824219, 0.13094329833984375, -0.38227272033691406, -0.1362628936767578, 0.06902885437011719, 0.34018707275390625, -0.24794769287109375, -0.2540130615234375, -0.5139694213867188], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000034.npy"} +{"epoch": 0.05139833711262283, "step": 35, "batch_size": 64, "mean": -0.01030150055885315, "std": 0.3363676965236664, "min": -0.75408935546875, "p10": -0.4360786437988281, "median": -0.00035953521728515625, "p90": 0.37086505889892607, "max": 0.999176025390625, "pos_frac": 0.5, "sample": [0.1170196533203125, -0.5468368530273438, 0.24135971069335938, 0.15140533447265625, -0.41306304931640625, 0.8087310791015625, 0.19007110595703125, -0.06020355224609375, 0.1606597900390625, -0.1886310577392578, -0.5632171630859375, 0.29638671875, -0.00450897216796875, 0.008148193359375, -0.36954498291015625, 0.1464099884033203, 0.999176025390625, 0.0617218017578125, 0.56890869140625, -0.07733917236328125, -0.15921592712402344, 0.0453948974609375, 0.4027843475341797, -0.188751220703125, -0.15717315673828125, 0.10737991333007812, -0.4736175537109375, -0.09328460693359375, -0.013141632080078125, -0.03527069091796875, -0.75408935546875, 0.6747894287109375, 0.009368896484375, -0.4380035400390625, -0.32431793212890625, -0.43158721923828125, 0.007991790771484375, -0.591094970703125, 0.42467308044433594, -0.5918235778808594, 0.08128738403320312, 0.08387374877929688, 0.13106536865234375, 0.21323394775390625, -0.15835952758789062, 0.09902191162109375, 0.2618751525878906, -0.40860748291015625, -0.062267303466796875, -0.24083709716796875, -0.07364273071289062, 0.17461776733398438, -0.3863716125488281, -0.070587158203125, -0.1700592041015625, 0.18431854248046875, 0.1138916015625, 0.053497314453125, -0.048725128173828125, -0.1129608154296875, 0.0037899017333984375, 0.6507110595703125, -0.08755302429199219, 0.16182708740234375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000035.npy"} +{"epoch": 0.05291005291005291, "step": 36, "batch_size": 64, "mean": 0.0723852813243866, "std": 0.3569437861442566, "min": -0.755706787109375, "p10": -0.35621891021728513, "median": 0.03702545166015625, "p90": 0.545961380004883, "max": 0.98712158203125, "pos_frac": 0.578125, "sample": [0.0509796142578125, 0.736480712890625, 0.221466064453125, -0.0671844482421875, -0.11281776428222656, -0.2778892517089844, 0.5828018188476562, 0.12327194213867188, 0.27863311767578125, -0.755706787109375, 0.5707550048828125, 0.19496536254882812, 0.25351715087890625, -0.3455657958984375, 0.11279869079589844, -0.19687652587890625, 0.3788909912109375, -0.12345504760742188, -0.114959716796875, 0.6069679260253906, 0.3562183380126953, 0.4101676940917969, 0.38570404052734375, 0.12595558166503906, -0.24976348876953125, -0.3012542724609375, -0.2568778991699219, 0.6028823852539062, 0.4881095886230469, 0.98712158203125, -0.275360107421875, 0.03174591064453125, 0.0377044677734375, 0.371185302734375, 0.0219573974609375, -0.08492660522460938, -0.20471954345703125, 0.4142913818359375, -0.4382286071777344, -0.03440093994140625, 0.10442733764648438, 0.2266082763671875, 0.8226394653320312, -0.36078453063964844, 0.0054378509521484375, 0.18494796752929688, 0.43300628662109375, 0.18808364868164062, 0.24396514892578125, 0.4500160217285156, -0.013179779052734375, -0.0867767333984375, -0.04571723937988281, 0.17852783203125, -0.18084716796875, -0.4994659423828125, -0.49167633056640625, -0.1505889892578125, -0.6063385009765625, 0.036346435546875, -0.3971977233886719, 0.4000396728515625, -0.33004188537597656, 0.016641616821289062], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000036.npy"} +{"epoch": 0.05442176870748299, "step": 37, "batch_size": 64, "mean": 0.09815576672554016, "std": 0.3523368537425995, "min": -0.8534774780273438, "p10": -0.273878288269043, "median": 0.05164813995361328, "p90": 0.6215309143066408, "max": 0.832672119140625, "pos_frac": 0.625, "sample": [0.3291473388671875, 0.6379013061523438, 0.0193634033203125, -0.33354949951171875, 0.3471221923828125, -0.08755111694335938, -0.1427631378173828, 0.05955314636230469, 0.18613243103027344, 0.043743133544921875, 0.716888427734375, 0.16298294067382812, 0.39176177978515625, 0.537078857421875, -0.044261932373046875, -0.8534774780273438, 0.25354766845703125, 0.7606887817382812, 0.1571044921875, 0.418548583984375, -0.2018280029296875, -0.11171913146972656, -0.6598358154296875, -0.276031494140625, -0.07623672485351562, -0.36646270751953125, 0.037387847900390625, -0.26885414123535156, 0.08708953857421875, -0.255889892578125, -0.6080474853515625, 0.33695220947265625, 0.038543701171875, 0.01830291748046875, 0.703460693359375, 0.4679412841796875, 0.5165023803710938, -0.23938751220703125, 0.007358551025390625, -0.012538909912109375, 0.65057373046875, -0.13846206665039062, 0.042697906494140625, 0.6303634643554688, -0.13040924072265625, 0.025844573974609375, 0.355621337890625, -0.0655059814453125, -0.1195068359375, -0.1422271728515625, 0.3289833068847656, -0.166412353515625, 0.11965179443359375, 0.1309490203857422, 0.0771942138671875, 0.39809417724609375, 0.347320556640625, 0.28102684020996094, 0.832672119140625, -0.26439666748046875, 0.06276702880859375, -0.35335540771484375, 0.08089447021484375, 0.600921630859375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000037.npy"} +{"epoch": 0.055933484504913075, "step": 38, "batch_size": 64, "mean": 0.01809588074684143, "std": 0.34552595019340515, "min": -0.61260986328125, "p10": -0.3632904052734375, "median": -0.01277923583984375, "p90": 0.5007675170898439, "max": 1.3723335266113281, "pos_frac": 0.5, "sample": [-0.04561042785644531, 0.2942638397216797, 0.0730133056640625, 1.3723335266113281, -0.05175018310546875, 0.5795936584472656, 0.06525802612304688, 0.5223960876464844, -0.25118255615234375, 0.0696563720703125, 0.09243011474609375, -0.17377853393554688, 0.12786293029785156, 0.05904197692871094, 0.05767822265625, -0.37163543701171875, -0.20565032958984375, 0.09021568298339844, -0.27986907958984375, 0.00502777099609375, -0.44852447509765625, 0.01806640625, -0.34381866455078125, -0.513153076171875, 0.22847747802734375, -0.0802764892578125, 0.2717132568359375, -0.5798263549804688, -0.13419342041015625, -0.171783447265625, -0.3431415557861328, -0.065673828125, -0.05582237243652344, 0.11375045776367188, 0.8728179931640625, 0.22513961791992188, -0.03058624267578125, -0.5159473419189453, -0.06060028076171875, 0.04309844970703125, 0.3241920471191406, -0.12743377685546875, 0.20711517333984375, 0.08987045288085938, 0.2633781433105469, -0.16005706787109375, -0.16510391235351562, -0.48262786865234375, 0.5925140380859375, 0.51312255859375, -0.2053070068359375, -0.21228790283203125, 0.4719390869140625, -0.2103271484375, 0.0799407958984375, 0.5409069061279297, -0.1435394287109375, -0.05321502685546875, -0.26627349853515625, -0.0904693603515625, 0.049304962158203125, -0.61260986328125, 0.27132606506347656, 0.024766921997070312], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000038.npy"} +{"epoch": 0.05744520030234316, "step": 39, "batch_size": 64, "mean": 0.053556889295578, "std": 0.3736174702644348, "min": -1.11181640625, "p10": -0.4437427520751952, "median": 0.06519412994384766, "p90": 0.49820823669433595, "max": 0.934326171875, "pos_frac": 0.609375, "sample": [0.4225616455078125, 0.03173828125, -0.1435394287109375, 0.18718528747558594, -0.16436767578125, 0.3614044189453125, -0.0063323974609375, 0.23548316955566406, 0.4663238525390625, 0.4888916015625, -0.6772003173828125, 0.09953689575195312, 0.44217872619628906, -0.07910537719726562, 0.0836639404296875, -0.1418323516845703, -0.11699676513671875, 0.6175308227539062, 0.22660064697265625, -0.1320648193359375, -0.7075672149658203, -0.595733642578125, -0.04215431213378906, 0.5591964721679688, 0.934326171875, -0.17646026611328125, -1.11181640625, 0.2396221160888672, -0.05384063720703125, -0.3473854064941406, 0.041667938232421875, 0.080718994140625, 0.0523681640625, 0.2471923828125, 0.5022010803222656, 0.610443115234375, 0.35997772216796875, 0.36470794677734375, 0.183868408203125, -0.5301055908203125, 0.3087005615234375, -0.48503875732421875, 0.00763702392578125, 0.536376953125, -0.312164306640625, 0.159881591796875, -0.089111328125, -0.243133544921875, 0.14942550659179688, -0.10763931274414062, 0.22269439697265625, -0.6737136840820312, -0.14246368408203125, -0.2914276123046875, 0.018148422241210938, 0.036956787109375, 0.09818267822265625, 0.848358154296875, 0.08250045776367188, 0.06901168823242188, 0.14770126342773438, 0.06137657165527344, -0.041919708251953125, 0.2544136047363281], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000039.npy"} +{"epoch": 0.05895691609977324, "step": 40, "batch_size": 64, "mean": 0.03565022349357605, "std": 0.31573286652565, "min": -0.6953926086425781, "p10": -0.35339775085449215, "median": 0.03944110870361328, "p90": 0.4525678634643555, "max": 0.858551025390625, "pos_frac": 0.53125, "sample": [0.26264190673828125, 0.45542144775390625, -0.011074066162109375, 0.263458251953125, -0.004863739013671875, 0.2516460418701172, 0.538299560546875, 0.2039794921875, 0.1522216796875, 0.4829139709472656, 0.3435211181640625, 0.08926200866699219, 0.198486328125, -0.23949623107910156, 0.10691642761230469, -0.31842803955078125, 0.1614990234375, 0.392242431640625, 0.42926025390625, -0.16560935974121094, -0.11472702026367188, 0.140777587890625, 0.04232025146484375, 0.4578208923339844, -0.203125, 0.2456207275390625, -0.36818695068359375, -0.14281463623046875, 0.5182037353515625, -0.08739089965820312, 0.2417316436767578, -0.5081329345703125, 0.858551025390625, 0.08443450927734375, 0.20508766174316406, -0.009349822998046875, -0.07543182373046875, 0.03656196594238281, -0.55902099609375, -0.28216552734375, -0.023578643798828125, 0.0509033203125, 0.4459095001220703, -0.1355743408203125, -0.559814453125, 0.005672454833984375, 0.09781646728515625, -0.2643547058105469, -0.3632087707519531, -0.089874267578125, 0.22951507568359375, -0.1150665283203125, 0.08821487426757812, 0.6468887329101562, -0.33050537109375, -0.6953926086425781, -0.509765625, -0.24637603759765625, -0.12761688232421875, -0.31243896484375, 0.3969097137451172, 0.1390857696533203, -0.0483245849609375, -0.07047271728515625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000040.npy"} +{"epoch": 0.06046863189720333, "step": 41, "batch_size": 64, "mean": 0.11279991269111633, "std": 0.3728303909301758, "min": -0.6320075988769531, "p10": -0.35307617187499996, "median": 0.05382537841796875, "p90": 0.6388130187988282, "max": 0.91680908203125, "pos_frac": 0.59375, "sample": [0.0557098388671875, -0.07635498046875, -0.15831375122070312, -0.4399375915527344, 0.211456298828125, -0.03857421875, -0.03130340576171875, 0.7986793518066406, 0.10095977783203125, 0.298858642578125, 0.5397262573242188, 0.13911056518554688, -0.18281173706054688, 0.2187957763671875, 0.306427001953125, 0.2917633056640625, 0.03383636474609375, 0.3947601318359375, -0.08045578002929688, -0.10785293579101562, 0.774688720703125, -0.3188629150390625, -0.3350677490234375, 0.2959442138671875, 0.10501289367675781, 0.01123046875, 0.02062225341796875, 0.4139213562011719, 0.05194091796875, 0.426605224609375, -0.10471725463867188, 0.91680908203125, 0.9145660400390625, 0.20703887939453125, -0.03921318054199219, -0.43039703369140625, 0.4647941589355469, -0.14822769165039062, 0.6447982788085938, -0.15537643432617188, 0.19111251831054688, -0.6320075988769531, 0.004611968994140625, -0.06415176391601562, 0.7412567138671875, -0.44659996032714844, -0.3607940673828125, 0.11991500854492188, 0.5145721435546875, -0.333404541015625, -0.5187492370605469, 0.624847412109375, 0.04430389404296875, -0.40496063232421875, 0.3142738342285156, 0.4097137451171875, 0.38594818115234375, 0.21723175048828125, 0.7305641174316406, -0.263580322265625, -0.15301513671875, 0.5878982543945312, -0.20738601684570312, -0.2729949951171875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000041.npy"} +{"epoch": 0.06198034769463341, "step": 42, "batch_size": 64, "mean": -0.007597431540489197, "std": 0.40125828981399536, "min": -1.1277389526367188, "p10": -0.47302589416503904, "median": -0.027310848236083984, "p90": 0.4368843078613282, "max": 1.2473526000976562, "pos_frac": 0.484375, "sample": [0.555328369140625, 0.13232421875, -0.5966949462890625, -0.109344482421875, -0.17049789428710938, -0.1736602783203125, 0.09038543701171875, -0.02901744842529297, -0.2423553466796875, 0.09730148315429688, -0.4492034912109375, -0.16937255859375, 0.04526519775390625, 0.24204635620117188, -0.4355010986328125, 0.5491371154785156, 0.4427642822265625, 0.56634521484375, 0.055789947509765625, 0.2662086486816406, 0.11834716796875, 1.2473526000976562, -0.053863525390625, -0.179107666015625, -0.4686012268066406, -0.3202972412109375, 0.1423492431640625, -0.1019287109375, -0.22674560546875, -0.20566368103027344, 0.353546142578125, 0.3917522430419922, -0.17364883422851562, -0.32129669189453125, 0.5691795349121094, 0.32091522216796875, 0.28778076171875, 0.42316436767578125, -1.1277389526367188, 0.3527069091796875, -0.15428733825683594, -0.47492218017578125, -1.0826950073242188, -0.47846221923828125, -0.0855865478515625, -0.387725830078125, -0.025604248046875, -0.0435791015625, -0.20371627807617188, -0.4931640625, 0.4534778594970703, 0.044994354248046875, -0.28936004638671875, -0.22298431396484375, 0.27923583984375, 0.0529327392578125, -0.7155075073242188, 0.1919536590576172, 0.4026336669921875, 0.3345794677734375, 0.3584423065185547, 0.21384048461914062, 0.1979217529296875, -0.054103851318359375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000042.npy"} +{"epoch": 0.06349206349206349, "step": 43, "batch_size": 64, "mean": 0.022962093353271484, "std": 0.3836731016635895, "min": -1.2718734741210938, "p10": -0.458687400817871, "median": 0.06567955017089844, "p90": 0.4815771102905274, "max": 0.6954002380371094, "pos_frac": 0.5625, "sample": [0.1223907470703125, 0.09267044067382812, -0.25982666015625, -0.022859573364257812, -0.2265625, -0.10651779174804688, -0.15333175659179688, 0.17721939086914062, 0.4953155517578125, -0.335113525390625, 0.3124427795410156, 0.23520660400390625, 0.2330169677734375, -0.668426513671875, -0.22686767578125, 0.62164306640625, 0.41980743408203125, -0.0280914306640625, 0.18304443359375, -0.49878692626953125, 0.4640827178955078, -0.09220123291015625, -0.03765869140625, -0.25431251525878906, 0.6954002380371094, 0.06231689453125, 0.06797409057617188, -0.222991943359375, 0.21668243408203125, 0.0250701904296875, 0.137542724609375, 0.0985260009765625, 0.48907470703125, -0.1892547607421875, -0.1486225128173828, 0.4016571044921875, 0.4589691162109375, -0.36512184143066406, 0.5742950439453125, 0.2165069580078125, -0.2212982177734375, -0.18891143798828125, 0.013336181640625, -1.2718734741210938, 0.33744049072265625, 0.1973876953125, 0.063385009765625, 0.37163543701171875, -0.5040779113769531, 0.455169677734375, 0.20560264587402344, 0.695037841796875, -0.26364898681640625, -0.21521759033203125, 0.52154541015625, 0.1904449462890625, -0.7568416595458984, 0.4048271179199219, 0.1845703125, -0.708740234375, -0.6095428466796875, -0.19451904296875, 0.1633777618408203, -0.36382293701171875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000043.npy"} +{"epoch": 0.06500377928949358, "step": 44, "batch_size": 64, "mean": 0.05126279592514038, "std": 0.43854257464408875, "min": -1.0680885314941406, "p10": -0.4167137145996093, "median": 0.04663562774658203, "p90": 0.5493822097778324, "max": 1.560272216796875, "pos_frac": 0.578125, "sample": [-0.33925819396972656, -0.19227218627929688, -0.73663330078125, 0.00418853759765625, 0.30828094482421875, -0.43011474609375, 0.111419677734375, -0.19451141357421875, -0.24590301513671875, -0.3033294677734375, 0.009765625, 0.412353515625, -0.2990570068359375, 0.07451629638671875, 0.11938095092773438, 0.02468109130859375, -0.08397674560546875, 0.17295074462890625, -0.0910186767578125, 0.37789154052734375, -0.13454437255859375, 0.3307914733886719, 0.5888042449951172, 0.02672576904296875, -0.12520217895507812, 0.734222412109375, 0.170318603515625, -0.08200836181640625, 0.40777587890625, 0.12017822265625, 0.6497802734375, 0.2679901123046875, 0.16661834716796875, -0.21370697021484375, -0.12489891052246094, 0.19660568237304688, 1.560272216796875, 0.21874237060546875, -0.2298583984375, -0.4322662353515625, -0.17964935302734375, 0.108001708984375, 0.06207275390625, -0.0779266357421875, 0.06548309326171875, 0.606597900390625, -0.6803970336914062, 0.4573974609375, -0.7287063598632812, 0.031198501586914062, -1.0680885314941406, 0.2446136474609375, 0.18524932861328125, 0.1908721923828125, -0.236846923828125, 0.0749359130859375, 0.7083282470703125, 1.4155731201171875, 0.446075439453125, -0.1142425537109375, -0.07053375244140625, 0.06502151489257812, -0.38544464111328125, -0.63446044921875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000044.npy"} +{"epoch": 0.06651549508692366, "step": 45, "batch_size": 64, "mean": 0.13097891211509705, "std": 0.40715929865837097, "min": -0.9106674194335938, "p10": -0.318499755859375, "median": 0.08786392211914062, "p90": 0.6211616516113282, "max": 1.497802734375, "pos_frac": 0.640625, "sample": [0.35472869873046875, -0.426727294921875, 0.4436054229736328, -0.1102142333984375, 0.11244010925292969, -0.11852264404296875, -0.302734375, 0.2587127685546875, -0.32525634765625, -0.715087890625, 0.07831573486328125, 0.6326217651367188, 0.18664169311523438, 0.02140045166015625, -0.09995460510253906, -0.1812591552734375, 0.0462493896484375, 1.0313720703125, 0.07291412353515625, 0.286712646484375, 0.3324127197265625, -0.5433273315429688, 0.5679264068603516, -0.019382476806640625, 0.15504074096679688, 0.648468017578125, 0.0657501220703125, 0.24001121520996094, 0.809967041015625, 0.7947158813476562, 0.12462615966796875, 0.253326416015625, -0.6644134521484375, -0.3290252685546875, 0.3038787841796875, 0.7587127685546875, -0.15296363830566406, 0.59442138671875, 0.01605987548828125, 0.14205169677734375, -0.9106674194335938, -0.171966552734375, -0.023700714111328125, 0.03034210205078125, 0.2238922119140625, -0.024240493774414062, 0.32635498046875, -0.03373527526855469, 0.2645263671875, 0.00408935546875, 0.10285186767578125, -0.2749786376953125, 0.5355300903320312, 0.2998046875, -0.10200309753417969, 0.5373611450195312, 1.497802734375, 0.0542449951171875, 0.31186866760253906, -0.06750297546386719, -0.04747772216796875, 0.097412109375, 0.5501480102539062, -0.14152145385742188], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000045.npy"} +{"epoch": 0.06802721088435375, "step": 46, "batch_size": 64, "mean": 0.15051376819610596, "std": 0.4654026925563812, "min": -0.94927978515625, "p10": -0.385099983215332, "median": 0.057361602783203125, "p90": 0.7629699707031251, "max": 1.1319427490234375, "pos_frac": 0.53125, "sample": [0.05173492431640625, -0.241119384765625, -0.2006988525390625, 0.21445465087890625, 0.24383544921875, -0.096954345703125, 0.022409439086914062, -0.4327735900878906, -0.35044097900390625, -0.94927978515625, 0.7137260437011719, 0.3623771667480469, 0.5985565185546875, 0.7772293090820312, -0.06726837158203125, -0.06971359252929688, -0.8793487548828125, -0.07791519165039062, 0.5450077056884766, 0.2578601837158203, 0.16963768005371094, 1.1319427490234375, -0.02667999267578125, 0.8584136962890625, 0.2842445373535156, -0.001373291015625, 0.5397415161132812, 1.0346603393554688, 1.11328125, 0.2621116638183594, -0.14211273193359375, 0.314605712890625, 0.06298828125, -0.01918792724609375, -0.3658885955810547, 0.4525146484375, -0.011842727661132812, -0.024324417114257812, -0.6064605712890625, 1.0644798278808594, 0.7296981811523438, -0.484588623046875, 0.5670700073242188, 0.2608680725097656, 0.6396217346191406, -0.12369537353515625, -0.39333343505859375, -0.21647262573242188, -0.048206329345703125, 0.1774749755859375, 0.48164939880371094, -0.027004241943359375, -0.2097301483154297, 0.13632965087890625, 0.522552490234375, 0.667327880859375, -0.5991668701171875, -0.0468292236328125, -0.3430938720703125, 0.7966156005859375, -0.04889678955078125, 0.3484230041503906, -0.1821308135986328, 0.5159683227539062], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000046.npy"} +{"epoch": 0.06953892668178382, "step": 47, "batch_size": 64, "mean": 0.1261444389820099, "std": 0.433704137802124, "min": -1.421478271484375, "p10": -0.37036056518554683, "median": 0.14210128784179688, "p90": 0.6194149017333984, "max": 1.2355270385742188, "pos_frac": 0.640625, "sample": [0.4488086700439453, 0.5114479064941406, 0.19355392456054688, -0.0731353759765625, 0.6645126342773438, 0.08857154846191406, 0.115936279296875, 1.2355270385742188, 0.07918548583984375, 0.16551971435546875, 0.016344070434570312, -0.07172584533691406, 0.22020721435546875, -0.674407958984375, 0.11049652099609375, 0.14144134521484375, 0.3079395294189453, 0.525726318359375, -0.1448822021484375, -0.26461029052734375, 0.22901535034179688, 0.08473968505859375, -0.17465972900390625, 0.14975929260253906, -0.2877216339111328, 0.6863861083984375, 0.011140823364257812, 0.29907798767089844, 0.032444000244140625, 0.40146636962890625, -0.16902923583984375, 0.5695915222167969, -0.39900970458984375, 0.7048912048339844, -1.421478271484375, 0.18384933471679688, 0.5762519836425781, 0.16761016845703125, 0.3106269836425781, 0.9181442260742188, -0.3035125732421875, 0.3168792724609375, -0.1381072998046875, -0.11133575439453125, 0.6168327331542969, -0.2110137939453125, 0.7966766357421875, -0.20071029663085938, 0.5178604125976562, 0.5002651214599609, -0.174468994140625, -0.2408905029296875, -0.431976318359375, -0.57830810546875, -0.5918560028076172, 0.6205215454101562, 0.14276123046875, 0.462066650390625, 0.4704246520996094, -0.058452606201171875, 0.18014907836914062, 0.60467529296875, -0.41143798828125, -0.17335128784179688], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000047.npy"} +{"epoch": 0.0710506424792139, "step": 48, "batch_size": 64, "mean": 0.03092169761657715, "std": 0.39811617136001587, "min": -0.6876068115234375, "p10": -0.4531957626342773, "median": 0.038745880126953125, "p90": 0.5724014282226565, "max": 1.2117462158203125, "pos_frac": 0.515625, "sample": [0.22813987731933594, -0.170318603515625, -0.17467880249023438, -0.048126220703125, 0.13846206665039062, 0.9460029602050781, -0.281219482421875, -0.015350341796875, -0.364227294921875, -0.17955589294433594, 0.289215087890625, 0.09013557434082031, -0.14159011840820312, -0.577667236328125, 0.41445159912109375, 0.34722900390625, 0.23781967163085938, -0.10120201110839844, -0.3348236083984375, 0.4820098876953125, -0.11358642578125, -0.6491432189941406, 0.04787445068359375, 0.14616012573242188, 0.3816795349121094, 0.5932846069335938, 0.0670013427734375, 0.2852210998535156, -0.571533203125, 0.6885452270507812, -0.28894805908203125, 0.228546142578125, -0.23693084716796875, 0.2809562683105469, 0.09821319580078125, -0.09160614013671875, -0.23095321655273438, 0.0296173095703125, 0.8764152526855469, -0.19469451904296875, -0.2462005615234375, 0.1554412841796875, -0.31304168701171875, 0.25034332275390625, -0.47969818115234375, 0.06226158142089844, 0.11710166931152344, 0.192230224609375, -0.6468048095703125, -0.216552734375, 0.5236740112304688, 0.07326507568359375, -0.3516216278076172, 0.11543846130371094, -0.2842864990234375, 0.6584453582763672, 1.2117462158203125, -0.06725692749023438, -0.43389892578125, -0.16956710815429688, 0.16575241088867188, 0.6804656982421875, -0.46146583557128906, -0.6876068115234375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000048.npy"} +{"epoch": 0.07256235827664399, "step": 49, "batch_size": 64, "mean": 0.08111083507537842, "std": 0.4912498891353607, "min": -1.499481201171875, "p10": -0.49552688598632805, "median": 0.12878036499023438, "p90": 0.7255348205566406, "max": 1.2197341918945312, "pos_frac": 0.625, "sample": [0.03014373779296875, 0.3285675048828125, 0.04847908020019531, 0.7613372802734375, 0.2671661376953125, -0.1822032928466797, 0.13929367065429688, 0.1453857421875, 0.11073493957519531, -0.10625457763671875, 0.06020545959472656, 0.009464263916015625, 0.5827560424804688, 0.12540435791015625, -0.14771461486816406, -0.08408164978027344, -0.7045936584472656, 0.254638671875, 0.24033355712890625, -1.03948974609375, 0.12065887451171875, 0.19740676879882812, -0.031375885009765625, 0.13521575927734375, 0.11517143249511719, 0.30886077880859375, -0.1049041748046875, 0.19931793212890625, 0.18335723876953125, -0.41989898681640625, 0.24982833862304688, 0.20404052734375, -0.32177734375, 0.6135921478271484, 0.7033157348632812, 0.7306365966796875, -0.19445037841796875, 0.41204833984375, 0.8937835693359375, 1.0110855102539062, -0.5279388427734375, -0.13346099853515625, -0.3529205322265625, 0.3364410400390625, -0.35552215576171875, 0.39923858642578125, -0.01749420166015625, 0.388214111328125, 0.1321563720703125, -0.08480262756347656, 0.9135665893554688, 0.7136306762695312, -1.499481201171875, -0.5706787109375, -0.3581275939941406, -0.7727699279785156, 0.16002273559570312, 1.2197341918945312, 0.303985595703125, 0.14341163635253906, -0.17508697509765625, -0.40288543701171875, 0.7710037231445312, -0.8846282958984375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000049.npy"} +{"epoch": 0.07407407407407407, "step": 50, "batch_size": 64, "mean": 0.10493996739387512, "std": 0.3791825771331787, "min": -0.7903976440429688, "p10": -0.3704259872436523, "median": 0.09713935852050781, "p90": 0.5802175521850587, "max": 0.965087890625, "pos_frac": 0.59375, "sample": [0.28376007080078125, 0.211517333984375, -0.1354827880859375, 0.07301712036132812, -0.12659835815429688, -0.07791900634765625, 0.09777450561523438, -0.27120208740234375, 0.11408233642578125, 0.5494422912597656, 0.34010887145996094, 0.445953369140625, -0.173858642578125, -0.2436676025390625, -0.10238265991210938, -0.3935565948486328, 0.33453369140625, 0.5695991516113281, 0.5847682952880859, 0.1375579833984375, -0.05859375, 0.44190216064453125, -0.224029541015625, -0.19933319091796875, 0.525726318359375, 0.717437744140625, -0.019222259521484375, 0.22953414916992188, 0.23116683959960938, -0.38645172119140625, 0.7411136627197266, 0.029058456420898438, 0.45542144775390625, 0.6197471618652344, -0.7903976440429688, 0.044940948486328125, 0.2736968994140625, 0.8768234252929688, 0.04041099548339844, 0.29821014404296875, 0.965087890625, 0.546478271484375, 0.489898681640625, -0.5329742431640625, 0.12047004699707031, 0.22394371032714844, 0.0203399658203125, -0.001708984375, -0.44176483154296875, 0.30239105224609375, 0.2721080780029297, -0.17398834228515625, -0.13989639282226562, 0.09650421142578125, 0.8049259185791016, -0.2626991271972656, -0.7535629272460938, 0.16261672973632812, -0.275970458984375, -0.33303260803222656, -0.0995941162109375, -0.414794921875, 0.11078262329101562, -0.0340118408203125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000050.npy"} +{"epoch": 0.07558578987150416, "step": 51, "batch_size": 64, "mean": 0.16088125109672546, "std": 0.5224586725234985, "min": -1.156463623046875, "p10": -0.5130722045898437, "median": 0.15506362915039062, "p90": 0.8266605377197267, "max": 1.767852783203125, "pos_frac": 0.640625, "sample": [0.466156005859375, 0.12326812744140625, -0.07159996032714844, -0.039745330810546875, 0.2106170654296875, 0.4786834716796875, 0.06817245483398438, 0.7336616516113281, -0.4655723571777344, -0.199462890625, 0.30472564697265625, 0.7909507751464844, -0.18258285522460938, 1.1083145141601562, -0.41302490234375, 0.2370452880859375, -0.018890380859375, 0.24462890625, 0.25183868408203125, 0.631103515625, 0.8419647216796875, 0.4276695251464844, 0.44970130920410156, 0.292755126953125, 0.185028076171875, -0.5114593505859375, -0.5187606811523438, -0.43384742736816406, -0.04205322265625, 0.5547599792480469, 0.394012451171875, 0.1872711181640625, -0.019540786743164062, 1.0064239501953125, -1.156463623046875, 0.0919647216796875, 0.3484306335449219, -0.6770782470703125, -0.5341873168945312, 0.189056396484375, 0.11234283447265625, -0.189117431640625, 0.12509918212890625, 0.9771270751953125, 0.34272003173828125, 0.01966094970703125, -0.6467018127441406, 1.767852783203125, -0.513763427734375, 1.1995925903320312, 0.344329833984375, 0.2030181884765625, 0.5270881652832031, -0.08844757080078125, -0.18877410888671875, -0.26894569396972656, 0.04044342041015625, 1.3154258728027344, 0.2878227233886719, 0.01508331298828125, -0.6989288330078125, 0.376678466796875, -0.10148239135742188, 0.00434112548828125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000051.npy"} +{"epoch": 0.07709750566893424, "step": 52, "batch_size": 64, "mean": 0.240006223320961, "std": 0.5847069621086121, "min": -0.9403076171875, "p10": -0.3885345458984375, "median": 0.06369686126708984, "p90": 0.9771011352539064, "max": 2.3000946044921875, "pos_frac": 0.59375, "sample": [1.1586151123046875, 0.758056640625, 0.863922119140625, 0.795623779296875, 0.8329620361328125, -0.2963104248046875, 0.5087566375732422, -0.4674034118652344, -0.3552207946777344, 0.9428253173828125, 0.665557861328125, 0.37674713134765625, -0.3908233642578125, 1.6657257080078125, 0.5833816528320312, -0.2593231201171875, 1.24420166015625, -0.1990509033203125, -0.08484268188476562, -0.3702526092529297, -0.4967689514160156, -0.13479995727539062, 0.49102783203125, -0.1660938262939453, 0.0542144775390625, 0.991790771484375, -0.2173309326171875, 0.020368576049804688, 0.0331268310546875, 0.21142196655273438, 0.3538627624511719, -0.08331298828125, -0.14665985107421875, -0.5402145385742188, 0.012025833129882812, 1.1033706665039062, 0.3156013488769531, 0.19306564331054688, 0.31050872802734375, -0.099517822265625, 0.06856346130371094, 0.0364837646484375, -0.07126235961914062, 0.5395221710205078, 0.16226959228515625, 0.8111076354980469, -0.15357685089111328, -0.17044830322265625, 2.3000946044921875, -0.5413970947265625, 0.4394187927246094, -0.9403076171875, -0.47190093994140625, 0.6301116943359375, 0.4819793701171875, -0.0261993408203125, 1.0552139282226562, 0.05883026123046875, 0.867431640625, 0.28757667541503906, 0.3919563293457031, -0.12295913696289062, -0.06775093078613281, -0.3831939697265625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000052.npy"} +{"epoch": 0.07860922146636433, "step": 53, "batch_size": 64, "mean": 0.2129267156124115, "std": 0.5002836585044861, "min": -1.4168319702148438, "p10": -0.4215705871582031, "median": 0.1954021453857422, "p90": 0.73179931640625, "max": 1.32275390625, "pos_frac": 0.703125, "sample": [0.18617630004882812, 0.321075439453125, 0.7305068969726562, 0.384185791015625, 0.33092689514160156, 0.0056934356689453125, 0.5229644775390625, -0.152496337890625, 0.6319141387939453, -0.487945556640625, 0.3437042236328125, 0.0364990234375, -0.04131317138671875, 0.4361248016357422, -0.54083251953125, 0.20462799072265625, 0.036754608154296875, 1.32275390625, -0.172943115234375, -0.7659530639648438, 0.04353904724121094, 0.26569366455078125, 0.7828826904296875, 0.09589767456054688, -1.4168319702148438, 0.5487060546875, -0.35465431213378906, 0.26932525634765625, 0.6601734161376953, 0.5672988891601562, 0.5366325378417969, -0.6722335815429688, -0.18217849731445312, 0.3248443603515625, 0.3945159912109375, 1.042266845703125, 0.7227706909179688, 1.2379684448242188, -0.41028594970703125, -0.13045310974121094, -0.28717994689941406, 0.07228851318359375, 0.6883068084716797, -0.4711761474609375, 0.7743072509765625, 0.640045166015625, 0.220367431640625, 0.605224609375, 0.6293678283691406, 1.181549072265625, 0.0055084228515625, 0.7323532104492188, -0.1438770294189453, 0.7096099853515625, -0.03733062744140625, -0.4264068603515625, 0.5994415283203125, -0.14070510864257812, 0.1759490966796875, 0.12316131591796875, -0.059253692626953125, 0.1542205810546875, 0.14899444580078125, 0.07424163818359375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000053.npy"} +{"epoch": 0.0801209372637944, "step": 54, "batch_size": 64, "mean": 0.1448424756526947, "std": 0.46852150559425354, "min": -1.0439453125, "p10": -0.4369697570800781, "median": 0.13620567321777344, "p90": 0.6479427337646485, "max": 1.4831390380859375, "pos_frac": 0.578125, "sample": [0.1784515380859375, 1.4610443115234375, -0.5742111206054688, 0.3122138977050781, -0.036411285400390625, -0.4112548828125, 0.6393470764160156, 0.368621826171875, 0.36943817138671875, -0.6695289611816406, -0.2656822204589844, -0.026865005493164062, 0.6052703857421875, -1.0439453125, -0.011745452880859375, -0.14162826538085938, 0.6089630126953125, 0.3192863464355469, -0.0206298828125, 0.16564178466796875, 0.18320465087890625, 1.4831390380859375, 0.13268661499023438, 0.5785770416259766, 0.5756378173828125, 0.6516265869140625, 0.12110328674316406, -0.14861297607421875, 0.10955047607421875, -0.096649169921875, -0.2230377197265625, 0.24178695678710938, -0.19220733642578125, -0.3578662872314453, 0.39553260803222656, 0.3214569091796875, 0.0653533935546875, -0.2351837158203125, 0.681304931640625, 0.8873786926269531, 0.08847618103027344, 0.2998161315917969, 0.8412742614746094, 0.6262283325195312, -0.18350601196289062, 0.32717323303222656, 0.39313507080078125, 0.16783523559570312, -0.11853218078613281, 0.1397247314453125, -0.011932373046875, -0.5459976196289062, -0.08446502685546875, -0.20366668701171875, 0.5055580139160156, -0.12215232849121094, 0.3676471710205078, -0.2943744659423828, 0.8581466674804688, -0.5536270141601562, -0.44799041748046875, 0.14861297607421875, 0.5570449829101562, -0.4856681823730469], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000054.npy"} +{"epoch": 0.08163265306122448, "step": 55, "batch_size": 64, "mean": 0.3081167936325073, "std": 0.7133110761642456, "min": -0.99053955078125, "p10": -0.4953834533691406, "median": 0.18228912353515625, "p90": 1.233436965942383, "max": 2.59356689453125, "pos_frac": 0.6875, "sample": [1.6970672607421875, 0.2832183837890625, 0.12068939208984375, 0.07253265380859375, 0.15109634399414062, 0.648834228515625, 0.7902908325195312, 0.14409637451171875, -0.6275806427001953, 0.03148651123046875, -0.25371551513671875, 1.134674072265625, -0.7395896911621094, -0.3070068359375, -0.060749053955078125, 1.4566574096679688, 0.9840011596679688, 0.04228973388671875, 0.169464111328125, -0.3592987060546875, 0.362335205078125, -0.4000244140625, 1.0097084045410156, 0.4214153289794922, 2.59356689453125, 0.318389892578125, 0.2696552276611328, 1.06817626953125, -0.28108978271484375, 0.3861083984375, 0.2681427001953125, 1.2546234130859375, 0.1951141357421875, -0.4237861633300781, -0.6178665161132812, -0.345794677734375, 0.30161285400390625, 0.1300792694091797, 0.5043258666992188, -0.1122283935546875, 0.1064300537109375, 1.1773033142089844, 0.06949615478515625, 1.39801025390625, 0.4638214111328125, 0.44028472900390625, 1.3729629516601562, -0.4060516357421875, -0.5091094970703125, -0.99053955078125, 0.22186660766601562, 0.8051490783691406, -0.08867645263671875, 0.11775970458984375, -0.4546623229980469, 0.8971176147460938, 0.6314773559570312, 0.133392333984375, 0.5312690734863281, 2.107513427734375, -0.5235595703125, -0.46335601806640625, 1.1840019226074219, -0.7833480834960938], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000055.npy"} +{"epoch": 0.08314436885865457, "step": 56, "batch_size": 64, "mean": 0.20225009322166443, "std": 0.6259340643882751, "min": -2.2276458740234375, "p10": -0.4887966156005859, "median": 0.2507057189941406, "p90": 0.9176935195922855, "max": 1.6669921875, "pos_frac": 0.65625, "sample": [0.05730438232421875, 0.41510009765625, 0.05190277099609375, -0.530426025390625, 0.5687484741210938, -0.0235748291015625, 1.2555694580078125, -1.39410400390625, -0.4716987609863281, 0.3338470458984375, 0.2896156311035156, -0.496124267578125, 0.2698211669921875, 0.5483322143554688, 0.28682708740234375, 1.4598884582519531, 0.1873321533203125, -0.5319156646728516, 0.583526611328125, 1.1998825073242188, -0.5372962951660156, 1.6669921875, 0.4492626190185547, 0.3574714660644531, 0.09016990661621094, 0.8359279632568359, -0.3102378845214844, 0.25682830810546875, 0.2814178466796875, 0.73040771484375, -0.2725028991699219, 0.33123016357421875, 0.9962997436523438, 0.5905952453613281, 0.19863128662109375, -0.07877349853515625, -2.2276458740234375, 0.4093456268310547, -0.020967483520507812, -0.10005950927734375, 0.631317138671875, 0.7208251953125, -0.5084075927734375, -0.2547111511230469, 0.2445831298828125, 0.477630615234375, -0.37772369384765625, 0.9527359008789062, -0.1352214813232422, 0.49554443359375, 0.6152000427246094, 0.5825157165527344, 0.5570945739746094, 0.5413475036621094, 0.166412353515625, -0.09478950500488281, -0.39731597900390625, 0.10565185546875, 0.17282867431640625, 0.028350830078125, -0.1670665740966797, 1.3500289916992188, -0.025997161865234375, -0.44377899169921875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000056.npy"} +{"epoch": 0.08465608465608465, "step": 57, "batch_size": 64, "mean": 0.2819889187812805, "std": 0.6840593814849854, "min": -1.0062751770019531, "p10": -0.6334079742431641, "median": 0.25829505920410156, "p90": 0.9980995178222658, "max": 2.40704345703125, "pos_frac": 0.71875, "sample": [-0.8193130493164062, -0.6344184875488281, 0.23626327514648438, 2.40704345703125, -0.2024383544921875, 0.849334716796875, 0.6470947265625, -0.7832717895507812, 0.2302398681640625, 0.414520263671875, 0.86541748046875, 1.144317626953125, -0.9602622985839844, -0.0276336669921875, 0.799407958984375, -1.0062751770019531, 0.5464706420898438, 0.7303695678710938, 2.3295440673828125, 0.2215404510498047, 0.496063232421875, -0.26618194580078125, 0.8054790496826172, 1.2003631591796875, -0.3088264465332031, -0.5341339111328125, 0.19830703735351562, 0.162628173828125, 0.24988365173339844, -0.6310501098632812, 0.73712158203125, 0.6002941131591797, 0.3214225769042969, 0.25734710693359375, 0.126190185546875, 0.46503448486328125, -0.97357177734375, 0.3867607116699219, -0.36843109130859375, 0.42755126953125, -0.4422950744628906, -0.461151123046875, -0.5402908325195312, 0.4673728942871094, -0.6895980834960938, 0.490936279296875, 1.0157852172851562, 0.4674835205078125, 0.935150146484375, 0.6556587219238281, 0.2592430114746094, 0.743621826171875, 0.023712158203125, 1.0234527587890625, 0.1970691680908203, 1.2435417175292969, 0.2465362548828125, 0.9568328857421875, 0.21779251098632812, 0.3895111083984375, 0.20251846313476562, 0.21357154846191406, 0.49737548828125, -0.4067420959472656], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000057.npy"} +{"epoch": 0.08616780045351474, "step": 58, "batch_size": 64, "mean": 0.24712243676185608, "std": 1.0262049436569214, "min": -1.2570571899414062, "p10": -0.6444507598876953, "median": -0.0042629241943359375, "p90": 1.2570384979248048, "max": 5.002899169921875, "pos_frac": 0.484375, "sample": [-1.2570571899414062, 0.6719284057617188, 0.9081249237060547, -0.21673202514648438, 0.7441787719726562, -0.5741500854492188, -0.13985443115234375, 0.5096206665039062, -0.0043182373046875, -0.30489349365234375, 0.7074546813964844, -0.6060657501220703, 0.65155029296875, -0.6498947143554688, -0.6166610717773438, 0.8346672058105469, 1.4246139526367188, -0.724517822265625, -0.166534423828125, 1.707275390625, 0.11231422424316406, -0.3856620788574219, 0.4650077819824219, 0.693267822265625, -0.33404541015625, -0.3033256530761719, -1.0375709533691406, -0.54156494140625, -0.6814594268798828, 0.46820831298828125, -0.04225921630859375, -0.9658985137939453, 0.4607658386230469, 0.6281204223632812, 5.002899169921875, -0.6317481994628906, -0.790435791015625, 2.9155426025390625, 1.4299392700195312, 0.0841217041015625, -0.3372802734375, 0.10894966125488281, 0.3754158020019531, -0.6093292236328125, -0.176605224609375, 3.1853179931640625, -0.2968101501464844, -0.46671295166015625, -0.0474700927734375, 1.0641250610351562, -0.0767974853515625, 1.2652015686035156, 0.33504486083984375, -0.004207611083984375, 0.461090087890625, 0.910125732421875, 0.17162132263183594, -0.44942283630371094, -0.3627777099609375, 1.2379913330078125, -0.5187225341796875, -0.123260498046875, 0.09966278076171875, 0.6257343292236328], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000058.npy"} +{"epoch": 0.08767951625094482, "step": 59, "batch_size": 64, "mean": 0.3036823272705078, "std": 0.9264476299285889, "min": -1.445343017578125, "p10": -0.5878360748291016, "median": 0.10018157958984375, "p90": 1.6355285644531259, "max": 3.3837127685546875, "pos_frac": 0.59375, "sample": [0.038059234619140625, 0.36260986328125, 0.099365234375, -0.5866050720214844, -0.2004375457763672, -0.44056129455566406, 0.22916030883789062, -1.2986984252929688, 0.6100692749023438, 1.0876312255859375, 2.518890380859375, 0.22021865844726562, 1.161041259765625, 1.3427734375, 0.4570026397705078, -0.24788475036621094, 1.8022308349609375, -0.8782234191894531, 0.1009979248046875, 0.051055908203125, 0.591827392578125, -0.3233680725097656, -0.044677734375, 0.24739837646484375, -0.033233642578125, 0.7528266906738281, -0.23925399780273438, 1.4201431274414062, 2.758087158203125, 0.15003204345703125, -0.24932098388671875, 0.26346588134765625, 0.6497077941894531, -0.104156494140625, -0.983856201171875, 0.09567451477050781, -0.15002059936523438, 0.4629402160644531, -0.6694183349609375, -0.1649017333984375, 3.3837127685546875, 0.333251953125, 1.7278366088867188, 0.6658058166503906, -0.1816997528076172, 0.39175987243652344, -0.5883636474609375, 1.0331764221191406, 0.05572509765625, 1.8953704833984375, -0.4206428527832031, 0.805999755859375, -0.3217277526855469, -1.445343017578125, 0.6912689208984375, -0.5464897155761719, -1.175689697265625, -0.0323028564453125, 0.07580184936523438, -0.32465362548828125, 0.1732311248779297, 1.84173583984375, 0.9351959228515625, -0.39588165283203125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000059.npy"} +{"epoch": 0.08919123204837491, "step": 60, "batch_size": 64, "mean": 0.08645197749137878, "std": 0.8943778276443481, "min": -2.3677520751953125, "p10": -0.9693363189697265, "median": 0.1605224609375, "p90": 1.0758735656738283, "max": 2.53466796875, "pos_frac": 0.609375, "sample": [0.15382766723632812, 0.029205322265625, 0.5309638977050781, -0.31652069091796875, -0.5675506591796875, 1.6149520874023438, -2.3677520751953125, -0.7869834899902344, 0.25830078125, -2.2730865478515625, 0.3558998107910156, 1.0488662719726562, -0.45404624938964844, 0.16721725463867188, -1.1048965454101562, 0.4954986572265625, -0.36866188049316406, 1.0016098022460938, 1.3300647735595703, 0.38748931884765625, -0.5045318603515625, -0.04025840759277344, 0.26314544677734375, -0.983734130859375, 0.23307037353515625, 0.43425750732421875, 0.7427711486816406, 0.062229156494140625, 1.1617507934570312, -0.7570838928222656, 0.35370826721191406, 0.0338897705078125, 0.8607940673828125, -0.07095718383789062, -0.23090362548828125, 0.40313720703125, 0.6999378204345703, -1.2286300659179688, -0.25872802734375, -0.5705432891845703, -0.6677169799804688, 1.1730575561523438, 0.7570419311523438, -1.3256263732910156, 0.10033035278320312, 1.3321380615234375, -0.6148796081542969, -0.8248138427734375, 0.8951263427734375, 0.10657691955566406, 0.8141098022460938, 0.06499862670898438, 0.7006950378417969, 0.6249313354492188, 2.53466796875, -0.4803943634033203, 0.7358970642089844, 0.48541259765625, 0.8304977416992188, 0.41047096252441406, -1.919158935546875, -0.9357414245605469, 1.0874481201171875, -0.08986091613769531], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000060.npy"} +{"epoch": 0.09070294784580499, "step": 61, "batch_size": 64, "mean": 0.053537994623184204, "std": 1.0544979572296143, "min": -2.3319549560546875, "p10": -1.6528480529785154, "median": 0.03446769714355469, "p90": 1.2217521667480469, "max": 2.8970947265625, "pos_frac": 0.546875, "sample": [0.442779541015625, 0.9858245849609375, 1.9069595336914062, 2.0018157958984375, 0.034000396728515625, 0.0055942535400390625, 0.351654052734375, 2.0155029296875, 0.79510498046875, -0.24293899536132812, -0.0182647705078125, -0.34801673889160156, -0.6398696899414062, 0.677978515625, 0.3192138671875, -0.4081535339355469, 0.834930419921875, -0.1327381134033203, -0.34743499755859375, 0.5495395660400391, 0.10083770751953125, 0.4825630187988281, 0.3112945556640625, 0.023715972900390625, -0.11490249633789062, -0.16230010986328125, -1.7080841064453125, -1.8538360595703125, 0.101806640625, -0.5331993103027344, 0.8698577880859375, -0.5740451812744141, -0.7847900390625, 1.2325897216796875, -1.1847076416015625, 0.6779632568359375, -0.0145416259765625, -0.6299896240234375, -0.86749267578125, 0.4249267578125, 2.8970947265625, -1.8501663208007812, 0.03493499755859375, -2.1706314086914062, 0.7262649536132812, 1.3916091918945312, 0.4156074523925781, 0.35286712646484375, -0.6854133605957031, -0.1349773406982422, 1.1964645385742188, -1.5239639282226562, -0.042083740234375, -0.155426025390625, 0.9025039672851562, -2.3319549560546875, 1.6525726318359375, 0.41054725646972656, 0.5078353881835938, -0.3339996337890625, 0.8133010864257812, 0.6562347412109375, -2.170734405517578, -1.7132034301757812], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000061.npy"} +{"epoch": 0.09221466364323508, "step": 62, "batch_size": 64, "mean": 0.2517266571521759, "std": 0.887523353099823, "min": -2.1666259765625, "p10": -0.632540512084961, "median": 0.2688159942626953, "p90": 1.099435043334961, "max": 2.3747100830078125, "pos_frac": 0.59375, "sample": [0.8951644897460938, -1.3541297912597656, -0.6254348754882812, 0.29210472106933594, 0.6301116943359375, 0.21455001831054688, 1.1022605895996094, -0.246795654296875, -1.8417510986328125, -0.049468994140625, 1.8414688110351562, -0.7476367950439453, 1.6856765747070312, -1.2265625, 1.487884521484375, 0.7949371337890625, 0.658050537109375, 0.25145721435546875, 0.09017562866210938, 0.805328369140625, -0.01276397705078125, -0.5362701416015625, -0.59625244140625, -0.003566741943359375, 0.5819950103759766, 0.7849273681640625, -0.305511474609375, 0.5906600952148438, 0.9225234985351562, 0.9981155395507812, -0.47289276123046875, -0.344207763671875, -0.00136566162109375, 0.4734649658203125, 0.8797683715820312, -2.1666259765625, -0.0644989013671875, 1.5755577087402344, -0.35770416259765625, 0.23455047607421875, 0.2861747741699219, 0.9056625366210938, 0.45955657958984375, 0.3277854919433594, 1.0928421020507812, 0.8673133850097656, -0.6218414306640625, -0.6355857849121094, 0.22172927856445312, 1.0229110717773438, -0.3247261047363281, 0.9989833831787109, 0.7714862823486328, 0.14349365234375, -0.5175247192382812, 0.40317535400390625, 2.3747100830078125, 2.2484283447265625, -0.10740470886230469, -0.0774993896484375, 0.5727920532226562, -0.24930953979492188, -1.59722900390625, 0.7072887420654297], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000062.npy"} +{"epoch": 0.09372637944066516, "step": 63, "batch_size": 64, "mean": 0.5762900114059448, "std": 0.9069164395332336, "min": -2.36749267578125, "p10": -0.2524507522583007, "median": 0.4911813735961914, "p90": 1.8170520782470705, "max": 3.442840576171875, "pos_frac": 0.796875, "sample": [0.26680755615234375, 0.0452117919921875, 0.4954833984375, -0.11746597290039062, 3.442840576171875, -0.20089149475097656, -0.21373558044433594, 3.08734130859375, 0.6020736694335938, 0.5491104125976562, -2.36749267578125, -0.44007110595703125, 0.6457004547119141, -1.15020751953125, 0.8941802978515625, 1.0117874145507812, 1.7595367431640625, 0.32100677490234375, 0.46860504150390625, 0.4561729431152344, -0.08200836181640625, 0.33643531799316406, 0.9166984558105469, 2.2308502197265625, 0.6081695556640625, 0.8613853454589844, 0.3946990966796875, 0.2248554229736328, 0.4633350372314453, 0.4023895263671875, 0.8727684020996094, 0.5586395263671875, 1.2064170837402344, 0.8332595825195312, 0.4166717529296875, 0.11194610595703125, 0.5065956115722656, -0.4755058288574219, 0.22811317443847656, 0.9714107513427734, 0.49363136291503906, 2.26361083984375, 0.20458030700683594, 1.8417015075683594, 0.5132865905761719, 1.007059097290039, 0.482818603515625, 1.8863754272460938, 0.5426788330078125, -0.1710052490234375, 0.34441184997558594, 0.6901111602783203, 0.194671630859375, -0.175445556640625, 0.837371826171875, -0.2855377197265625, 0.7271003723144531, 0.48873138427734375, -0.26904296875, 0.08832359313964844, 0.8814811706542969, 2.2293701171875, 1.737823486328125, -0.814666748046875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000063.npy"} +{"epoch": 0.09523809523809523, "step": 64, "batch_size": 64, "mean": 0.08405190706253052, "std": 0.9259858727455139, "min": -3.5260009765625, "p10": -0.857124328613281, "median": 0.1083831787109375, "p90": 1.140073394775391, "max": 2.7002105712890625, "pos_frac": 0.546875, "sample": [1.5367202758789062, 2.7002105712890625, -0.6706962585449219, -0.6964263916015625, -0.3626594543457031, 0.3725128173828125, 0.6502799987792969, 0.09682464599609375, 0.446807861328125, 1.3981857299804688, 0.5768299102783203, 0.46981048583984375, 1.198385238647461, -0.4179344177246094, 0.9957847595214844, -0.06004524230957031, -0.103424072265625, -0.24045562744140625, 0.049953460693359375, 0.5316028594970703, 1.04473876953125, -0.44382476806640625, -0.281829833984375, -0.1452960968017578, -1.10662841796875, -0.286529541015625, 0.18884658813476562, -3.5260009765625, -0.57672119140625, 0.6285419464111328, 0.23773765563964844, 0.11994171142578125, 0.13704681396484375, -0.925994873046875, 0.5375633239746094, -0.1964111328125, 0.8161773681640625, -2.171173095703125, -1.1760101318359375, 0.9573211669921875, 0.7401275634765625, 0.17960357666015625, 1.1809310913085938, -0.1287994384765625, 0.24361228942871094, -1.1201019287109375, -0.38446807861328125, 0.599639892578125, -0.21282196044921875, 1.9281768798828125, -0.6932601928710938, -0.4766101837158203, 1.56573486328125, 0.28562164306640625, 0.07300567626953125, 0.3143653869628906, 0.6054935455322266, -0.4505119323730469, -1.0208282470703125, -0.6100616455078125, 0.264892578125, -0.32497406005859375, -0.13613510131835938, 0.6529273986816406], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000064.npy"} +{"epoch": 0.09674981103552532, "step": 65, "batch_size": 64, "mean": 0.4979110360145569, "std": 1.2749006748199463, "min": -2.909893035888672, "p10": -1.060335922241211, "median": 0.3942985534667969, "p90": 1.9762382507324219, "max": 5.531280517578125, "pos_frac": 0.703125, "sample": [0.1767559051513672, 1.0685958862304688, 1.6375961303710938, -0.112396240234375, 0.00711822509765625, 1.2524986267089844, -1.407745361328125, -0.718841552734375, -0.7542648315429688, 1.9774856567382812, -0.346435546875, 0.7704124450683594, 1.0928840637207031, 0.7350387573242188, 0.36092376708984375, 0.7830905914306641, 0.6243686676025391, 0.3922004699707031, -1.0913238525390625, 2.1887054443359375, -0.42230224609375, 0.05663299560546875, -1.3775634765625, 2.116424560546875, 2.1637115478515625, -0.13280868530273438, -2.909893035888672, 0.5189971923828125, -0.34345245361328125, 0.4173126220703125, 0.13716888427734375, 0.76025390625, -0.6633453369140625, -1.2060012817382812, 0.261260986328125, 2.88763427734375, 0.3963966369628906, 1.97332763671875, 1.1135635375976562, 1.1971092224121094, 1.85491943359375, 0.32813262939453125, 1.8661270141601562, 1.3948440551757812, -1.0092506408691406, 0.041839599609375, 0.5038986206054688, 0.00103759765625, -1.0822296142578125, -0.8001899719238281, 0.33547210693359375, 1.1131725311279297, -0.13599395751953125, 0.6358757019042969, 0.543853759765625, 1.2209930419921875, 2.4232406616210938, -0.3821754455566406, 1.3123626708984375, 1.8266830444335938, 0.08136749267578125, -1.5757293701171875, 5.531280517578125, 0.2556800842285156], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000065.npy"} +{"epoch": 0.0982615268329554, "step": 66, "batch_size": 64, "mean": 0.3692646324634552, "std": 1.2013484239578247, "min": -3.522064208984375, "p10": -1.0201889038085938, "median": 0.3373394012451172, "p90": 1.8297927856445313, "max": 3.629638671875, "pos_frac": 0.671875, "sample": [0.3567180633544922, 3.626800537109375, 0.7343883514404297, 0.6389122009277344, 0.47686767578125, 1.0673980712890625, 0.24242591857910156, 1.0269241333007812, -0.019735336303710938, 0.1992969512939453, -0.8131828308105469, 0.6984634399414062, 2.2132110595703125, -0.002887725830078125, -1.014495849609375, -0.8099441528320312, 0.16674041748046875, 0.5827217102050781, 1.8453521728515625, 1.793487548828125, 0.4062652587890625, 2.0917015075683594, -1.6724166870117188, -0.20799636840820312, 0.6857719421386719, 1.1662483215332031, 0.3859100341796875, 1.2560157775878906, 1.0185928344726562, 0.18138885498046875, 1.0844879150390625, 0.6284942626953125, 0.11347198486328125, -0.20257949829101562, 0.3584861755371094, -0.08687210083007812, 0.3179607391357422, 0.7152976989746094, -1.0287895202636719, 0.4287834167480469, -3.522064208984375, -2.438549041748047, 0.034275054931640625, -0.05210113525390625, -0.11592864990234375, -1.0226287841796875, 1.8927383422851562, -0.22671127319335938, 0.51885986328125, 1.2178668975830078, -0.4101753234863281, 0.151153564453125, 0.0064849853515625, 3.629638671875, 2.7899246215820312, -1.1178703308105469, 1.0053634643554688, -0.8862571716308594, -1.2184600830078125, 1.6649932861328125, 0.7881889343261719, -0.1527099609375, 0.2576141357421875, 0.189605712890625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000066.npy"} +{"epoch": 0.09977324263038549, "step": 67, "batch_size": 64, "mean": 0.2932513952255249, "std": 1.0444155931472778, "min": -2.0282745361328125, "p10": -1.1339355468749999, "median": 0.2881889343261719, "p90": 1.495781707763672, "max": 2.8315582275390625, "pos_frac": 0.65625, "sample": [0.3047943115234375, -1.183319091796875, -0.7928810119628906, -0.4854240417480469, -1.9106597900390625, 0.4147510528564453, 1.7833976745605469, 1.0989570617675781, -0.6009597778320312, 0.06862258911132812, 1.6728172302246094, -0.07626724243164062, -1.1534500122070312, -1.7039260864257812, 1.3764114379882812, 0.8272438049316406, -0.4662933349609375, 0.5943756103515625, -0.6209640502929688, 1.107421875, -0.7503509521484375, 1.48834228515625, -2.0282745361328125, 2.8315582275390625, 0.3761730194091797, 0.92401123046875, 0.5537643432617188, 0.6270675659179688, 1.0034408569335938, -1.0884017944335938, -0.07823944091796875, 0.2563056945800781, 0.7526779174804688, 1.4401321411132812, 0.5603866577148438, 0.18362045288085938, 1.314544677734375, 0.3964805603027344, -0.4884014129638672, 0.5598602294921875, 1.912994384765625, 0.18981170654296875, 1.102569580078125, 0.9742889404296875, 0.27158355712890625, 0.766021728515625, -0.0558624267578125, 0.05879783630371094, 0.2147979736328125, -0.865081787109375, 0.15815353393554688, 2.811553955078125, 0.2714672088623047, 0.14942169189453125, -0.19889450073242188, -0.320587158203125, 1.4989700317382812, 0.4266204833984375, -1.3849735260009766, -1.6595001220703125, 2.0819854736328125, -0.31163787841796875, 0.85076904296875, 0.7354736328125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000067.npy"} +{"epoch": 0.10128495842781557, "step": 68, "batch_size": 64, "mean": 0.31144294142723083, "std": 1.3126503229141235, "min": -3.67156982421875, "p10": -0.9356220245361326, "median": 0.2667045593261719, "p90": 1.7902824401855473, "max": 3.884033203125, "pos_frac": 0.609375, "sample": [3.429107666015625, 1.3471755981445312, 0.30904388427734375, 1.509429931640625, -0.613037109375, 3.1529693603515625, 0.514556884765625, -0.6450653076171875, 0.8281574249267578, -0.4380340576171875, 1.8653793334960938, -3.67156982421875, 1.4821929931640625, 1.2034454345703125, 0.8107757568359375, 0.6026840209960938, -0.210540771484375, -0.453887939453125, 0.17162322998046875, 0.8342819213867188, 0.9052734375, -0.04300498962402344, 0.703704833984375, 1.3278350830078125, 0.3676109313964844, 0.12288665771484375, 0.7128181457519531, -0.4122886657714844, 1.6844863891601562, -0.7967987060546875, -0.1449413299560547, 0.5257625579833984, 0.15040016174316406, -0.09355926513671875, -1.2823905944824219, 0.04129791259765625, 3.884033203125, 0.2947502136230469, 0.0767059326171875, 0.24920272827148438, -2.4358673095703125, -0.8009033203125, 0.8336334228515625, 0.2842063903808594, -1.876953125, -3.0629425048828125, -0.14458465576171875, 0.02997589111328125, -0.9933586120605469, -0.5553207397460938, 1.8607330322265625, 0.9619216918945312, -0.49430084228515625, 1.3371353149414062, 1.2624359130859375, -0.14263153076171875, -0.2038421630859375, 1.8273544311523438, 1.8240814208984375, 0.4822998046875, 1.7114181518554688, -0.70367431640625, -0.1603240966796875, -1.2106170654296875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000068.npy"} +{"epoch": 0.10279667422524566, "step": 69, "batch_size": 64, "mean": 0.7181195020675659, "std": 1.3830491304397583, "min": -2.4279327392578125, "p10": -0.9049961090087889, "median": 0.6275243759155273, "p90": 2.348862457275391, "max": 6.048736572265625, "pos_frac": 0.734375, "sample": [0.040557861328125, -0.008289337158203125, 3.1591567993164062, 0.00801849365234375, 1.0680618286132812, -0.051830291748046875, -0.3689231872558594, -0.22772789001464844, 1.166748046875, 1.5276336669921875, 0.8116893768310547, -0.055179595947265625, 0.4617271423339844, 1.8772964477539062, 1.4113388061523438, 2.3922195434570312, 0.6240196228027344, 1.8089523315429688, 3.2700653076171875, -1.2511253356933594, 3.251129150390625, -2.4279327392578125, 1.125244140625, 0.201202392578125, 3.03558349609375, -0.11099624633789062, 0.6873550415039062, 1.2941665649414062, 0.803253173828125, 0.14655303955078125, 0.8199081420898438, -0.952362060546875, 0.5976200103759766, 0.8730545043945312, 0.45378875732421875, -0.7215118408203125, 1.624420166015625, 0.6534652709960938, 0.30120849609375, 0.993865966796875, 0.416839599609375, 0.6310291290283203, 0.9378662109375, 0.0881805419921875, 1.75665283203125, -1.0367355346679688, 2.4953956604003906, 0.7127227783203125, -0.5032405853271484, -1.2936630249023438, 0.5409698486328125, 1.401336669921875, -2.3144989013671875, -0.7944755554199219, 0.9816513061523438, -0.2674102783203125, 1.8429718017578125, 0.22101974487304688, 2.2476959228515625, -1.2500114440917969, 6.048736572265625, 0.4463043212890625, 2.1334381103515625, 0.2034473419189453], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000069.npy"} +{"epoch": 0.10430839002267574, "step": 70, "batch_size": 64, "mean": 0.33839601278305054, "std": 1.3904435634613037, "min": -3.502166748046875, "p10": -1.320660400390625, "median": 0.3862037658691406, "p90": 2.1760368347167978, "max": 3.867156982421875, "pos_frac": 0.578125, "sample": [-0.3096923828125, -1.1299934387207031, 1.9895172119140625, -1.5281982421875, 0.2021961212158203, -1.1034488677978516, -0.2956809997558594, 1.1071853637695312, 1.7833938598632812, -0.21854209899902344, 1.1017379760742188, 2.2559738159179688, -3.502166748046875, 1.3104400634765625, 0.2574729919433594, -0.7304477691650391, 2.4291610717773438, -2.0923538208007812, -0.550506591796875, -0.5415821075439453, 3.3009872436523438, 0.5963287353515625, 1.125692367553711, 0.5497074127197266, -1.93072509765625, 0.5539474487304688, 0.6646080017089844, 3.0629196166992188, -0.7576904296875, 3.867156982421875, -0.23267745971679688, 0.34352874755859375, -1.3470916748046875, -0.658172607421875, 0.4288787841796875, 2.4128341674804688, 1.3533859252929688, -0.0605010986328125, 0.6172409057617188, 1.3880653381347656, 0.6271247863769531, -1.7127132415771484, -0.45969390869140625, 0.5008926391601562, 1.5980911254882812, 1.3441658020019531, 0.048282623291015625, 3.183624267578125, -0.19191360473632812, 0.6215019226074219, 0.9088134765625, 1.420501708984375, 0.5539093017578125, -0.9127349853515625, 0.07293319702148438, -0.23727798461914062, -0.0337371826171875, -1.6600799560546875, 1.0470657348632812, 0.63555908203125, -1.2589874267578125, 1.2211627960205078, -0.8034000396728516, -0.568634033203125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000070.npy"} +{"epoch": 0.10582010582010581, "step": 71, "batch_size": 64, "mean": 0.3131345212459564, "std": 1.7051150798797607, "min": -5.518035888671875, "p10": -1.7286460876464844, "median": 0.14293289184570312, "p90": 2.3581100463867197, "max": 3.4686355590820312, "pos_frac": 0.578125, "sample": [0.12405204772949219, 1.5204391479492188, 1.1256847381591797, 0.3753986358642578, -0.10202789306640625, -0.63995361328125, 1.3635368347167969, -1.7051315307617188, -0.27689361572265625, 3.035614013671875, 0.3321266174316406, -2.1366653442382812, -0.432861328125, 0.9636001586914062, -3.0816574096679688, 0.010431289672851562, 0.47051239013671875, 3.4067306518554688, 0.9797248840332031, 1.8089141845703125, -3.864501953125, 3.2206497192382812, 0.15472030639648438, -0.019603729248046875, 1.0130233764648438, 3.015472412109375, 0.8412094116210938, -0.41770172119140625, 0.13036727905273438, -0.2519683837890625, -5.518035888671875, -0.385498046875, -1.9795722961425781, -0.5748329162597656, 0.07237625122070312, 0.13114547729492188, 0.4591827392578125, 1.1623687744140625, 1.6030807495117188, -1.2280616760253906, -0.34976959228515625, -3.036865234375, 1.024993896484375, 2.4500885009765625, -0.4282970428466797, -0.22083663940429688, -1.7387237548828125, 1.4817886352539062, -0.0071868896484375, -0.09097862243652344, 2.0877819061279297, 3.335681915283203, -1.42510986328125, 1.6772613525390625, 1.537261962890625, 1.4857406616210938, 1.0469322204589844, -0.06162261962890625, 0.7474899291992188, 0.7430667877197266, 2.14349365234375, -0.014093399047851562, 3.4686355590820312, -0.5215187072753906], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000071.npy"} +{"epoch": 0.1073318216175359, "step": 72, "batch_size": 64, "mean": 0.7806516885757446, "std": 1.4726299047470093, "min": -3.8550567626953125, "p10": -0.8027095794677733, "median": 0.7557544708251953, "p90": 2.4053787231445316, "max": 5.929059982299805, "pos_frac": 0.75, "sample": [2.4737625122070312, 1.6468734741210938, 0.6651611328125, 1.9479827880859375, -3.8550567626953125, 0.060047149658203125, 0.2728767395019531, 5.929059982299805, -1.586944580078125, -0.8419857025146484, 1.4191856384277344, 1.6670074462890625, -0.41963958740234375, 2.1684322357177734, 0.04476165771484375, -0.40266990661621094, -0.12028121948242188, 0.475128173828125, 2.3398284912109375, 0.3898468017578125, 3.1976776123046875, 1.0166282653808594, 2.214466094970703, 0.48764991760253906, 0.916259765625, 0.5845413208007812, 0.4659290313720703, -0.6067867279052734, -1.20928955078125, 0.31581878662109375, 2.8082427978515625, 1.3704700469970703, 1.027374267578125, 2.4334716796875, 0.8245925903320312, 0.4102344512939453, 0.6436710357666016, 1.2095355987548828, 0.5929450988769531, -0.19278717041015625, 1.6953887939453125, 1.1994342803955078, 1.4232559204101562, 0.2823753356933594, 1.3878250122070312, -0.39569854736328125, 1.3915557861328125, 2.2236175537109375, -1.2676544189453125, 1.3283615112304688, -1.5773162841796875, 1.051300048828125, 3.4565963745117188, -0.7110652923583984, 3.5638198852539062, -0.6608123779296875, 1.2641639709472656, 0.7599143981933594, -1.68408203125, 0.3107490539550781, -0.654815673828125, 1.0262985229492188, 0.7515945434570312, 1.01287841796875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000072.npy"} +{"epoch": 0.10884353741496598, "step": 73, "batch_size": 64, "mean": 0.0687820315361023, "std": 1.9548823833465576, "min": -4.2749176025390625, "p10": -1.6948028564453126, "median": -0.2613687515258789, "p90": 1.7577236175537112, "max": 9.87872314453125, "pos_frac": 0.4375, "sample": [1.319915771484375, 0.4011573791503906, -0.335113525390625, -0.9087104797363281, -1.3888015747070312, -1.6499252319335938, -1.6923675537109375, -0.9639625549316406, -4.2749176025390625, 1.2551250457763672, -1.6958465576171875, 0.05423736572265625, 0.12921524047851562, -1.402862548828125, -1.3269920349121094, 0.6469287872314453, -1.9810714721679688, -0.41658782958984375, -0.724456787109375, 1.081451416015625, -1.0010986328125, -0.19022750854492188, -0.9639167785644531, -0.25445556640625, 9.87872314453125, 2.043365478515625, -0.6528701782226562, -1.0016441345214844, -0.2682819366455078, 1.0666046142578125, 0.3738536834716797, -0.5152320861816406, 0.8093204498291016, 0.13646697998046875, -2.2203216552734375, 0.3966503143310547, 1.7873420715332031, 2.782470703125, -1.5871105194091797, -1.7502365112304688, -0.888427734375, 3.719024658203125, -0.401214599609375, -1.9856033325195312, -0.43758392333984375, 1.6886138916015625, 1.4516372680664062, -0.8108768463134766, -0.46124267578125, -0.9796028137207031, -2.6931610107421875, 0.2640495300292969, 0.675872802734375, 0.5683937072753906, 1.3552436828613281, 1.07183837890625, 0.89581298828125, -0.13758087158203125, -0.7464523315429688, -0.14981842041015625, 0.29480934143066406, 2.8048248291015625, 5.014373779296875, -0.7066974639892578], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000073.npy"} +{"epoch": 0.11035525321239607, "step": 74, "batch_size": 64, "mean": 0.744025468826294, "std": 1.608478307723999, "min": -2.8560333251953125, "p10": -1.039209175109863, "median": 0.7327785491943359, "p90": 2.452210426330567, "max": 7.238006591796875, "pos_frac": 0.6875, "sample": [-0.795135498046875, 2.600616455078125, -0.0705108642578125, 0.7282485961914062, 2.3288345336914062, -0.5592041015625, 2.6202850341796875, 7.238006591796875, 1.10028076171875, 2.366727828979492, 1.3476219177246094, -0.3178443908691406, 2.2924957275390625, 1.0723342895507812, -0.6038589477539062, 0.7366409301757812, 0.809295654296875, 0.9145660400390625, 0.019407272338867188, -2.7346649169921875, 1.0694541931152344, 1.911834716796875, 0.7061920166015625, 0.8977813720703125, 1.169189453125, -0.04450798034667969, -0.4446563720703125, 1.3828201293945312, 2.660308837890625, -1.27398681640625, 0.14844894409179688, 0.7887363433837891, 1.7133445739746094, -1.307882308959961, -0.9143104553222656, 1.3836822509765625, -0.7802829742431641, 1.0737838745117188, -2.4046401977539062, -0.4304981231689453, 0.6848793029785156, -1.0927371978759766, 2.1297645568847656, 0.7101974487304688, 1.0871658325195312, 3.7142333984375, 1.0688247680664062, -0.40789794921875, 1.3157424926757812, 1.2399444580078125, -2.8560333251953125, 0.08685684204101562, 0.7052497863769531, -0.15456008911132812, -1.2769699096679688, 0.5272865295410156, 0.6151504516601562, 4.520355224609375, 2.4888458251953125, 1.184112548828125, 1.6509075164794922, 0.6953353881835938, 0.7289161682128906, -0.1468963623046875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000074.npy"} +{"epoch": 0.11186696900982615, "step": 75, "batch_size": 64, "mean": 0.8308317065238953, "std": 1.5749726295471191, "min": -3.4029617309570312, "p10": -0.8579879760742184, "median": 0.559776782989502, "p90": 3.098542785644532, "max": 5.63690185546875, "pos_frac": 0.671875, "sample": [-0.42975807189941406, -0.46547698974609375, -0.4205894470214844, 1.4184036254882812, 3.15362548828125, -0.3230152130126953, 0.332855224609375, 1.3796844482421875, 1.6426162719726562, 3.533233642578125, -1.2412796020507812, -0.15636825561523438, -1.0742950439453125, 1.120269775390625, 0.4715995788574219, 1.4315986633300781, 2.8075332641601562, -1.638418197631836, 0.5585088729858398, 0.34056663513183594, 3.663330078125, 1.2482337951660156, 1.4893569946289062, 2.1674041748046875, 1.2417373657226562, 0.3159332275390625, 1.9082794189453125, -1.41168212890625, 2.9700164794921875, 0.6610870361328125, 0.43665122985839844, 3.5255279541015625, -0.2217559814453125, 2.218618392944336, 1.2603530883789062, 0.6177711486816406, 5.63690185546875, 1.091339111328125, 0.8645095825195312, 0.34793853759765625, -0.5332870483398438, 1.343832015991211, -3.4029617309570312, 0.5610446929931641, 1.836334228515625, -0.12514877319335938, -0.3932228088378906, 0.10591506958007812, 0.2303314208984375, -0.03592491149902344, 0.7747421264648438, 1.7789459228515625, -1.382049560546875, -0.38608551025390625, 1.3312263488769531, -0.5334625244140625, 4.0297698974609375, -0.1795177459716797, -0.9970703125, -0.11400985717773438, 0.4821319580078125, 0.1834716796875, 4.1241302490234375, 2.0012474060058594], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000075.npy"} +{"epoch": 0.11337868480725624, "step": 76, "batch_size": 64, "mean": 0.3572384715080261, "std": 1.8785728216171265, "min": -5.1087799072265625, "p10": -1.850385284423828, "median": 0.2363414764404297, "p90": 2.7292243957519537, "max": 4.575614929199219, "pos_frac": 0.609375, "sample": [-5.1087799072265625, 1.6648979187011719, -2.5844039916992188, 0.053009033203125, 0.619659423828125, -0.7274818420410156, -0.5518016815185547, 1.1788864135742188, 0.28195953369140625, 3.8489151000976562, 2.256101608276367, 4.575614929199219, -0.053009033203125, -4.6935577392578125, 1.3383026123046875, -1.7294692993164062, 0.68328857421875, 1.2368125915527344, 1.164581298828125, 0.24742889404296875, -0.225921630859375, 3.0104904174804688, -2.2899932861328125, 3.8238906860351562, -3.1822662353515625, 2.6060867309570312, 0.7864227294921875, 1.4469451904296875, 0.07589149475097656, -0.37198638916015625, 0.0752716064453125, -0.8472518920898438, -1.9022064208984375, -0.517303466796875, -0.40749359130859375, 0.41289520263671875, 2.4382400512695312, 1.967926025390625, -0.1752185821533203, 1.0665435791015625, -0.5590000152587891, -0.025665283203125, 0.07677841186523438, 0.22525405883789062, 2.3023452758789062, 0.5953540802001953, 0.8054046630859375, -1.213104248046875, -0.04345703125, -0.693328857421875, 0.396484375, 0.9987945556640625, -0.2590827941894531, 2.9315948486328125, 4.1638946533203125, 0.30008506774902344, -0.26786041259765625, -1.6018962860107422, -2.870025634765625, 2.7819976806640625, 1.2125740051269531, 0.0362548828125, 2.0168533325195312, 0.06109619140625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000076.npy"} +{"epoch": 0.11489040060468632, "step": 77, "batch_size": 64, "mean": 0.9306704998016357, "std": 1.813875436782837, "min": -3.54486083984375, "p10": -1.1323368072509763, "median": 0.6748628616333008, "p90": 3.2009464263916025, "max": 6.4031524658203125, "pos_frac": 0.71875, "sample": [-0.4248619079589844, 1.9322776794433594, -1.2225379943847656, -1.8119277954101562, 2.5596580505371094, 1.8398818969726562, -2.6136741638183594, 0.2855396270751953, -3.54486083984375, 0.9286270141601562, 1.2431411743164062, 1.249176025390625, -0.19109153747558594, -1.5001564025878906, 0.4398231506347656, 2.9668235778808594, 0.2943115234375, 0.737457275390625, 1.134979248046875, 4.2954864501953125, -0.43099403381347656, -0.020654678344726562, 2.8613510131835938, 3.570240020751953, 0.4778900146484375, 0.3185462951660156, 0.2510108947753906, 3.3012847900390625, 0.6024589538574219, 0.9188194274902344, -0.42249488830566406, -0.6113739013671875, -0.09769058227539062, 0.472259521484375, 1.7139549255371094, 1.993072509765625, 1.3604202270507812, 1.0135078430175781, 1.2314510345458984, 1.5498046875, 1.6742782592773438, 0.1956329345703125, 0.8756942749023438, -0.9218673706054688, 0.2454376220703125, 6.4031524658203125, 0.3596477508544922, -0.13648605346679688, 4.153373718261719, 1.6814498901367188, 5.719825744628906, 0.5322494506835938, 0.6122684478759766, 2.2121028900146484, 2.6996078491210938, 0.8685073852539062, -0.8127899169921875, -1.632965087890625, 2.6969757080078125, -0.1968212127685547, 0.89837646484375, 4.185935974121094, 0.3381614685058594, -1.739776611328125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000077.npy"} +{"epoch": 0.1164021164021164, "step": 78, "batch_size": 64, "mean": 0.491301029920578, "std": 1.6939300298690796, "min": -4.484519958496094, "p10": -1.2697303771972657, "median": 0.3223285675048828, "p90": 2.825635719299317, "max": 4.7413787841796875, "pos_frac": 0.609375, "sample": [3.083629608154297, -3.1334228515625, 0.7650604248046875, 3.185821533203125, 0.207855224609375, 2.660001754760742, 0.3180885314941406, 0.5758285522460938, 0.1257495880126953, 0.8569259643554688, 1.2733001708984375, -1.050994873046875, -0.2851886749267578, 2.0626068115234375, -0.070404052734375, -1.2631454467773438, -1.9048652648925781, 3.9003524780273438, -1.7579421997070312, 0.6533126831054688, -0.6032867431640625, -0.6054000854492188, -0.8732147216796875, -1.1181182861328125, 1.6335601806640625, -0.007080078125, -0.39434051513671875, -4.484519958496094, 3.0010299682617188, -2.9825286865234375, 1.6699581146240234, 1.5556640625, -0.30435943603515625, 1.4090347290039062, 0.08936309814453125, -0.081207275390625, -1.272552490234375, 0.326568603515625, 1.6822986602783203, 2.231203079223633, 0.2883796691894531, 0.8269081115722656, 1.1619186401367188, 1.6037750244140625, -0.46881866455078125, 2.0160675048828125, -1.5616683959960938, 0.2185688018798828, 0.8591423034667969, 1.9127349853515625, -0.46962738037109375, 3.1479415893554688, -0.4092254638671875, -1.2321701049804688, 0.62567138671875, -0.5747337341308594, 0.5269508361816406, 0.7086868286132812, 1.498300552368164, 2.8966217041015625, -0.6183013916015625, 0.09962081909179688, 4.7413787841796875, 2.5705013275146484], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000078.npy"} +{"epoch": 0.11791383219954649, "step": 79, "batch_size": 64, "mean": 1.4255708456039429, "std": 2.758211612701416, "min": -3.8801803588867188, "p10": -0.9978908538818357, "median": 1.0060949325561523, "p90": 4.2137973785400415, "max": 11.66326904296875, "pos_frac": 0.703125, "sample": [0.09372711181640625, 0.22357940673828125, -1.4682273864746094, 2.217243194580078, -0.6194992065429688, 2.0273284912109375, -3.8801803588867188, -0.4492931365966797, -0.03290557861328125, 0.0051116943359375, -0.6643314361572266, 1.2060165405273438, 1.4899482727050781, 10.018577575683594, -2.876373291015625, 1.06903076171875, 3.372814178466797, 0.8128490447998047, -0.7597694396972656, -0.2624397277832031, 1.2621574401855469, 8.510452270507812, 3.4857025146484375, 4.6551513671875, 1.5985679626464844, -1.6039505004882812, 5.990299224853516, 2.11578369140625, 1.735107421875, 3.51239013671875, -1.688446044921875, -0.3551959991455078, 3.202972412109375, 1.7593154907226562, 2.529630661010742, 0.4723033905029297, 1.1497001647949219, 2.3405933380126953, -1.0638580322265625, 3.0810928344726562, -0.8439674377441406, 3.040740966796875, 0.5792236328125, -0.3445472717285156, 0.7676792144775391, 0.9985523223876953, 3.3311538696289062, 0.31627464294433594, 0.8009376525878906, 0.12677001953125, 1.0340328216552734, -1.4648361206054688, 7.859466552734375, 0.1483917236328125, 1.0136375427246094, 11.66326904296875, -0.28092193603515625, 1.3245468139648438, 0.2061767578125, 2.0506134033203125, -0.5315322875976562, 1.2270622253417969, 4.514400482177734, -0.5135726928710938], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000079.npy"} +{"epoch": 0.11942554799697656, "step": 80, "batch_size": 64, "mean": 0.6862205266952515, "std": 2.7007906436920166, "min": -5.3495025634765625, "p10": -2.2907287597656247, "median": 0.3432197570800781, "p90": 4.7207275390625005, "max": 8.68402099609375, "pos_frac": 0.546875, "sample": [-0.5852985382080078, 3.8162460327148438, 0.5083236694335938, 2.2752456665039062, 1.110555648803711, 0.0421295166015625, -0.9457168579101562, -0.49332427978515625, -2.4625167846679688, -1.9178390502929688, 2.3214759826660156, 5.197265625, -1.3395843505859375, -1.6409225463867188, -0.21869277954101562, 5.212249755859375, -1.8779067993164062, -0.783477783203125, 0.8348388671875, -0.011341094970703125, 0.596435546875, -2.4505386352539062, 0.2150554656982422, 2.462799072265625, 4.5656585693359375, 3.4556541442871094, 3.7743759155273438, -0.1138763427734375, 4.7871856689453125, 5.5467529296875, -2.7152976989746094, 2.29345703125, 0.9313011169433594, 0.5191841125488281, 1.527557373046875, 2.5191268920898438, -0.6590232849121094, -0.16388702392578125, 6.08856201171875, -5.3495025634765625, 4.8518218994140625, 0.29659271240234375, -4.8788299560546875, 2.0512046813964844, -0.3037605285644531, 1.0343170166015625, 1.1881561279296875, 0.7370834350585938, -3.1862640380859375, -1.3720836639404297, 3.95751953125, -0.34413719177246094, -0.9461212158203125, -0.3135719299316406, -0.6081790924072266, -0.24626541137695312, 1.080718994140625, -1.493539810180664, 1.8540706634521484, -5.078681945800781, 0.3898468017578125, -1.031158447265625, 8.68402099609375, 0.7226638793945312], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000080.npy"} +{"epoch": 0.12093726379440665, "step": 81, "batch_size": 64, "mean": 0.982353687286377, "std": 2.520073890686035, "min": -4.387336730957031, "p10": -1.786664581298828, "median": 0.8995294570922852, "p90": 3.851153564453125, "max": 10.150848388671875, "pos_frac": 0.6875, "sample": [1.2502002716064453, 2.5218372344970703, 1.1834754943847656, -1.8479156494140625, 1.2098541259765625, -4.387336730957031, 0.3694267272949219, 0.37009429931640625, -0.43106651306152344, -0.7440757751464844, 6.8722991943359375, -0.8583526611328125, 1.5407867431640625, 3.3650360107421875, 0.6651248931884766, -1.0417098999023438, 0.3584442138671875, 0.25453948974609375, 5.618919372558594, 2.3422470092773438, 1.4330768585205078, -0.8195343017578125, 1.629638671875, 2.446502685546875, 2.8436431884765625, 1.7755393981933594, 0.4953460693359375, -0.87896728515625, -0.04899787902832031, -2.0991058349609375, 1.2691268920898438, -3.7763824462890625, 1.991973876953125, 0.5175113677978516, -1.596221923828125, 4.710479736328125, 0.1646270751953125, 10.150848388671875, 2.483856201171875, 6.28961181640625, -1.6437454223632812, 1.2366371154785156, 0.34259796142578125, 0.01190185546875, 1.1339340209960938, 0.03733634948730469, 1.9384632110595703, 1.4293289184570312, 4.159523010253906, 2.306671142578125, 3.8283920288085938, -2.4632492065429688, 3.066448211669922, -0.6953201293945312, -0.4475517272949219, 1.6183700561523438, -2.291543960571289, 0.41081809997558594, 1.4145469665527344, 3.8609085083007812, -0.7278785705566406, 1.7992286682128906, -4.1528167724609375, -0.8967666625976562], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000081.npy"} +{"epoch": 0.12244897959183673, "step": 82, "batch_size": 64, "mean": 1.2423981428146362, "std": 2.1404149532318115, "min": -5.80865478515625, "p10": -0.9608581542968747, "median": 1.1626367568969727, "p90": 4.130057907104494, "max": 7.956367492675781, "pos_frac": 0.734375, "sample": [0.6253852844238281, 1.5564384460449219, 0.45416259765625, -1.07000732421875, 2.6864471435546875, 0.04541015625, 1.047891616821289, 2.954265594482422, 4.416717529296875, 1.1607894897460938, 1.1644840240478516, 0.6787853240966797, 2.8889923095703125, 2.8812942504882812, 0.10229110717773438, 1.4495201110839844, 1.7019729614257812, 0.27811431884765625, 1.2619075775146484, 1.210052490234375, 0.6752090454101562, -0.7061767578125, 1.2749805450439453, -0.18474388122558594, -0.30248260498046875, 5.351661682128906, 1.7983264923095703, 0.9489288330078125, 1.9823856353759766, 1.1923065185546875, -0.14741897583007812, -0.1284809112548828, 7.956367492675781, -0.0210113525390625, 2.5887908935546875, 5.8515472412109375, 3.637664794921875, 2.3552703857421875, 1.2131576538085938, 3.854572296142578, 0.24457931518554688, -2.546550750732422, 4.2481231689453125, -1.9926948547363281, 1.6121463775634766, 0.6557846069335938, 2.898181915283203, -0.3510913848876953, 4.709938049316406, -0.3847808837890625, 0.39987945556640625, -0.1025848388671875, 2.1879730224609375, 4.664466857910156, -0.634185791015625, -1.1826667785644531, 2.5970687866210938, 1.7951774597167969, 1.670206069946289, 0.6482410430908203, -1.8560333251953125, -5.80865478515625, -1.2072391510009766, 0.5624275207519531], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000082.npy"} +{"epoch": 0.12396069538926682, "step": 83, "batch_size": 64, "mean": 1.432050347328186, "std": 2.5276846885681152, "min": -3.9630775451660156, "p10": -1.2055114746093747, "median": 1.2978229522705078, "p90": 3.7959617614746124, "max": 11.026237487792969, "pos_frac": 0.75, "sample": [2.294921875, 1.7532806396484375, -1.850341796875, 0.17035484313964844, 0.2867107391357422, -0.48584747314453125, 2.3779449462890625, 2.1286048889160156, 2.61895751953125, 7.79107666015625, 0.7068939208984375, 1.9496898651123047, -2.7682838439941406, 1.801239013671875, 2.2493228912353516, 1.1121444702148438, 2.9264144897460938, 2.637420654296875, 3.087432861328125, 1.3341522216796875, 2.5948638916015625, -1.3583831787109375, 5.319793701171875, 0.40064430236816406, -1.3541336059570312, 2.62786865234375, 5.3345947265625, 1.2089996337890625, 0.4265174865722656, -0.12311363220214844, -2.602020263671875, 1.1021366119384766, 2.8314971923828125, -3.9630775451660156, -0.7513828277587891, 2.9518985748291016, 1.4353160858154297, 1.0606002807617188, 1.9606437683105469, -3.2604522705078125, 0.061077117919921875, -0.2461090087890625, -0.5718345642089844, -0.21119117736816406, -0.4746971130371094, -0.8587265014648438, 0.34879302978515625, 2.960357666015625, 5.4951629638671875, 1.7986984252929688, 11.026237487792969, 1.7076797485351562, 4.099617004394531, 1.2614936828613281, 0.09368896484375, 1.9716167449951172, 1.3636360168457031, 2.2833938598632812, 2.487152099609375, 7.853668212890625, 0.627838134765625, 0.6053237915039062, 0.7816619873046875, -0.7782249450683594], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000083.npy"} +{"epoch": 0.1254724111866969, "step": 84, "batch_size": 64, "mean": 1.1620919704437256, "std": 2.756577253341675, "min": -4.51531982421875, "p10": -2.402637100219726, "median": 0.8830738067626953, "p90": 5.398051452636719, "max": 9.910400390625, "pos_frac": 0.71875, "sample": [1.48089599609375, 2.235576629638672, 0.7917461395263672, 2.070880889892578, 6.725990295410156, 2.637319564819336, 5.3293609619140625, -0.5801315307617188, -0.6138839721679688, -4.51531982421875, 0.9143218994140625, 2.4041919708251953, 4.572673797607422, -0.19945526123046875, -0.045078277587890625, 0.20555877685546875, 0.9458274841308594, -1.607147216796875, 5.7556915283203125, -1.4760704040527344, 1.4630126953125, 2.4060287475585938, 1.348196029663086, 3.1279449462890625, 0.49520301818847656, 0.152679443359375, -2.517578125, 5.427490234375, 0.04452323913574219, 1.8599166870117188, 0.190216064453125, 0.7662811279296875, -0.5258827209472656, 9.910400390625, -3.6505355834960938, 0.8518257141113281, 1.8787040710449219, 2.8656005859375, 1.5221595764160156, -3.250518798828125, -4.005615234375, 6.3055877685546875, 2.520130157470703, 0.32384490966796875, 5.85882568359375, 1.0531158447265625, 0.2856292724609375, 3.271512985229492, 1.5814857482910156, -2.7891311645507812, 0.19966888427734375, -1.3905105590820312, 1.7782154083251953, 2.6033782958984375, 1.9560890197753906, -2.134441375732422, 5.8080291748046875, 0.4919281005859375, 0.7093887329101562, -2.6598358154296875, 3.2536773681640625, 0.6600799560546875, -1.8047981262207031, -0.9009857177734375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000084.npy"} +{"epoch": 0.12698412698412698, "step": 85, "batch_size": 64, "mean": 1.5479627847671509, "std": 2.972931385040283, "min": -6.127849578857422, "p10": -2.0087703704833983, "median": 1.5814380645751953, "p90": 4.658524322509767, "max": 10.446723937988281, "pos_frac": 0.734375, "sample": [2.5418701171875, -2.4244041442871094, 1.5377845764160156, 3.334573745727539, 1.494384765625, 0.6442031860351562, 2.1762542724609375, 0.9855880737304688, 0.7656784057617188, 4.197902679443359, 3.820068359375, 4.3480072021484375, 2.0855579376220703, -1.97845458984375, 1.3918838500976562, 2.2354393005371094, 1.8385200500488281, 4.1072540283203125, 3.2000770568847656, 2.87872314453125, -3.2479095458984375, 3.171955108642578, -4.375091552734375, 3.0311241149902344, 0.7452945709228516, 5.37237548828125, -0.02698516845703125, 1.1617660522460938, 6.525409698486328, -0.2389202117919922, -1.7588539123535156, 2.4652252197265625, 3.72113037109375, 0.967010498046875, 1.9940242767333984, 0.7039566040039062, 4.791603088378906, 3.101612091064453, 0.5745010375976562, -0.3904438018798828, -6.127849578857422, 1.7366485595703125, -2.0217628479003906, 2.7009620666503906, 10.446723937988281, 3.280801773071289, 7.6964569091796875, -0.47074317932128906, 0.8968048095703125, 0.7675991058349609, 1.7491111755371094, -3.2607345581054688, 0.8178863525390625, -1.6127853393554688, 1.1719207763671875, 1.625091552734375, -3.0333251953125, -1.8006134033203125, 6.7991180419921875, 1.911376953125, 8.551719665527344, -1.7092208862304688, -0.29727935791015625, 1.7820167541503906], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000085.npy"} +{"epoch": 0.12849584278155707, "step": 86, "batch_size": 64, "mean": 1.1366732120513916, "std": 2.9642276763916016, "min": -6.238624572753906, "p10": -2.3160182952880857, "median": 1.1774930953979492, "p90": 5.375097465515139, "max": 9.071647644042969, "pos_frac": 0.59375, "sample": [9.071647644042969, -1.0453605651855469, -2.5155792236328125, -0.0262451171875, 1.7914180755615234, -0.4261360168457031, -0.16802978515625, 1.5611953735351562, -6.238624572753906, 2.3147029876708984, -0.5316600799560547, -2.2156448364257812, 5.55548095703125, 6.8197479248046875, -1.2388381958007812, 3.4978561401367188, 2.162508010864258, -0.7583351135253906, -5.569084167480469, 1.5619163513183594, 6.7041015625, -0.4311370849609375, -0.5498580932617188, 3.1531448364257812, 5.70745849609375, 0.04029655456542969, 2.754486083984375, 1.233255386352539, 3.3766021728515625, 0.7137126922607422, 4.0358734130859375, 5.650146484375, 2.8123626708984375, -4.327095031738281, 4.3230743408203125, 2.179168701171875, -1.6189651489257812, 1.6576862335205078, -2.3590354919433594, 0.9043807983398438, -2.7941818237304688, 3.7539138793945312, 2.296764373779297, -1.1335563659667969, 2.1055068969726562, -0.2616539001464844, 4.954202651977539, -1.797189712524414, 2.130596160888672, -0.5904865264892578, -0.29608154296875, 5.707275390625, -0.00504302978515625, -0.33695411682128906, 2.0965003967285156, 3.3606719970703125, -3.8124446868896484, 3.6751251220703125, 1.9482803344726562, -1.1593894958496094, 1.1217308044433594, 0.18091392517089844, 0.5209197998046875, 1.5190696716308594], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000086.npy"} +{"epoch": 0.13000755857898716, "step": 87, "batch_size": 64, "mean": 1.4544591903686523, "std": 3.6378307342529297, "min": -11.11627197265625, "p10": -2.1413143157958983, "median": 1.194112777709961, "p90": 5.957646942138672, "max": 11.310089111328125, "pos_frac": 0.703125, "sample": [1.015289306640625, 3.811492919921875, 9.856002807617188, -0.41381072998046875, 1.3129997253417969, -0.3521881103515625, 6.692138671875, -0.88531494140625, 4.820747375488281, 3.3476428985595703, -2.7119522094726562, -1.7276649475097656, 2.660388946533203, -3.2649402618408203, -11.11627197265625, 6.956245422363281, -4.8909454345703125, 6.028831481933594, 1.5672569274902344, 1.4882888793945312, -1.753509521484375, -1.626922607421875, 2.128690719604492, -2.1769065856933594, 4.4086456298828125, -4.52978515625, 0.1633453369140625, -0.1154937744140625, 1.30859375, -1.5827884674072266, -1.1428184509277344, 0.14995956420898438, 5.045722961425781, -2.8887100219726562, 1.482452392578125, -2.0582656860351562, 0.9751739501953125, 3.376363754272461, 3.834300994873047, 1.0796318054199219, 1.7184181213378906, 4.210174560546875, 0.51031494140625, -1.8367652893066406, 2.2299575805664062, 2.4271926879882812, 0.5672950744628906, 3.3724098205566406, -1.4323158264160156, 4.2413177490234375, 5.7915496826171875, 2.2444305419921875, 8.976943969726562, 0.30706787109375, 0.33863067626953125, 3.50299072265625, 0.8237953186035156, 0.10608673095703125, 11.310089111328125, 7.544914245605469, 2.10711669921875, 0.9350395202636719, 1.03369140625, 1.7831268310546875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000087.npy"} +{"epoch": 0.13151927437641722, "step": 88, "batch_size": 64, "mean": 1.0208956003189087, "std": 4.696348667144775, "min": -11.639678955078125, "p10": -3.105636978149414, "median": 0.7032327651977539, "p90": 7.641759490966797, "max": 18.811981201171875, "pos_frac": 0.625, "sample": [9.15447998046875, 0.7650852203369141, -3.1341819763183594, -3.039031982421875, 8.00030517578125, 5.3890380859375, 8.147918701171875, 0.8075141906738281, 0.26807403564453125, 0.08476829528808594, 4.553211212158203, 6.003700256347656, 1.551055908203125, 1.832601547241211, -0.6576614379882812, 2.1068572998046875, 7.591819763183594, -2.4502334594726562, -3.676788330078125, 1.7734375, 3.1709823608398438, 1.1048469543457031, 0.7853221893310547, -0.6981678009033203, -2.4276504516601562, 1.4442214965820312, -0.4610862731933594, 18.811981201171875, -1.5922126770019531, -4.232683181762695, -11.639678955078125, -4.895256042480469, 7.6631622314453125, -0.616943359375, 1.05963134765625, 0.5636940002441406, -3.0258255004882812, 2.547853469848633, -0.21818161010742188, 1.6071090698242188, 0.07225608825683594, 3.81591796875, 2.5100536346435547, 0.9128837585449219, -5.8360595703125, -0.0838623046875, 0.3184547424316406, 0.5335121154785156, -2.189178466796875, -0.7454833984375, 2.809629440307617, 1.2623062133789062, 9.628616333007812, 8.149139404296875, 4.9526824951171875, -3.0273895263671875, 3.0119400024414062, 0.2567100524902344, 0.6413803100585938, 3.1817798614501953, -1.870290756225586, -11.468246459960938, -2.842306137084961, -2.6802139282226562], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000088.npy"} +{"epoch": 0.1330309901738473, "step": 89, "batch_size": 64, "mean": 1.9596128463745117, "std": 3.7056400775909424, "min": -5.992958068847656, "p10": -1.3534778594970702, "median": 1.4931488037109375, "p90": 5.928994750976564, "max": 16.035614013671875, "pos_frac": 0.71875, "sample": [0.6794986724853516, 3.5732460021972656, -0.5870647430419922, 1.476043701171875, 4.65948486328125, 4.397804260253906, -1.2033309936523438, 0.7463836669921875, 2.79644775390625, 5.289573669433594, 2.553436279296875, 6.1114044189453125, 3.1966705322265625, -1.2133331298828125, 0.7708663940429688, 0.3938732147216797, -1.6085433959960938, 1.51025390625, -0.8805007934570312, -3.127992630004883, 3.7377471923828125, -1.0172786712646484, -0.03932952880859375, 5.022468566894531, -1.4135398864746094, 16.035614013671875, 3.6742706298828125, 2.0287857055664062, -0.19342422485351562, 7.788848876953125, 0.14530181884765625, 3.5358734130859375, 1.3008995056152344, 1.2503738403320312, 5.5033721923828125, 0.20738983154296875, 3.5163040161132812, 1.1705093383789062, -0.349151611328125, 5.164764404296875, 3.1058387756347656, 3.434703826904297, 9.872550964355469, 10.478231430053711, 1.5545291900634766, 0.2559242248535156, 0.9384498596191406, 4.1089630126953125, 6.319637298583984, -5.5943450927734375, -0.571929931640625, 3.9381580352783203, -0.12438583374023438, 0.358673095703125, 0.18140029907226562, -5.14068603515625, 2.2882347106933594, -0.5524826049804688, -4.325248718261719, -5.992958068847656, 7.3664093017578125, 2.0358428955078125, 1.7914505004882812, 3.0842361450195312], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000089.npy"} +{"epoch": 0.1345427059712774, "step": 90, "batch_size": 64, "mean": 1.7934300899505615, "std": 3.918658971786499, "min": -7.41455078125, "p10": -3.392925262451172, "median": 1.6387596130371094, "p90": 6.4769088745117225, "max": 11.726799011230469, "pos_frac": 0.671875, "sample": [-0.9396495819091797, 3.9695205688476562, 1.722381591796875, 3.248828887939453, 4.981863021850586, 0.03732490539550781, 4.805164337158203, 11.726799011230469, -7.41455078125, -4.287109375, 4.613380432128906, 5.314548492431641, 4.304300308227539, 1.0623397827148438, 3.7936935424804688, -1.4622611999511719, 1.6637344360351562, 8.0753173828125, -2.894359588623047, 2.065196990966797, 5.655891418457031, -4.137168884277344, 11.10400390625, 5.3792572021484375, 9.563827514648438, 0.88031005859375, 3.0739974975585938, 2.76275634765625, -0.08043861389160156, 0.5608367919921875, -0.622100830078125, -0.69512939453125, 2.6503982543945312, 10.555526733398438, 1.2864837646484375, 6.828773498535156, -4.423393249511719, -0.7928619384765625, 8.505012512207031, 2.1755924224853516, -2.730010986328125, 2.3986968994140625, 1.6416015625, -3.43603515625, 4.892852783203125, 0.09375762939453125, 1.0220069885253906, -0.15909767150878906, -0.2355022430419922, -0.4529380798339844, -4.426361083984375, 1.7191543579101562, 2.9397659301757812, 0.0934295654296875, 0.8463764190673828, 1.384206771850586, 1.6359176635742188, -3.9879379272460938, -3.2923355102539062, 3.7878875732421875, 3.0928573608398438, 4.11395263671875, -0.6307430267333984, -0.15001678466796875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000090.npy"} +{"epoch": 0.1360544217687075, "step": 91, "batch_size": 64, "mean": 1.4283735752105713, "std": 4.522755146026611, "min": -8.807670593261719, "p10": -3.4285118103027337, "median": 0.709773063659668, "p90": 6.047349548339844, "max": 21.5604248046875, "pos_frac": 0.6875, "sample": [4.899871826171875, 5.998451232910156, 0.6642589569091797, -2.7715835571289062, 0.410125732421875, 9.684577941894531, 0.1569538116455078, 2.1464767456054688, 3.1567840576171875, 9.657112121582031, 21.5604248046875, -1.5389633178710938, 0.5479793548583984, -2.4831886291503906, 2.6740360260009766, -2.5264225006103516, -1.2125091552734375, -1.7323074340820312, 4.912452697753906, -4.1426544189453125, 1.9672107696533203, 2.480060577392578, 1.2059783935546875, 0.3395576477050781, 0.7552871704101562, 2.3095569610595703, 4.521810531616211, -3.8560409545898438, -6.060150146484375, 5.388771057128906, 0.7711715698242188, -2.3185081481933594, -5.8269500732421875, 1.8271713256835938, 7.631994247436523, -2.2498207092285156, -2.338165283203125, 3.7993850708007812, -1.4319496154785156, -8.807670593261719, 3.7990188598632812, 3.9724960327148438, -3.8846359252929688, -0.34470367431640625, 0.257843017578125, 0.17414474487304688, 0.8913726806640625, 6.068305969238281, 3.5226364135742188, 1.938385009765625, 0.10335540771484375, -2.295530319213867, 0.23494720458984375, 0.03248310089111328, 6.375278472900391, 3.3015403747558594, 10.03692626953125, 0.03650665283203125, 5.3220062255859375, 2.795177459716797, -3.710052490234375, -0.155517578125, 2.2323570251464844, 0.5410022735595703], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000091.npy"} +{"epoch": 0.13756613756613756, "step": 92, "batch_size": 64, "mean": 2.316648483276367, "std": 4.9371819496154785, "min": -7.679351806640625, "p10": -1.9801511764526367, "median": 1.2416038513183594, "p90": 8.400841522216798, "max": 18.217605590820312, "pos_frac": 0.640625, "sample": [3.9342117309570312, -0.6560840606689453, -3.6322402954101562, -0.1613311767578125, -1.5289230346679688, -1.9149093627929688, 0.21300125122070312, 0.4862079620361328, 3.0038299560546875, -1.7659873962402344, -1.7576904296875, 1.6435413360595703, 3.999666213989258, -2.9451675415039062, 6.8001708984375, 2.433116912841797, 3.5974349975585938, 0.26488304138183594, 7.620447158813477, 1.7403640747070312, 4.821502685546875, -0.5609245300292969, 1.491292953491211, 1.1804275512695312, 6.6558074951171875, -0.5100784301757812, -5.9927520751953125, 2.376800537109375, -7.679351806640625, 5.6593017578125, -2.0081119537353516, 3.2451629638671875, 10.78167724609375, 0.902587890625, 5.596282958984375, 1.3027801513671875, 0.22133636474609375, 6.9144744873046875, -5.965080261230469, 6.483055114746094, -1.2390823364257812, -0.3477821350097656, 8.9461669921875, 2.8560333251953125, 2.3122730255126953, -1.224578857421875, 6.4600372314453125, 9.082168579101562, 3.97650146484375, -1.2468490600585938, 0.025386810302734375, 15.155258178710938, -0.1710357666015625, -0.028591156005859375, 8.174758911132812, -6.734836578369141, 18.217605590820312, 8.497734069824219, 0.5806293487548828, 12.28680419921875, 8.045181274414062, 1.0033855438232422, -1.3771286010742188, -1.2752685546875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000092.npy"} +{"epoch": 0.13907785336356765, "step": 93, "batch_size": 64, "mean": 1.369350552558899, "std": 5.281036853790283, "min": -9.707611083984375, "p10": -4.0478370666503904, "median": 0.7645683288574219, "p90": 7.416987609863281, "max": 24.696319580078125, "pos_frac": 0.5625, "sample": [0.8664073944091797, 4.399314880371094, 4.6121368408203125, 5.873626708984375, -2.024639129638672, 7.415000915527344, -5.677093505859375, -0.9333267211914062, -0.32041358947753906, 2.3438720703125, -0.6241283416748047, 2.1925582885742188, 24.696319580078125, 1.8344268798828125, -1.3367328643798828, -0.122772216796875, -0.6827011108398438, 4.239513397216797, -0.8008060455322266, -7.535865783691406, 3.7935409545898438, 13.197067260742188, -2.7015609741210938, 0.37084197998046875, 4.961677551269531, -0.3424663543701172, 7.417839050292969, 0.7780742645263672, -2.0692176818847656, 2.9469223022460938, -0.757476806640625, 0.29459571838378906, -1.854654312133789, 7.98828125, -2.9232940673828125, -1.9299240112304688, 0.9043769836425781, -0.05751800537109375, -9.707611083984375, -1.0027694702148438, 0.8826408386230469, -0.3259124755859375, 2.233430862426758, 1.754159927368164, 2.71868896484375, -5.923583984375, 0.6535396575927734, 0.7510623931884766, 8.077133178710938, 1.7257194519042969, 4.995054244995117, 1.2100543975830078, 7.147239685058594, 3.1385650634765625, -3.4816513061523438, -3.531707763671875, 7.528591156005859, -8.923240661621094, -4.269035339355469, 10.29302978515625, 5.682716369628906, -5.1644287109375, -1.2515602111816406, 3.9965057373046875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000093.npy"} +{"epoch": 0.14058956916099774, "step": 94, "batch_size": 64, "mean": 2.7925729751586914, "std": 6.915157318115234, "min": -17.447669982910156, "p10": -2.139606857299804, "median": 2.2982053756713867, "p90": 9.310162544250492, "max": 29.118011474609375, "pos_frac": 0.71875, "sample": [4.707496643066406, 0.3120231628417969, 0.41345977783203125, 3.6076793670654297, 1.6898040771484375, -13.708549499511719, 0.3311939239501953, 0.6747379302978516, 1.87017822265625, 3.2460784912109375, 0.6941108703613281, 7.124961853027344, 3.558624267578125, -10.585607528686523, 2.9669132232666016, 8.305686950683594, -1.237335205078125, -0.3200569152832031, 10.4896240234375, 4.2467498779296875, 2.970285415649414, 2.6206207275390625, 4.770195007324219, 2.9568443298339844, 2.6721878051757812, 1.2841625213623047, -0.035785675048828125, 1.6447906494140625, -4.045738220214844, 6.030586242675781, 4.791921615600586, -2.2832908630371094, -0.243560791015625, -1.4217987060546875, 4.449851989746094, 5.450397491455078, 6.4289398193359375, 4.026885986328125, 5.307670593261719, -4.5440521240234375, 9.740652084350586, 10.170845031738281, -0.5504684448242188, 29.118011474609375, 6.016155242919922, 5.3678436279296875, -1.332733154296875, 17.413543701171875, 6.1423492431640625, 3.1707305908203125, 24.140914916992188, 0.9502925872802734, -1.0091400146484375, -1.18365478515625, 2.513761520385742, 0.6396617889404297, -17.447669982910156, 2.0826492309570312, 15.963943481445312, 1.499338150024414, 0.28801536560058594, -1.8043441772460938, -3.1380538940429688, -1.2468490600585938], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000094.npy"} +{"epoch": 0.1421012849584278, "step": 95, "batch_size": 64, "mean": 2.2672791481018066, "std": 5.979048252105713, "min": -11.661102294921875, "p10": -4.353610229492188, "median": 1.599212646484375, "p90": 11.022861480712892, "max": 16.315826416015625, "pos_frac": 0.640625, "sample": [8.632659912109375, -4.45452880859375, 0.8910140991210938, -1.5798301696777344, -5.1870574951171875, -0.3684043884277344, 13.248870849609375, 1.3185138702392578, 4.399871826171875, -0.15006637573242188, -2.4859542846679688, 1.574554443359375, 1.623870849609375, -1.6403541564941406, -0.04155731201171875, 2.7852783203125, -11.085067749023438, -3.7527847290039062, -1.2864398956298828, 2.0127906799316406, 16.315826416015625, 10.374519348144531, 0.3778076171875, -0.9374237060546875, -8.645736694335938, 0.8389205932617188, 11.760223388671875, 12.373321533203125, -7.163734436035156, -11.661102294921875, 3.531810760498047, 11.535850524902344, 2.623394012451172, 4.216743469238281, -0.28713226318359375, 1.9204940795898438, 8.822456359863281, -3.7187576293945312, -6.693115234375, 4.914085388183594, 6.559356689453125, 0.30048370361328125, 11.148826599121094, 4.032859802246094, -4.118133544921875, 7.242683410644531, 7.90496826171875, 0.2274169921875, 2.733745574951172, 8.249977111816406, -3.0519332885742188, 6.840690612792969, -0.47141456604003906, 6.04852294921875, 8.579307556152344, 11.739768981933594, -2.833253860473633, 1.3811416625976562, 1.9084701538085938, 5.3809051513671875, 0.7367782592773438, 2.014068603515625, 10.72894287109375, -3.132150650024414], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000095.npy"} +{"epoch": 0.1436130007558579, "step": 96, "batch_size": 64, "mean": 2.618502616882324, "std": 7.711458683013916, "min": -17.569061279296875, "p10": -4.974496459960937, "median": 1.4416751861572266, "p90": 13.210534667968753, "max": 22.92535400390625, "pos_frac": 0.6875, "sample": [3.3218040466308594, -1.4164886474609375, 12.292724609375, -2.5941085815429688, 22.672576904296875, -17.569061279296875, 6.386749267578125, 1.870065689086914, 4.2674713134765625, 1.0468292236328125, 9.076080322265625, -0.5804538726806641, 2.6936874389648438, 16.348251342773438, 5.5261383056640625, 1.5081634521484375, -9.909339904785156, -0.15866851806640625, 0.5077667236328125, -0.38042449951171875, 1.4910087585449219, 1.3923416137695312, 13.827278137207031, -7.518745422363281, 1.1657638549804688, 0.6955986022949219, 7.254631042480469, -1.838653564453125, -3.9476470947265625, 0.19829559326171875, 2.140827178955078, 13.6038818359375, 22.92535400390625, 10.04498291015625, -1.311187744140625, -5.201446533203125, -13.088935852050781, 0.42876625061035156, -6.345878601074219, 1.661376953125, 4.573974609375, 3.094928741455078, 2.4283447265625, -0.3909912109375, 22.127639770507812, -4.3052825927734375, 4.651941299438477, 0.9033355712890625, 0.26671600341796875, 0.09383392333984375, 5.017322540283203, -2.506500244140625, 0.5364952087402344, 10.177268981933594, 17.116668701171875, 3.8672332763671875, -2.8742904663085938, 0.6814537048339844, 3.3956260681152344, 4.9436187744140625, 11.793670654296875, 3.3642616271972656, -4.4449462890625, -9.41552734375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000096.npy"} +{"epoch": 0.14512471655328799, "step": 97, "batch_size": 64, "mean": 1.3165080547332764, "std": 9.17529296875, "min": -37.5640869140625, "p10": -6.39561653137207, "median": 0.6453218460083008, "p90": 9.535021018981936, "max": 30.56402587890625, "pos_frac": 0.609375, "sample": [-18.224838256835938, -6.558540344238281, 2.3700923919677734, 9.780550003051758, 0.6226158142089844, 22.51415252685547, -6.015460968017578, -4.620841979980469, -4.036823272705078, -37.5640869140625, -3.9501705169677734, -1.9582405090332031, 2.209381103515625, 4.160758972167969, -7.133872985839844, -15.025665283203125, 0.6680278778076172, 0.07025527954101562, 3.7520294189453125, 0.5387191772460938, 1.718801498413086, 5.7656402587890625, 3.0964412689208984, -4.657123565673828, 6.4852752685546875, -1.2273788452148438, -2.5906143188476562, 8.962120056152344, -4.301441192626953, 21.435501098632812, 1.948822021484375, -4.5783233642578125, 4.040336608886719, 0.5178871154785156, 0.5865192413330078, 7.959831237792969, 11.752157211303711, 12.00461196899414, -1.2386093139648438, 6.36614990234375, 4.825374603271484, -10.0118408203125, 30.56402587890625, 4.784526824951172, 4.572639465332031, -0.6928806304931641, 5.969038009643555, 6.8536529541015625, 14.847564697265625, -4.347801208496094, -0.07194137573242188, 4.317131042480469, -0.3866119384765625, -2.5928688049316406, -6.597625732421875, 3.7276077270507812, 5.080820083618164, -3.3007659912109375, 4.281396865844727, 3.4594955444335938, 0.052173614501953125, 0.5301055908203125, 4.000217437744141, -1.2515583038330078], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000097.npy"} +{"epoch": 0.14663643235071808, "step": 98, "batch_size": 64, "mean": 0.8181928396224976, "std": 6.087565898895264, "min": -19.57476806640625, "p10": -4.934747314453125, "median": 1.3416156768798828, "p90": 7.783008193969728, "max": 12.086288452148438, "pos_frac": 0.671875, "sample": [0.1282196044921875, 2.0940895080566406, 8.892034530639648, 1.5248489379882812, 3.2585983276367188, 4.5127105712890625, 5.021034240722656, 3.3086090087890625, -3.4459152221679688, 11.513267517089844, -16.64508056640625, -4.918479919433594, -5.560302734375, -1.7119522094726562, -7.865135192871094, 0.052978515625, -0.35761260986328125, 0.9578323364257812, 1.1269111633300781, 1.092620849609375, 6.578853607177734, 8.951126098632812, -4.4015350341796875, 10.35772705078125, 0.4588794708251953, -3.301239013671875, 7.387840270996094, 1.3992156982421875, 3.112791061401367, 3.0508193969726562, 2.903186798095703, -0.31240081787109375, 10.141433715820312, 1.754058837890625, -4.821231842041016, 3.433837890625, 1.6876106262207031, 2.2081451416015625, 1.2840156555175781, -0.895294189453125, 2.4561233520507812, 0.7498779296875, -18.856658935546875, 1.8470039367675781, -0.371246337890625, -5.872581481933594, 2.0109901428222656, 1.5903377532958984, 0.643585205078125, 7.952365875244141, -4.941719055175781, 5.2014312744140625, -1.6517257690429688, -2.962444305419922, -19.57476806640625, 4.863288879394531, 3.0178070068359375, 1.0143451690673828, 0.9674644470214844, 12.086288452148438, 7.127834320068359, 7.1468658447265625, -1.8163223266601562, -4.220920562744141], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000098.npy"} +{"epoch": 0.14814814814814814, "step": 99, "batch_size": 64, "mean": 0.940975546836853, "std": 8.211979866027832, "min": -23.415863037109375, "p10": -7.917589569091796, "median": 0.7000045776367188, "p90": 10.018151473999025, "max": 29.372833251953125, "pos_frac": 0.53125, "sample": [13.464225769042969, 10.12795639038086, 9.754341125488281, 19.96471405029297, 0.8658676147460938, -9.530231475830078, -1.1946640014648438, -1.9503517150878906, 3.1961669921875, 6.972530364990234, 4.458770751953125, -0.2634925842285156, 1.05963134765625, 5.707191467285156, -0.8160057067871094, 3.8944854736328125, 2.690258026123047, -0.030622482299804688, -2.9746131896972656, -0.017011642456054688, 10.263496398925781, 0.8746681213378906, -13.469093322753906, -5.142578125, -12.41534423828125, -6.798572540283203, 29.372833251953125, -17.772354125976562, -4.77947998046875, 12.169586181640625, 1.9554901123046875, -8.063201904296875, -1.7698478698730469, 0.34944915771484375, 0.5341415405273438, -0.5981674194335938, 1.0296096801757812, -4.08452033996582, -1.2113399505615234, 7.606052398681641, 1.1439895629882812, -23.415863037109375, 8.983558654785156, 15.934906005859375, -0.7859420776367188, 2.152751922607422, 9.761940002441406, 1.4340324401855469, -11.209747314453125, 0.9865036010742188, -2.4645614624023438, 5.599615097045898, 2.0047073364257812, -1.0696678161621094, 5.6993560791015625, -0.3496894836425781, -1.1148910522460938, 6.753448486328125, 2.334440231323242, -0.7250137329101562, 1.3122787475585938, -5.423366546630859, -3.1724929809570312, -7.577827453613281], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000099.npy"} +{"epoch": 0.14965986394557823, "step": 100, "batch_size": 64, "mean": 2.406625747680664, "std": 11.435500144958496, "min": -29.728271484375, "p10": -7.926403045654296, "median": 1.3047218322753906, "p90": 13.304907226562515, "max": 47.84613037109375, "pos_frac": 0.65625, "sample": [5.144012451171875, -3.5902175903320312, -8.313064575195312, 3.9085235595703125, 19.123321533203125, -1.8495712280273438, 0.8311443328857422, 5.319620132446289, 8.28106689453125, 8.153106689453125, 2.1103057861328125, -0.6887969970703125, 6.87847900390625, 25.75806427001953, 0.9176788330078125, 2.2825698852539062, -1.38848876953125, 3.793170928955078, 1.0367546081542969, 0.8281402587890625, 1.1604576110839844, 0.97637939453125, -9.346855163574219, 0.2945137023925781, -8.908699035644531, 6.988548278808594, -3.730855941772461, 14.920318603515625, 1.5850296020507812, -29.728271484375, -0.9393424987792969, 0.4774627685546875, -4.162445068359375, 4.244121551513672, 2.7664833068847656, -0.6571426391601562, 9.535614013671875, 1.2471160888671875, -18.896766662597656, 7.374114990234375, 2.1616058349609375, 32.787384033203125, -9.014060974121094, 1.3623275756835938, -3.524158477783203, -5.417873382568359, 3.2146759033203125, 3.004434585571289, -2.5647506713867188, -0.6854820251464844, 8.092239379882812, 19.496353149414062, -3.646392822265625, 5.064750671386719, 3.8745574951171875, 1.0937843322753906, -7.024192810058594, 21.593490600585938, 5.752967834472656, 2.9537506103515625, 47.84613037109375, -1.638275146484375, -29.172286987304688, 4.6774749755859375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000100.npy"} +{"epoch": 0.15117157974300832, "step": 101, "batch_size": 64, "mean": 3.0907914638519287, "std": 8.39657211303711, "min": -26.875152587890625, "p10": -4.758570289611816, "median": 2.8843536376953125, "p90": 11.379962158203126, "max": 31.911697387695312, "pos_frac": 0.75, "sample": [-3.4780311584472656, -1.7338829040527344, 7.6699371337890625, 1.9122390747070312, 9.821693420410156, 9.913703918457031, 6.345676422119141, 1.0449943542480469, 2.230804443359375, -1.5801124572753906, 3.081207275390625, 6.9412841796875, -26.875152587890625, 14.853309631347656, 5.238380432128906, 1.8828010559082031, 6.406730651855469, 3.7913150787353516, -1.8543014526367188, -4.476789474487305, 6.801582336425781, 2.8621826171875, 11.167022705078125, -5.112903594970703, 6.128105163574219, -5.5574951171875, 5.937892913818359, 5.135490417480469, 1.1989250183105469, 9.960041046142578, -0.14775848388671875, 17.51080322265625, 8.36182975769043, 0.7751636505126953, -0.3200531005859375, -2.8958396911621094, 0.5232467651367188, -12.189872741699219, -3.733663558959961, 10.234306335449219, -11.333343505859375, 4.5605621337890625, 1.6313362121582031, 2.460235595703125, 11.471221923828125, 4.246742248535156, 0.1705474853515625, -20.13629150390625, 0.5440101623535156, 1.8243999481201172, 10.807380676269531, 0.6726360321044922, 12.198564529418945, 2.906524658203125, 7.451820373535156, -4.87933349609375, 31.911697387695312, 3.4577789306640625, 0.04624176025390625, 4.626258850097656, 11.496047973632812, 3.33966064453125, 18.209918975830078, 2.3512229919433594], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000101.npy"} +{"epoch": 0.15268329554043839, "step": 102, "batch_size": 64, "mean": 0.8239915370941162, "std": 10.787586212158203, "min": -24.621139526367188, "p10": -9.605522155761719, "median": -0.9951858520507812, "p90": 12.132511520385744, "max": 41.19329833984375, "pos_frac": 0.4375, "sample": [14.71917724609375, -7.892555236816406, -4.649049758911133, 0.612762451171875, -12.933113098144531, -2.957683563232422, -8.413902282714844, 3.11761474609375, -13.628273010253906, -1.4310111999511719, 2.8776702880859375, 6.58172607421875, 1.7396240234375, 2.9904556274414062, 12.658721923828125, -7.982490539550781, -1.9135761260986328, 33.3560791015625, -4.623405456542969, 21.099807739257812, -3.932039260864258, 1.9054431915283203, -8.739974975585938, -1.0269317626953125, 3.003049850463867, 3.1146411895751953, -0.6608009338378906, 11.584957122802734, -18.34526824951172, 0.38835906982421875, 12.287925720214844, -1.1176338195800781, -13.663177490234375, -2.821063995361328, -9.976470947265625, -3.9129562377929688, 6.239048004150391, 11.708580017089844, 23.466659545898438, -5.623128890991211, -0.3175678253173828, -7.1675872802734375, -24.621139526367188, -1.284027099609375, -2.2093505859375, -0.96343994140625, -0.6610107421875, 11.769878387451172, -1.7072772979736328, -4.270317077636719, 1.385793685913086, 8.338790893554688, 6.021659851074219, -7.274532318115234, -1.4243278503417969, -7.047489166259766, -1.4426002502441406, 41.19329833984375, -5.480915069580078, -11.026412963867188, 4.781028747558594, 8.516136169433594, 9.15561294555664, 1.2634601593017578], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000102.npy"} +{"epoch": 0.15419501133786848, "step": 103, "batch_size": 64, "mean": 2.3296782970428467, "std": 6.953776836395264, "min": -15.155441284179688, "p10": -5.319907760620117, "median": 2.027252197265625, "p90": 10.859908294677735, "max": 18.804763793945312, "pos_frac": 0.640625, "sample": [-5.344486236572266, -4.16998291015625, 16.914962768554688, -11.58026123046875, -2.461801528930664, 6.968212127685547, -5.406166076660156, -1.6072502136230469, 0.32061767578125, 13.270492553710938, -4.966651916503906, 2.6369476318359375, -3.384033203125, 3.4048843383789062, -3.6905593872070312, 8.716331481933594, 2.1773147583007812, -1.7207412719726562, -1.1849365234375, 8.770637512207031, 4.43450927734375, 10.520771026611328, 0.3893280029296875, 3.4220924377441406, 3.8034744262695312, -1.607940673828125, -1.4337730407714844, 18.804763793945312, -3.9782943725585938, 4.771064758300781, 9.437965393066406, 8.439971923828125, 13.8446044921875, -4.0050506591796875, 1.6913909912109375, 9.618621826171875, -15.155441284179688, 8.673004150390625, 2.0180740356445312, 1.1308422088623047, 7.0367584228515625, 0.8597297668457031, 0.4877204895019531, 3.3589859008789062, 5.168672561645508, 5.99542236328125, 11.005252838134766, -6.904834747314453, 10.410005569458008, 1.6242828369140625, 11.0460205078125, 2.8796768188476562, -8.376487731933594, -0.4387664794921875, -13.573928833007812, 0.306488037109375, -5.2625579833984375, -1.3865318298339844, -2.2861080169677734, 2.0364303588867188, 5.452568054199219, 5.8131561279296875, 14.913955688476562, 6.449989318847656], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000103.npy"} +{"epoch": 0.15570672713529857, "step": 104, "batch_size": 64, "mean": 4.690879821777344, "std": 14.216902732849121, "min": -24.852005004882812, "p10": -8.96139144897461, "median": 1.9800214767456055, "p90": 21.80292739868165, "max": 65.59954833984375, "pos_frac": 0.640625, "sample": [25.986068725585938, 9.340988159179688, 8.379913330078125, -5.357793807983398, -8.745391845703125, 6.7671661376953125, -5.836099624633789, -0.6748886108398438, 8.356979370117188, -0.427032470703125, 1.7748870849609375, -24.852005004882812, 65.59954833984375, 19.118972778320312, -12.876800537109375, -3.565399169921875, 35.38818359375, 4.046649932861328, -0.004779815673828125, 17.95873260498047, -13.401840209960938, -20.17919921875, 12.39666748046875, 16.307266235351562, 3.2497406005859375, -1.3712615966796875, 1.0133438110351562, -17.46446990966797, 8.250406265258789, -0.7037601470947266, -0.05727195739746094, 1.8241386413574219, -0.9049606323242188, 5.381284713745117, 15.639148712158203, 5.766580581665039, 1.4457855224609375, -0.9431304931640625, 1.17803955078125, 5.392494201660156, -0.9382247924804688, -6.599512100219727, 38.459259033203125, 2.135904312133789, 5.882118225097656, 22.95319366455078, 5.046054840087891, 2.748750686645508, -17.819786071777344, 2.7867202758789062, 1.47564697265625, 5.317461013793945, 11.62063217163086, 5.8605804443359375, 1.6484870910644531, -9.053962707519531, 0.055805206298828125, -1.506988525390625, 1.2575912475585938, 7.1645050048828125, -6.59600830078125, 10.117431640625, 25.018447875976562, 29.985305786132812], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000104.npy"} +{"epoch": 0.15721844293272866, "step": 105, "batch_size": 64, "mean": 4.049013137817383, "std": 12.987411499023438, "min": -21.641525268554688, "p10": -8.610423278808593, "median": 2.5097122192382812, "p90": 19.470751953125, "max": 52.18218994140625, "pos_frac": 0.65625, "sample": [2.6685447692871094, 12.40380859375, 8.231826782226562, 5.897499084472656, 1.1204719543457031, 8.551036834716797, -19.768600463867188, 19.373382568359375, -3.13568115234375, 3.6600475311279297, 2.350879669189453, 4.830005645751953, -6.782482147216797, 16.529571533203125, 0.4687004089355469, 12.834060668945312, -2.6203689575195312, -11.785964965820312, -0.6663551330566406, 3.5160064697265625, 7.92352294921875, -3.840057373046875, 2.8102874755859375, -8.855010986328125, 2.796478271484375, 11.227813720703125, -0.00557708740234375, -20.3284912109375, 4.785957336425781, -13.851852416992188, -3.8786773681640625, 0.9593143463134766, -2.9531173706054688, 6.237159729003906, -21.641525268554688, 52.18218994140625, 1.3946037292480469, -0.45098876953125, 5.8668670654296875, -0.7972583770751953, 0.7132530212402344, -1.0970382690429688, 1.702728271484375, 1.5141544342041016, -19.089706420898438, 7.282176971435547, 33.77909851074219, -3.5556106567382812, 20.617721557617188, -5.377838134765625, -8.039718627929688, 4.024066925048828, 3.0626049041748047, 28.498367309570312, 41.33494567871094, -1.67449951171875, 7.904375076293945, 13.472686767578125, 1.1060371398925781, 4.321744918823242, 0.9064178466796875, 20.82256317138672, 10.137802124023438, 19.512481689453125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000105.npy"} +{"epoch": 0.15873015873015872, "step": 106, "batch_size": 64, "mean": 2.2443604469299316, "std": 12.061930656433105, "min": -36.55528259277344, "p10": -11.63476181030273, "median": 1.7273826599121094, "p90": 13.088004684448244, "max": 34.44097900390625, "pos_frac": 0.65625, "sample": [-22.29094696044922, 11.478443145751953, 6.148662567138672, 10.102043151855469, -16.047378540039062, -0.5907707214355469, 12.901439666748047, 2.472604751586914, -0.8525466918945312, 0.09520721435546875, 12.333610534667969, 10.992046356201172, 20.616958618164062, 9.080673217773438, 0.64288330078125, -7.007926940917969, 1.7259368896484375, 13.375663757324219, 13.167961120605469, 1.5074920654296875, -0.5998077392578125, 0.4154624938964844, -22.530624389648438, 7.737495422363281, -2.0904436111450195, 1.53997802734375, -0.8067646026611328, -7.2530059814453125, 2.005462646484375, 32.70269775390625, -1.3760490417480469, -2.3889102935791016, -4.164642333984375, -8.951255798339844, 9.313789367675781, 6.350494384765625, -1.9807090759277344, -1.9074478149414062, 7.2182769775390625, 34.44097900390625, 8.310653686523438, 3.7795944213867188, 4.513254165649414, 1.4093494415283203, -12.784835815429688, 2.3640670776367188, 3.646587371826172, 0.660888671875, -36.55528259277344, 1.1338386535644531, -28.622467041015625, 1.7288284301757812, 12.161735534667969, 4.480920791625977, 12.245521545410156, 19.60242462158203, 2.0075416564941406, 2.1785240173339844, -1.769266128540039, 26.674423217773438, -13.179901123046875, 1.1804218292236328, -2.4913864135742188, 3.436614990234375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000106.npy"} +{"epoch": 0.1602418745275888, "step": 107, "batch_size": 64, "mean": 1.126960039138794, "std": 15.579540252685547, "min": -50.857452392578125, "p10": -13.935502624511717, "median": 3.3383283615112305, "p90": 14.203653717041018, "max": 52.57255554199219, "pos_frac": 0.65625, "sample": [2.6025238037109375, -9.136619567871094, 6.2669525146484375, -8.45339584350586, 14.346405029296875, 5.988395690917969, -50.857452392578125, 12.630958557128906, 3.8812026977539062, 3.0839385986328125, -40.33222961425781, -2.5545616149902344, 2.6687049865722656, 6.652076721191406, 8.49098014831543, -6.331268310546875, -11.333938598632812, 11.417457580566406, 12.320121765136719, 4.7427215576171875, 0.3585968017578125, 20.708160400390625, 13.870567321777344, 1.0986900329589844, 8.947315216064453, -19.90484619140625, 5.256221771240234, -4.299900054931641, -3.2644729614257812, 3.513702392578125, 3.532817840576172, -17.17613983154297, 3.0217208862304688, 13.593254089355469, 0.08715057373046875, 12.535186767578125, 7.2046051025390625, 4.89935302734375, 7.5471954345703125, 15.342239379882812, 31.830841064453125, 17.15345001220703, -12.172279357910156, -2.152008056640625, 3.351278305053711, -2.9292831420898438, -1.1645011901855469, 9.343368530273438, 5.542110443115234, -35.31634521484375, 16.325103759765625, 3.8173675537109375, -6.384763717651367, -8.211706161499023, 0.6256446838378906, 2.6185989379882812, 11.077362060546875, 52.57255554199219, 8.250740051269531, -5.6450347900390625, -10.424163818359375, -37.58149719238281, 3.32537841796875, -14.691169738769531], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000107.npy"} +{"epoch": 0.1617535903250189, "step": 108, "batch_size": 64, "mean": 3.4140186309814453, "std": 13.23709487915039, "min": -32.61216735839844, "p10": -9.640488815307616, "median": 1.6174163818359375, "p90": 18.280426025390632, "max": 47.894287109375, "pos_frac": 0.609375, "sample": [2.4267807006835938, 10.51751708984375, 23.824081420898438, -10.916374206542969, 0.3325061798095703, 9.838569641113281, 10.964462280273438, 3.45220947265625, 2.148956298828125, -10.770309448242188, 8.966056823730469, 4.705085754394531, 7.994781494140625, -2.0346202850341797, 19.09686279296875, 3.726318359375, -7.891410827636719, -0.15925216674804688, 1.7277565002441406, -1.9531707763671875, 14.4765625, -1.1863594055175781, -1.7508964538574219, 16.60369873046875, 0.066131591796875, -19.215171813964844, 5.137767791748047, -32.61216735839844, 18.9990234375, 0.21282196044921875, -27.953948974609375, -2.613454818725586, 13.125946044921875, -1.5426063537597656, 22.736839294433594, 4.004079818725586, 0.6853313446044922, 0.0062408447265625, -9.46249008178711, 11.490158081054688, -8.877830505371094, 16.59374237060547, 7.205883026123047, -1.6390056610107422, 1.5070762634277344, 8.34097671508789, -2.744274139404297, 0.8226261138916016, 10.73907470703125, -9.716773986816406, -16.536033630371094, 12.237388610839844, -8.847381591796875, -0.6793441772460938, 25.800186157226562, -3.1632537841796875, 5.154045104980469, 47.894287109375, 2.6445236206054688, -5.566162109375, -0.6114692687988281, 10.295825958251953, -2.168560028076172, 42.607330322265625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000108.npy"} +{"epoch": 0.16326530612244897, "step": 109, "batch_size": 64, "mean": 6.316323280334473, "std": 14.699426651000977, "min": -16.8548583984375, "p10": -6.894155120849608, "median": 2.165411949157715, "p90": 22.478090667724615, "max": 62.7845458984375, "pos_frac": 0.640625, "sample": [-3.4021453857421875, 37.785057067871094, 41.21624755859375, 1.5767822265625, 6.054452896118164, 7.371736526489258, 11.88888931274414, -5.06463623046875, 2.5673770904541016, 8.35036849975586, -3.914754867553711, 4.45989990234375, -1.3974456787109375, 10.378555297851562, 2.177276611328125, -16.8548583984375, 0.41156768798828125, 1.12005615234375, -4.476116180419922, 0.3694896697998047, 0.4632453918457031, 6.007761001586914, -10.66357421875, -2.2421350479125977, -8.582229614257812, 62.7845458984375, 3.994781494140625, -3.4167308807373047, 12.265279769897461, 51.20782470703125, 8.080785751342773, 1.0228500366210938, 4.016574859619141, -0.0087432861328125, -0.5843639373779297, 4.219043731689453, -3.340372085571289, 24.568023681640625, -3.641876220703125, 9.540824890136719, -1.9125938415527344, 9.359231948852539, 2.8369369506835938, 21.40362548828125, -0.09129524230957031, -5.848731994628906, -0.7934951782226562, 2.1535472869873047, -15.683509826660156, -8.65283203125, 1.5641307830810547, 9.98876953125, 16.03807830810547, 16.9390869140625, 22.938575744628906, 40.19828796386719, 20.124202728271484, 1.6602630615234375, 13.329330444335938, 7.7610931396484375, -10.24127197265625, -0.06749153137207031, -7.342193603515625, 12.273628234863281], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000109.npy"} +{"epoch": 0.16477702191987906, "step": 110, "batch_size": 64, "mean": -1.2916697263717651, "std": 16.592599868774414, "min": -70.35711669921875, "p10": -13.388101196289062, "median": 1.4902973175048828, "p90": 13.65412940979004, "max": 38.9805908203125, "pos_frac": 0.5625, "sample": [4.304744720458984, -1.1480484008789062, -13.4599609375, 2.2904891967773438, -3.463747024536133, 17.68231201171875, 6.7244415283203125, -10.867500305175781, -40.064727783203125, -5.2820281982421875, 9.545669555664062, 6.484947204589844, 3.3104934692382812, 14.775558471679688, 2.1047821044921875, -10.916648864746094, 4.742244720458984, 25.82366943359375, -1.8923702239990234, -2.0950279235839844, 7.64862060546875, -10.625267028808594, 3.166135787963867, 13.590728759765625, 0.3449287414550781, 5.699249267578125, 0.8608875274658203, 13.68130111694336, 0.8323440551757812, -9.343292236328125, -13.220428466796875, 1.7371253967285156, 3.79217529296875, -3.2635154724121094, -15.310447692871094, 5.9842529296875, -11.956146240234375, 7.865997314453125, -4.958580017089844, 2.396718978881836, 27.761436462402344, 11.697257995605469, -5.2190704345703125, -9.273704528808594, -2.7403182983398438, 1.77740478515625, 10.805633544921875, -21.924270629882812, -39.994140625, -48.48683166503906, -7.896514892578125, -2.2745590209960938, -4.225852966308594, 3.018035888671875, -5.3474578857421875, 38.9805908203125, 21.46233367919922, 4.251012802124023, 4.299028396606445, 7.4407501220703125, -9.439020156860352, 1.24346923828125, 4.252960205078125, -70.35711669921875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000110.npy"} +{"epoch": 0.16628873771730915, "step": 111, "batch_size": 64, "mean": 4.560818672180176, "std": 16.700504302978516, "min": -24.57843017578125, "p10": -11.106382179260253, "median": 0.5162830352783203, "p90": 22.407507705688477, "max": 90.17572021484375, "pos_frac": 0.53125, "sample": [-3.2056217193603516, -2.2273120880126953, 0.36081695556640625, 24.692794799804688, 2.245464324951172, 22.67870330810547, 0.11965560913085938, 6.470306396484375, 0.6717491149902344, -3.2751922607421875, 11.987655639648438, -11.241844177246094, -6.990806579589844, 4.216392517089844, -21.53826904296875, -3.8409347534179688, -1.7108535766601562, -2.9745407104492188, -4.18536376953125, -1.2981433868408203, 34.03413391113281, 17.581390380859375, 32.06664276123047, 22.103466033935547, 40.48432922363281, 9.063507080078125, 11.176910400390625, 11.881248474121094, 1.7360820770263672, -24.57843017578125, -0.6977901458740234, -4.37724494934082, 3.043304443359375, -14.372917175292969, -5.874595642089844, -10.281959533691406, -3.969806671142578, 6.039939880371094, -6.847175598144531, 8.152400970458984, 21.437156677246094, 20.25592803955078, 22.537811279296875, -1.6399669647216797, -12.007509231567383, -2.528280258178711, 5.314615249633789, 16.514892578125, -2.7115707397460938, -1.3057327270507812, 9.337028503417969, 1.7751502990722656, 4.079771041870117, -10.790304183959961, -9.905651092529297, 1.8431320190429688, -12.93853759765625, 18.23912811279297, -11.901790618896484, -0.14632034301757812, 90.17572021484375, -2.862323760986328, 6.251300811767578, 5.5506591796875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000111.npy"} +{"epoch": 0.16780045351473924, "step": 112, "batch_size": 64, "mean": 6.1081366539001465, "std": 15.067253112792969, "min": -40.439605712890625, "p10": -8.556674194335933, "median": 3.3144187927246094, "p90": 23.351484680175787, "max": 58.449920654296875, "pos_frac": 0.703125, "sample": [41.689483642578125, 15.265579223632812, 12.999618530273438, 5.428409576416016, 0.483245849609375, 1.1709442138671875, 38.4696044921875, 4.34466552734375, 10.550056457519531, -0.3535003662109375, 3.3205108642578125, 23.8690185546875, -14.718555450439453, 5.658134460449219, 3.763355255126953, -14.602508544921875, 0.5005035400390625, 13.006492614746094, -4.58966064453125, -4.826774597167969, 6.855861663818359, -0.15032958984375, 0.20109176635742188, 10.443260192871094, 11.950225830078125, 2.7143630981445312, 16.95482635498047, 0.10135650634765625, 58.449920654296875, 8.003900527954102, 20.0863037109375, 0.6322860717773438, -5.037008285522461, -0.5665798187255859, 13.826587677001953, 2.238677978515625, 4.4745330810546875, -22.130706787109375, 12.708732604980469, 8.988136291503906, -5.176319122314453, 1.29437255859375, 13.29937744140625, 3.2884521484375, 21.662796020507812, 33.70857238769531, 11.442367553710938, 9.724048614501953, -0.9399318695068359, 1.9058189392089844, -12.481178283691406, -0.28414154052734375, -3.1013259887695312, 3.3083267211914062, 22.143905639648438, 0.0834808349609375, 6.6826171875, -4.277746200561523, -10.801742553710938, 25.59881591796875, -10.00539779663086, -40.439605712890625, -1.391326904296875, 33.50244903564453], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000112.npy"} +{"epoch": 0.1693121693121693, "step": 113, "batch_size": 64, "mean": 2.278538227081299, "std": 15.60328483581543, "min": -46.757568359375, "p10": -13.595832443237303, "median": 2.2105255126953125, "p90": 19.36755638122559, "max": 64.51272583007812, "pos_frac": 0.625, "sample": [-9.281112670898438, -14.411117553710938, -2.8741302490234375, 2.9977073669433594, 1.4105949401855469, 7.751491546630859, 6.0089874267578125, -26.141098022460938, -5.311004638671875, 2.337827682495117, 4.177181243896484, -6.621467590332031, -11.970623016357422, -1.09442138671875, -6.674674987792969, -11.314537048339844, 2.359912872314453, 14.261409759521484, 9.937171936035156, -4.779689788818359, -15.176399230957031, 4.8698577880859375, 19.69211196899414, -12.328948974609375, 3.350860595703125, 6.5975799560546875, 6.944740295410156, 5.747367858886719, 22.312240600585938, -1.9751033782958984, 4.003866195678711, 64.51272583007812, 2.4632034301757812, 1.4079208374023438, 9.668914794921875, 10.347257614135742, -46.757568359375, 4.087026596069336, 26.45703125, -7.852363586425781, 2.083223342895508, 4.872472763061523, -31.2152099609375, 20.823265075683594, -5.775661468505859, 37.357086181640625, 1.57940673828125, -0.8516960144042969, 0.3935432434082031, 4.310400009155273, 8.740127563476562, -16.385086059570312, 2.046466827392578, 35.008209228515625, 14.014312744140625, 0.9177036285400391, 5.5662689208984375, -0.21187400817871094, 18.610260009765625, 0.7365875244140625, -4.9150238037109375, -2.3340606689453125, -14.138782501220703, 5.453788757324219], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000113.npy"} +{"epoch": 0.1708238851095994, "step": 114, "batch_size": 64, "mean": 3.7484827041625977, "std": 15.16589069366455, "min": -24.727752685546875, "p10": -12.747393798828124, "median": 1.6210174560546875, "p90": 24.36168708801271, "max": 49.327423095703125, "pos_frac": 0.578125, "sample": [-24.21392822265625, -20.352745056152344, -0.9356269836425781, 19.98019027709961, 3.545787811279297, 5.2492828369140625, 0.5415306091308594, 33.4140625, 3.1217041015625, -3.921875, -11.949100494384766, -13.089519500732422, 6.394840240478516, -15.302291870117188, -4.008735656738281, 10.167121887207031, -0.14804458618164062, -2.0346145629882812, 0.9687137603759766, -8.903553009033203, 1.7330970764160156, -9.006134033203125, 37.94670104980469, -10.600963592529297, 1.0829143524169922, 17.602264404296875, 7.876285552978516, 11.141386032104492, 14.093208312988281, 10.41324234008789, -7.510404586791992, 1.2675552368164062, 13.916725158691406, -0.309051513671875, -2.1155242919921875, 2.0003890991210938, 15.035087585449219, -3.7417144775390625, 5.158550262451172, -11.787689208984375, 43.5821533203125, 9.845880508422852, 49.327423095703125, 3.675588607788086, 14.903831481933594, 1.7284870147705078, 30.29986572265625, -13.574745178222656, -3.4768295288085938, -13.413471221923828, 3.6092300415039062, -24.727752685546875, 4.22816276550293, 26.239471435546875, 18.035648345947266, 35.69146728515625, 4.519414901733398, -2.5841121673583984, -9.941398620605469, -6.418920516967773, 4.336021423339844, -6.5832672119140625, -3.631927490234375, 1.5135478973388672], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000114.npy"} +{"epoch": 0.17233560090702948, "step": 115, "batch_size": 64, "mean": 4.282554626464844, "std": 16.113279342651367, "min": -31.598854064941406, "p10": -14.472342300415038, "median": 2.411726951599121, "p90": 21.16875915527344, "max": 51.954132080078125, "pos_frac": 0.59375, "sample": [-6.917457580566406, -16.465206146240234, 15.239356994628906, 1.05419921875, 51.898040771484375, -2.7906055450439453, 8.198192596435547, 9.511722564697266, -2.4104537963867188, 51.954132080078125, 2.1440486907958984, -1.9273529052734375, 8.955490112304688, -14.582893371582031, -12.310585021972656, -28.977920532226562, -0.6353168487548828, 43.38507080078125, 6.96295166015625, 18.521347045898438, 5.9822845458984375, 16.378021240234375, 10.208572387695312, 8.139602661132812, -5.739667892456055, 1.7094898223876953, 21.596145629882812, 3.172954559326172, -31.598854064941406, 17.51987075805664, -19.0284423828125, -3.6940155029296875, 0.7915802001953125, 31.334762573242188, -1.35076904296875, 12.604530334472656, -2.8062591552734375, 3.8398208618164062, -20.764556884765625, 5.1972503662109375, 2.6794052124023438, -14.21438980102539, 15.728408813476562, 14.5859375, 33.75691223144531, 5.024791717529297, -9.112922668457031, -3.1514720916748047, -9.132404327392578, 27.944580078125, 0.3263568878173828, -0.8591823577880859, 5.922174453735352, -0.49251556396484375, 0.0403900146484375, -2.022979736328125, -20.16901397705078, 4.001096725463867, 8.267929077148438, -0.5284423828125, 8.232400894165039, -1.2637062072753906, 4.0495452880859375, 20.171524047851562], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000115.npy"} +{"epoch": 0.17384731670445955, "step": 116, "batch_size": 64, "mean": 8.010391235351562, "std": 15.915340423583984, "min": -23.1494140625, "p10": -10.85517349243164, "median": 4.278899192810059, "p90": 27.87156524658203, "max": 46.300628662109375, "pos_frac": 0.640625, "sample": [23.025238037109375, -18.64061737060547, -1.3563041687011719, 14.1451416015625, 19.641586303710938, 18.941383361816406, 24.36236572265625, 1.4783172607421875, 13.022069931030273, 23.475128173828125, 25.680160522460938, -8.274288177490234, -19.060577392578125, 45.974456787109375, -0.8729515075683594, -1.7818641662597656, -0.3963890075683594, -0.42485809326171875, 3.5418167114257812, 27.876220703125, -8.065376281738281, -3.3257522583007812, 13.14664077758789, 15.927616119384766, -2.5085525512695312, 21.07036590576172, 24.659805297851562, 13.414508819580078, 20.10992431640625, 0.14563369750976562, 5.624950408935547, -6.349052429199219, -8.609333038330078, 0.8441486358642578, 27.860702514648438, 1.2806167602539062, 3.6983871459960938, 8.580368041992188, -13.523429870605469, -11.817676544189453, 33.6011962890625, 10.872520446777344, 1.7625617980957031, -4.3548431396484375, 44.101470947265625, 7.4893341064453125, 46.300628662109375, -11.846412658691406, -1.3293838500976562, 4.646797180175781, -1.8822917938232422, 13.270355224609375, 2.8503570556640625, -23.1494140625, -2.0814552307128906, 32.60137939453125, -15.152706146240234, 5.827362060546875, 5.907939910888672, 39.68147277832031, 19.473831176757812, 3.911001205444336, 7.890556335449219, -0.2477264404296875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000116.npy"} +{"epoch": 0.17535903250188964, "step": 117, "batch_size": 64, "mean": 3.042750835418701, "std": 17.451448440551758, "min": -62.35734558105469, "p10": -15.937534332275389, "median": 3.7808361053466797, "p90": 19.456221008300787, "max": 54.53700256347656, "pos_frac": 0.640625, "sample": [-16.78070068359375, 3.929706573486328, 11.028823852539062, 8.286636352539062, 49.49446105957031, 2.152891159057617, 13.939233779907227, 3.0737380981445312, -12.77813720703125, 10.538591384887695, 0.018894195556640625, -2.1814651489257812, 28.70208740234375, -21.787811279296875, -6.723731994628906, 0.20973968505859375, -17.230331420898438, 6.706367492675781, 6.5080108642578125, 4.863460540771484, 17.871864318847656, 8.667156219482422, 12.525863647460938, -4.9647979736328125, -5.165063858032227, -21.27684783935547, 15.031036376953125, -13.970146179199219, 13.815156936645508, 16.69042205810547, -8.7496337890625, 20.135231018066406, 17.31885528564453, 4.39518928527832, 35.00799560546875, -8.815704345703125, 8.41819953918457, 25.42772674560547, 0.48987579345703125, 9.092781066894531, 5.844303131103516, -0.15351104736328125, 9.319450378417969, 6.0679168701171875, -2.707225799560547, -26.927955627441406, 1.0142993927001953, 3.6319656372070312, 3.430908203125, 14.469261169433594, 5.328975677490234, -9.757278442382812, 23.53827667236328, -4.152645111083984, 54.53700256347656, -27.33226776123047, -12.716747283935547, 14.036273956298828, -62.35734558105469, -12.799964904785156, 6.924018859863281, -3.969818115234375, 1.6768302917480469, -6.124286651611328], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000117.npy"} +{"epoch": 0.17687074829931973, "step": 118, "batch_size": 64, "mean": 5.920293807983398, "std": 13.770094871520996, "min": -27.956459045410156, "p10": -7.922771453857422, "median": 3.4442033767700195, "p90": 25.398291778564463, "max": 41.32698059082031, "pos_frac": 0.640625, "sample": [-2.3792800903320312, 1.9193649291992188, 14.78717041015625, -8.647903442382812, -23.77619743347168, -27.956459045410156, 13.648239135742188, 9.961185455322266, 8.803995132446289, -2.770153045654297, 4.750518798828125, 5.942413330078125, -0.4456520080566406, 12.23480224609375, 0.1342449188232422, -0.7387542724609375, 2.2112884521484375, 19.148590087890625, -2.691051483154297, -1.6340579986572266, -10.591667175292969, 30.667724609375, 3.1550769805908203, 11.822052001953125, 11.159866333007812, -15.099773406982422, 0.7894439697265625, -8.106918334960938, 6.028806686401367, -0.07269668579101562, 13.899528503417969, 2.6898880004882812, 22.78313446044922, -7.267416000366211, 3.6628265380859375, 26.519073486328125, 9.936737060546875, 22.109710693359375, 32.43927001953125, -5.432899475097656, -1.141366958618164, 20.930496215820312, 5.299491882324219, 4.690643310546875, -8.055339813232422, 31.76580810546875, -5.4488525390625, 41.32698059082031, 3.9316482543945312, 28.757766723632812, 21.87890625, -7.613445281982422, 22.093948364257812, 35.50639724731445, 5.031932830810547, 2.341552734375, -5.855804443359375, 3.2255802154541016, 6.498590469360352, 17.077829360961914, -4.844841003417969, -6.223487854003906, 1.2226200103759766, -7.092323303222656], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000118.npy"} +{"epoch": 0.17838246409674982, "step": 119, "batch_size": 64, "mean": 3.8084638118743896, "std": 22.424837112426758, "min": -52.365966796875, "p10": -16.664029121398926, "median": 0.8365869522094727, "p90": 28.479321289062504, "max": 104.32998657226562, "pos_frac": 0.546875, "sample": [-16.468290328979492, -2.5695114135742188, -11.644376754760742, -9.736743927001953, -9.935546875, -8.883964538574219, 4.380563735961914, 12.387557983398438, -1.2780342102050781, 3.621776580810547, -13.057973861694336, -8.180679321289062, -1.609283447265625, 46.013946533203125, 15.281787872314453, -52.365966796875, -6.1613616943359375, -7.018463134765625, 4.128377914428711, 30.495208740234375, 0.3300304412841797, 7.031438827514648, -19.80291748046875, 0.5181732177734375, 20.250404357910156, 14.648681640625, -12.647090911865234, -3.5087814331054688, 28.867660522460938, 44.69132995605469, 14.332412719726562, 0.8429679870605469, 3.9476051330566406, 9.788942337036133, 19.130905151367188, 104.32998657226562, 8.071889877319336, -16.74791717529297, -0.6898612976074219, 20.91656494140625, -1.5783767700195312, 27.573196411132812, -6.36451530456543, -0.3884735107421875, -21.570724487304688, -16.30946922302246, 47.46711730957031, 8.079681396484375, -2.8198318481445312, 24.611984252929688, -28.071578979492188, -46.761810302734375, -18.548477172851562, 0.8302059173583984, 1.7551822662353516, 1.0954818725585938, 9.275262832641602, 35.69929504394531, 4.299896240234375, 9.506149291992188, 6.091972351074219, 9.348457336425781, -4.293224334716797, -6.887176513671875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000119.npy"} +{"epoch": 0.17989417989417988, "step": 120, "batch_size": 64, "mean": 7.770325183868408, "std": 16.213685989379883, "min": -21.34979248046875, "p10": -10.310378456115721, "median": 6.44258975982666, "p90": 27.89682617187501, "max": 64.27813720703125, "pos_frac": 0.6875, "sample": [31.676849365234375, -5.743562698364258, -8.581676483154297, 8.773351669311523, 14.639450073242188, -0.8246498107910156, 17.703868865966797, -0.29878997802734375, 11.390106201171875, -9.7208251953125, 64.27813720703125, 5.231830596923828, 14.270122528076172, 35.922035217285156, 8.173606872558594, 44.36918640136719, 6.8094329833984375, -5.166587829589844, 10.161712646484375, 14.436233520507812, 13.758293151855469, -19.45093536376953, 13.380546569824219, 5.2197723388671875, 0.191680908203125, 1.8966064453125, 25.252685546875, -14.558494567871094, 23.668304443359375, 10.252876281738281, 29.030029296875, -14.03900146484375, 36.098533630371094, -3.14666748046875, 23.50334930419922, -1.8690605163574219, 0.03855133056640625, 6.830631256103516, 5.969841003417969, -21.34979248046875, 7.622304916381836, 5.991401672363281, 7.353912353515625, 56.96771240234375, -0.05367851257324219, -12.0362548828125, -9.127370834350586, 8.703353881835938, 14.24652099609375, 8.969062805175781, -1.7756671905517578, 7.596635818481445, 2.9562759399414062, 16.431716918945312, 7.447870254516602, -0.234832763671875, 3.1802215576171875, -9.748855590820312, -10.551031112670898, -10.949043273925781, 12.541122436523438, 6.075746536254883, 4.68220329284668, 2.83392333984375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000120.npy"} +{"epoch": 0.18140589569160998, "step": 121, "batch_size": 64, "mean": 10.940115928649902, "std": 25.018835067749023, "min": -25.631332397460938, "p10": -9.145733451843261, "median": 4.660331726074219, "p90": 46.34732284545903, "max": 103.05526733398438, "pos_frac": 0.625, "sample": [-25.631332397460938, 85.77420043945312, 4.7698822021484375, -4.79475212097168, 7.803068161010742, 103.05526733398438, 29.90102767944336, -6.44331169128418, -8.122756958007812, 34.2945556640625, -0.2029876708984375, -6.965919494628906, 31.447891235351562, 12.353439331054688, -11.276168823242188, 0.6727581024169922, 3.8499088287353516, 16.491416931152344, 18.10546875, -17.863128662109375, 51.512794494628906, -6.390171051025391, 27.091583251953125, 18.851181030273438, 32.54314422607422, -4.25262451171875, 8.520633697509766, -4.783885955810547, 9.276397705078125, -5.719085693359375, -1.8878936767578125, 71.5152587890625, 0.017139434814453125, 4.55078125, 10.532394409179688, 11.18914794921875, -12.609420776367188, -17.894378662109375, 4.934776306152344, -5.905979156494141, -8.775325775146484, -2.3403377532958984, 17.357025146484375, 11.894027709960938, -6.742851257324219, 12.623161315917969, -10.59884262084961, 63.225013732910156, 4.778564453125, 3.4194393157958984, 14.730428695678711, 1.6274681091308594, -5.173377990722656, 3.492015838623047, -7.632898330688477, 67.796142578125, -1.8550243377685547, 13.027873992919922, 0.517822265625, 7.9944305419921875, 7.619266510009766, 6.275062561035156, 57.9024658203125, -9.304479598999023], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000121.npy"} +{"epoch": 0.18291761148904007, "step": 122, "batch_size": 64, "mean": 7.086280822753906, "std": 29.22617530822754, "min": -54.7432861328125, "p10": -16.996042251586914, "median": 2.087080955505371, "p90": 37.480245971679686, "max": 127.0810546875, "pos_frac": 0.59375, "sample": [66.17617797851562, -19.450225830078125, 0.9853668212890625, 9.321556091308594, -6.44769287109375, -8.538291931152344, 88.98507690429688, 46.436546325683594, -10.480108261108398, 22.131458282470703, 6.458614349365234, -10.651397705078125, -4.490530014038086, 17.391387939453125, 3.679414749145508, 127.0810546875, -14.737411499023438, 2.2882537841796875, -16.647663116455078, 7.514331817626953, 0.4199504852294922, 13.469488143920898, 15.889461517333984, 7.222259521484375, 14.23147201538086, 74.04324340820312, -16.173513412475586, -13.908626556396484, 37.05340576171875, 9.91119384765625, -54.7432861328125, 5.540840148925781, -10.44205093383789, -19.713912963867188, -17.145347595214844, 4.717559814453125, 2.140697479248047, 7.9900360107421875, 2.0334644317626953, -3.62994384765625, -2.108428955078125, -3.4959335327148438, -18.075260162353516, 37.663177490234375, -30.961868286132812, 10.545370101928711, 2.171743392944336, 9.305858612060547, -5.0105743408203125, -1.7984390258789062, 2.3520660400390625, -7.380638122558594, 0.27550697326660156, 1.3526153564453125, -21.275848388671875, 5.871160507202148, 4.220134735107422, -4.149898529052734, -5.121826171875, 14.907432556152344, -13.467033386230469, 1.9660205841064453, 16.0213623046875, 93.80296325683594], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000122.npy"} +{"epoch": 0.18442932728647016, "step": 123, "batch_size": 64, "mean": 6.915421962738037, "std": 22.248323440551758, "min": -53.667877197265625, "p10": -12.989586067199706, "median": 2.6916379928588867, "p90": 26.07507781982422, "max": 123.87335205078125, "pos_frac": 0.65625, "sample": [10.441154479980469, 36.882568359375, 26.26593780517578, 123.87335205078125, -13.19523811340332, -6.266618728637695, 5.803508758544922, 8.019927978515625, 19.618331909179688, 1.3943805694580078, 12.743339538574219, 44.88609313964844, -0.4890098571777344, -2.2263031005859375, 22.554039001464844, 18.7222900390625, 6.550909042358398, 6.977731704711914, 14.540153503417969, 47.083648681640625, 4.460517883300781, 8.264083862304688, -8.287284851074219, 14.366744995117188, 16.337913513183594, 33.71778869628906, 1.9212360382080078, -3.2913646697998047, 17.850814819335938, 5.3411102294921875, 2.4695682525634766, 25.629737854003906, 10.607124328613281, 17.08180046081543, 2.913707733154297, -17.75745391845703, -2.19305419921875, -12.50973129272461, -0.26367759704589844, 2.427082061767578, 19.520095825195312, -20.018428802490234, 1.6122570037841797, -19.60309600830078, 1.4044666290283203, 22.2349853515625, 1.9420700073242188, -8.381120681762695, 0.40521812438964844, -1.2635631561279297, -17.19049072265625, -7.7893524169921875, 16.046592712402344, -8.236503601074219, -12.413421630859375, 33.104278564453125, 0.20101356506347656, 6.683013916015625, -2.2831039428710938, -11.603042602539062, 0.9161739349365234, 15.413066864013672, -53.667877197265625, -17.713111877441406], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000123.npy"} +{"epoch": 0.18594104308390022, "step": 124, "batch_size": 64, "mean": 9.172840118408203, "std": 30.87040138244629, "min": -134.52444458007812, "p10": -14.32810955047607, "median": 5.901910781860352, "p90": 48.30064239501954, "max": 66.79777526855469, "pos_frac": 0.71875, "sample": [-3.7738037109375, 54.78271484375, 19.2537841796875, 5.862262725830078, 8.185392379760742, 7.739814758300781, 47.53269958496094, 7.091743469238281, 57.84001922607422, 29.971405029296875, 42.16826629638672, 26.681873321533203, 0.8284378051757812, 57.56977844238281, 3.043773651123047, 14.772701263427734, 3.343719482421875, 4.7042694091796875, 1.5763397216796875, -3.518373489379883, -134.52444458007812, 5.840362548828125, 0.23411941528320312, 26.321136474609375, 5.348644256591797, 14.300453186035156, 2.010660171508789, -22.225692749023438, -79.62884521484375, -21.13518524169922, 6.308738708496094, 17.541412353515625, -43.47157287597656, 39.02210998535156, 16.227649688720703, 48.6297607421875, 52.57221984863281, -16.183761596679688, -0.23285675048828125, 8.350387573242188, -1.3572463989257812, 5.941558837890625, 6.249378204345703, -7.2664642333984375, 4.364475250244141, 20.271087646484375, 4.98272705078125, 40.412818908691406, -9.998254776000977, -3.939516067504883, 52.349029541015625, 3.835285186767578, 2.515026092529297, -0.5254631042480469, 27.116729736328125, 18.862258911132812, 14.894065856933594, -3.3330459594726562, -26.85080337524414, 44.6839599609375, -1.70635986328125, 26.351608276367188, -8.550949096679688, 66.79777526855469], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000124.npy"} +{"epoch": 0.1874527588813303, "step": 125, "batch_size": 64, "mean": 10.86001205444336, "std": 36.79265594482422, "min": -106.75173950195312, "p10": -15.542245292663573, "median": 7.247762680053711, "p90": 48.58537521362305, "max": 145.26632690429688, "pos_frac": 0.703125, "sample": [-106.75173950195312, 15.861114501953125, 35.483551025390625, 20.703536987304688, -7.305145263671875, 0.18235015869140625, 10.097923278808594, 2.220132827758789, 88.551025390625, 6.4660186767578125, 145.26632690429688, -35.14788055419922, 20.53216552734375, 0.9036941528320312, 7.496549606323242, 21.68145751953125, 47.81788635253906, 22.502609252929688, 20.66588592529297, -8.953788757324219, 8.641489028930664, 9.906719207763672, -9.636756896972656, 5.972846984863281, -11.666740417480469, -2.4515037536621094, 11.159271240234375, -15.937227249145508, -10.429100036621094, -4.2921905517578125, 51.26837158203125, 12.911674499511719, 16.315391540527344, 116.26953125, 10.647483825683594, -11.511589050292969, 4.297698974609375, 0.4867210388183594, 48.91429901123047, 104.64668273925781, -22.087356567382812, 24.254676818847656, 57.567474365234375, 9.650760650634766, 8.615863800048828, -34.396484375, 9.699455261230469, 6.193817138671875, 7.217782974243164, 5.276037216186523, 36.38496398925781, 3.9912967681884766, 16.51732635498047, 12.410675048828125, 5.507453918457031, 26.482223510742188, 5.040477752685547, -0.27204132080078125, -8.512298583984375, -2.251096725463867, 7.277742385864258, -14.620620727539062, -41.03803253173828, -67.65611267089844], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000125.npy"} +{"epoch": 0.1889644746787604, "step": 126, "batch_size": 64, "mean": 0.26197350025177, "std": 19.253292083740234, "min": -62.26776885986328, "p10": -23.558727264404293, "median": 2.0386247634887695, "p90": 22.432675170898452, "max": 51.801116943359375, "pos_frac": 0.546875, "sample": [6.401010513305664, 3.2315311431884766, 3.6881771087646484, 7.242086410522461, 1.7091732025146484, 2.3680763244628906, -2.339324951171875, -1.1642074584960938, 18.44933319091797, -4.932188034057617, 8.05488395690918, 51.801116943359375, -30.209762573242188, -24.704864501953125, 3.161712646484375, 7.99493408203125, -14.30963134765625, -6.645374298095703, 4.485298156738281, -14.666030883789062, 39.35789489746094, 17.41516876220703, -18.259702682495117, 5.359890937805176, -10.322669982910156, 2.434478759765625, 16.797225952148438, -27.147369384765625, -62.26776885986328, 6.125661849975586, 4.473457336425781, -14.471450805664062, -4.046140670776367, 3.511037826538086, -12.312524795532227, -0.10323333740234375, 3.578012466430664, -6.1087799072265625, 12.887411117553711, -29.075515747070312, 38.05384826660156, -28.999290466308594, 11.142217636108398, 9.124202728271484, 13.135177612304688, 26.150169372558594, 24.09405517578125, 34.31146240234375, -14.121932983398438, -2.0314178466796875, -13.07345199584961, 12.30615234375, 9.787269592285156, 1.2352104187011719, -18.511688232421875, -14.3682861328125, 0.1422271728515625, -1.9364490509033203, -20.88440704345703, -29.948387145996094, 38.00080871582031, -10.913887023925781, -11.924459457397461, 18.556121826171875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000126.npy"} +{"epoch": 0.19047619047619047, "step": 127, "batch_size": 64, "mean": 5.973593711853027, "std": 22.61130142211914, "min": -38.71842956542969, "p10": -23.7593521118164, "median": 5.000694274902344, "p90": 31.28158130645752, "max": 79.11250305175781, "pos_frac": 0.65625, "sample": [-9.709625244140625, 79.11250305175781, 31.846435546875, -7.385135650634766, -37.119895935058594, 58.194610595703125, -8.004417419433594, 20.503646850585938, 5.168266296386719, 15.96905517578125, 19.785175323486328, -2.68804931640625, 8.446426391601562, 5.274330139160156, -27.20941162109375, -3.1247329711914062, 13.846839904785156, 11.11334228515625, 10.732841491699219, -4.669239044189453, 17.09613037109375, 9.8514404296875, 4.833122253417969, 6.060722351074219, 7.129360198974609, -18.380104064941406, 12.203987121582031, 1.9850578308105469, 66.6055908203125, 0.14897537231445312, 2.2036399841308594, 21.314971923828125, 0.5178642272949219, 39.58082580566406, -0.3723106384277344, 30.79393196105957, 11.602519989013672, 12.184700012207031, 31.49057388305664, 27.43689727783203, -28.813568115234375, 21.467750549316406, 19.387832641601562, -5.021537780761719, -38.71842956542969, 20.30402374267578, -33.860748291015625, -10.05487060546875, -7.696479797363281, -17.986160278320312, -25.53575897216797, 0.535858154296875, -3.6875572204589844, 9.311065673828125, 1.7835578918457031, -19.614402770996094, -35.20777130126953, 2.32763671875, 45.21620178222656, 2.196138381958008, 3.4464263916015625, 9.245832443237305, 12.111465454101562, -3.197345733642578], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000127.npy"} +{"epoch": 0.19198790627362056, "step": 128, "batch_size": 64, "mean": 15.453136444091797, "std": 39.123355865478516, "min": -64.29150390625, "p10": -19.30442371368408, "median": 4.794497489929199, "p90": 55.08079986572269, "max": 147.09060668945312, "pos_frac": 0.640625, "sample": [35.898101806640625, 23.700332641601562, 3.1170692443847656, -2.1435203552246094, -4.930141448974609, 1.1615447998046875, 32.84587097167969, 128.82992553710938, 2.640827178955078, 99.75686645507812, 15.9407958984375, -64.29150390625, 36.488121032714844, -1.2352294921875, 2.2958240509033203, -30.89820098876953, -0.8144893646240234, 28.73700714111328, -35.301719665527344, 58.72230529785156, 126.5382080078125, 0.1732635498046875, 3.921123504638672, 147.09060668945312, 108.63986206054688, 46.583953857421875, -0.20372772216796875, 1.0977859497070312, -21.549962997436523, 6.711206436157227, -17.897247314453125, -10.900726318359375, 15.193603515625, 43.29296875, 26.226909637451172, 34.07868957519531, -6.657596588134766, 11.629112243652344, -29.572315216064453, 31.938430786132812, -7.8662872314453125, 7.151416778564453, 10.094039916992188, 4.684501647949219, -19.907499313354492, 26.341049194335938, -0.5345993041992188, 5.757545471191406, -14.596817016601562, -2.9534969329833984, 3.8154296875, 11.321975708007812, -6.25799560546875, -51.727691650390625, 4.90449333190918, -9.51858901977539, 7.145179748535156, 29.686569213867188, 66.18861389160156, -6.522272109985352, -0.6132583618164062, 26.241943359375, 13.141860961914062, 46.170623779296875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000128.npy"} +{"epoch": 0.19349962207105065, "step": 129, "batch_size": 64, "mean": 2.4847474098205566, "std": 35.81731414794922, "min": -91.88339233398438, "p10": -30.866317749023434, "median": 3.6772069931030273, "p90": 35.04278717041016, "max": 143.45458984375, "pos_frac": 0.53125, "sample": [-57.03346252441406, -6.281126022338867, 9.97132682800293, -20.27362823486328, 3.450551986694336, 22.740337371826172, 56.64305114746094, -0.5200729370117188, 30.66211700439453, 17.94686508178711, 20.6456298828125, 1.6034355163574219, 35.34519958496094, -21.574562072753906, -0.08670806884765625, 13.389938354492188, 31.332565307617188, -5.030523300170898, -3.0633544921875, 6.852752685546875, 48.23518371582031, -90.62033081054688, -7.257110595703125, 29.524658203125, -0.5712871551513672, 143.45458984375, 11.845701217651367, -31.7900390625, 5.954578399658203, -21.184188842773438, 3.9038619995117188, -91.88339233398438, -28.710968017578125, 10.817268371582031, 5.570659637451172, 40.293373107910156, -25.727012634277344, -19.932159423828125, -25.91498374938965, 16.974987030029297, 4.412361145019531, -11.373672485351562, -16.824188232421875, -3.112558364868164, 9.809337615966797, 5.36866569519043, -37.190711975097656, 31.618839263916016, 11.948974609375, 4.287483215332031, 5.451206207275391, 34.337158203125, 60.07044219970703, -1.8934288024902344, -6.284093856811523, -16.32303237915039, -11.737285614013672, -56.761932373046875, 79.66708374023438, -1.3719482421875, 8.280488967895508, -68.79421997070312, -2.6558609008789062, 28.391006469726562], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000129.npy"} +{"epoch": 0.19501133786848074, "step": 130, "batch_size": 64, "mean": 12.089003562927246, "std": 30.164640426635742, "min": -50.734466552734375, "p10": -21.042720413208006, "median": 5.957810401916504, "p90": 57.59384918212893, "max": 86.57850646972656, "pos_frac": 0.625, "sample": [-18.680187225341797, 16.67245101928711, 25.672012329101562, 64.745361328125, 1.1097488403320312, -50.734466552734375, -0.46044921875, 21.691162109375, 23.844884872436523, 1.5325069427490234, -6.293975830078125, 31.185569763183594, 37.042930603027344, 2.947324752807617, -28.212181091308594, 46.31157684326172, -1.5082836151123047, -45.05426788330078, -20.648887634277344, 82.94820404052734, 5.596179962158203, -12.486289978027344, -32.07910919189453, 27.133529663085938, -21.211505889892578, -12.30179214477539, -29.852767944335938, 12.011688232421875, 0.680206298828125, -1.7980194091796875, 59.947601318359375, 26.621170043945312, 25.201759338378906, 13.108718872070312, 13.446037292480469, 27.190082550048828, 49.15473937988281, 17.810142517089844, 86.57850646972656, 19.956714630126953, -12.684993743896484, -24.08034896850586, 17.82976531982422, -7.191368103027344, -9.536537170410156, 6.319440841674805, 28.096237182617188, 4.912330627441406, 8.272262573242188, -14.791885375976562, -12.679922103881836, -0.946136474609375, 72.59786987304688, 2.634593963623047, -11.680892944335938, 30.69253921508789, 81.32505798339844, -3.106098175048828, 30.71422576904297, 65.2901611328125, 12.726829528808594, 0.08483695983886719, -2.0221214294433594, 52.10176086425781], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000130.npy"} +{"epoch": 0.1965230536659108, "step": 131, "batch_size": 64, "mean": 18.0293025970459, "std": 39.593727111816406, "min": -65.45376586914062, "p10": -15.61365394592285, "median": 4.9795074462890625, "p90": 74.41515426635746, "max": 179.00563049316406, "pos_frac": 0.65625, "sample": [14.632423400878906, 23.7420597076416, -12.290237426757812, -12.236038208007812, -0.7442150115966797, 49.50953674316406, 13.293813705444336, 2.0819950103759766, -4.383955001831055, 78.50347137451172, 3.0841064453125, 3.6113128662109375, 0.4547615051269531, -7.789005279541016, 10.761577606201172, 7.14076042175293, 50.30046081542969, 37.56519317626953, 16.056228637695312, -65.45376586914062, -26.03803253173828, 28.648757934570312, 179.00563049316406, -3.284942626953125, -29.871826171875, -17.022003173828125, -1.1459693908691406, 8.258766174316406, -18.19643783569336, 15.961677551269531, -0.11074447631835938, 64.87574768066406, 37.7987060546875, -2.8440418243408203, 51.64007568359375, 130.63795471191406, -0.5984001159667969, 108.93013000488281, 9.714046478271484, 80.96246337890625, -3.084707260131836, 47.62975311279297, -2.0281524658203125, 0.9433364868164062, -14.908348083496094, -7.438438415527344, -3.8380985260009766, -25.86700439453125, 5.188209533691406, 92.067626953125, 3.0637969970703125, 3.9020156860351562, 18.42110824584961, -15.91592788696289, 3.7099456787109375, 34.18238067626953, 4.978366851806641, 4.980648040771484, 84.91822814941406, 29.343894958496094, 23.258529663085938, 0.9393463134765625, 26.15031623840332, 18.116443634033203], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000131.npy"} +{"epoch": 0.1980347694633409, "step": 132, "batch_size": 64, "mean": 10.544490814208984, "std": 34.0684700012207, "min": -68.64266967773438, "p10": -25.603727340698242, "median": 7.376962661743164, "p90": 44.55811691284183, "max": 137.85256958007812, "pos_frac": 0.671875, "sample": [-1.0183067321777344, 5.926357269287109, 15.483177185058594, 9.357357025146484, -8.523548126220703, 1.874979019165039, 22.81726837158203, -1.0046367645263672, -38.67955017089844, -37.00596618652344, -1.0562286376953125, 6.158262252807617, 9.118789672851562, 5.736503601074219, 27.28482437133789, -7.529594421386719, -24.96084213256836, 137.85256958007812, 18.326126098632812, 5.4197540283203125, -21.58959197998047, -68.64266967773438, 27.562822341918945, -30.324996948242188, -11.283443450927734, 2.0047454833984375, 24.781639099121094, 23.024036407470703, 7.166383743286133, 4.512866973876953, 86.1397476196289, 1.8736457824707031, -16.868886947631836, 13.292938232421875, -3.9889144897460938, 77.59217834472656, 48.534767150878906, 7.587541580200195, 19.819580078125, 14.662979125976562, 6.519927978515625, -32.18998718261719, 58.94891357421875, 9.613426208496094, 9.569490432739258, -5.935333251953125, 31.82256317138672, 14.995552062988281, 2.711061477661133, -1.3328380584716797, 21.41608428955078, 16.277694702148438, -25.879249572753906, 18.435197830200195, 10.092496871948242, 12.504989624023438, 81.819091796875, -13.570615768432617, 28.4522705078125, 28.20079803466797, -13.709548950195312, 35.279266357421875, -66.30329895019531, 95.6748046875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000132.npy"} +{"epoch": 0.19954648526077098, "step": 133, "batch_size": 64, "mean": 2.652921199798584, "std": 34.15180587768555, "min": -95.25880432128906, "p10": -37.109083557128905, "median": 0.8013134002685547, "p90": 28.5053457260132, "max": 120.19557189941406, "pos_frac": 0.53125, "sample": [-4.478973388671875, -35.04338073730469, -0.64630126953125, 16.090736389160156, -1.0021610260009766, -41.783660888671875, 15.915241241455078, 55.26421356201172, 7.621242523193359, 19.115036010742188, -25.0513916015625, 120.19557189941406, 10.20134162902832, 12.087596893310547, -30.83462905883789, 3.8591766357421875, -34.812713623046875, -95.25880432128906, 15.194538116455078, -10.936294555664062, 15.004463195800781, 15.541763305664062, -17.15945053100586, 10.097526550292969, 3.4698619842529297, -12.391647338867188, 79.43580627441406, 24.579193115234375, -0.30428314208984375, -0.2734241485595703, -17.765655517578125, -9.306367874145508, -55.21452331542969, 12.435234069824219, -37.994384765625, -19.35149383544922, 0.24326705932617188, -62.54716491699219, 19.520654678344727, 92.59159851074219, -63.20781707763672, 21.692413330078125, 20.88470458984375, -6.254457473754883, 12.556842803955078, 45.909088134765625, 13.995956420898438, 21.870018005371094, -8.493133544921875, 51.189430236816406, 22.92412567138672, -5.6608734130859375, -4.130840301513672, 24.1859130859375, 18.90619659423828, -3.7511863708496094, 3.3459625244140625, 30.1879825592041, 1.3593597412109375, -5.912614822387695, -15.105766296386719, -40.65985870361328, -2.5204315185546875, 0.1685943603515625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000133.npy"} +{"epoch": 0.20105820105820105, "step": 134, "batch_size": 64, "mean": 15.986906051635742, "std": 45.1189079284668, "min": -88.22990417480469, "p10": -25.941170501708978, "median": 8.713602066040039, "p90": 76.44755477905281, "max": 156.74220275878906, "pos_frac": 0.6875, "sample": [-53.443328857421875, 26.32037353515625, -18.714927673339844, 16.15863037109375, 13.88827133178711, 4.9439544677734375, 11.172111511230469, 10.795845031738281, 20.48906707763672, 141.31982421875, 58.89752960205078, 11.801490783691406, -12.927719116210938, -11.145755767822266, -4.405519485473633, 1.9436988830566406, 41.7341423034668, 34.61103057861328, -31.59096908569336, 28.23839569091797, 8.71585464477539, 20.869522094726562, 4.20478630065918, -30.629459381103516, 33.976173400878906, 21.30099868774414, -29.038131713867188, -7.0771484375, 5.743846893310547, 2.3634262084960938, -0.04016876220703125, -1.5011539459228516, 134.41172790527344, 8.711349487304688, 0.1348419189453125, -9.449090957641602, 17.2459716796875, 1.4774913787841797, -65.94169616699219, 4.062929153442383, 104.55267333984375, 13.474285125732422, -88.22990417480469, 51.14103698730469, -1.7634010314941406, 14.200653076171875, 83.968994140625, -79.84841918945312, 36.044677734375, 5.579986572265625, 22.332721710205078, 0.7445068359375, -9.853874206542969, 30.12091064453125, 156.74220275878906, 19.29004669189453, 14.175056457519531, -1.4414291381835938, -7.894733428955078, 53.841800689697266, 99.84434509277344, 99.86460876464844, 2.7591896057128906, -6.112157821655273], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000134.npy"} +{"epoch": 0.20256991685563114, "step": 135, "batch_size": 64, "mean": 8.431817054748535, "std": 28.82426643371582, "min": -115.56353759765625, "p10": -17.45561828613281, "median": 5.181539535522461, "p90": 43.8841148376465, "max": 93.78330993652344, "pos_frac": 0.671875, "sample": [-16.122108459472656, -5.862560272216797, -29.426498413085938, -4.802606582641602, 9.457239151000977, 3.1288394927978516, -35.18987274169922, -10.714317321777344, 2.3416824340820312, 40.64734649658203, 56.781158447265625, -34.12987518310547, -6.787239074707031, 4.905525207519531, 52.7423095703125, -23.28484344482422, 32.89208984375, 2.1420745849609375, 12.419061660766602, 4.008323669433594, 4.256898880004883, 45.27130126953125, -0.09353256225585938, 30.07048797607422, 14.506908416748047, 0.5255775451660156, 19.34808349609375, 21.888633728027344, 28.174514770507812, 12.54422378540039, 24.346572875976562, -18.027122497558594, 65.2288589477539, 10.088756561279297, 14.672927856445312, -2.1534595489501953, 29.7672119140625, -5.654045104980469, 14.049583435058594, 6.045623779296875, 1.3921623229980469, 93.78330993652344, 3.087970733642578, 30.196670532226562, 3.338397979736328, -11.527931213378906, 3.6744461059570312, -8.603652954101562, 8.397575378417969, 51.8369140625, -7.306362152099609, -115.56353759765625, -24.193191528320312, 6.62696647644043, 32.07465362548828, 16.400562286376953, 5.457553863525391, 15.587980270385742, -12.754817962646484, 29.115589141845703, -3.030609130859375, 11.899299621582031, 55.1590461730957, -15.416397094726562], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000135.npy"} +{"epoch": 0.20408163265306123, "step": 136, "batch_size": 64, "mean": 18.51007843017578, "std": 53.436500549316406, "min": -90.38005065917969, "p10": -38.22278060913086, "median": 9.700971603393555, "p90": 69.16993713378906, "max": 199.68496704101562, "pos_frac": 0.640625, "sample": [-5.47320556640625, 12.738780975341797, -9.015233993530273, 12.634937286376953, -76.61280822753906, 30.643939971923828, -39.67871856689453, -9.753639221191406, -43.33422088623047, 19.754762649536133, -3.0326309204101562, -90.38005065917969, -0.3675098419189453, 4.021337509155273, -4.970771789550781, 9.723793029785156, 20.012842178344727, 15.216133117675781, 3.812570571899414, -7.894374847412109, 60.78828430175781, 10.645870208740234, 37.435302734375, 78.37708282470703, -6.24310302734375, 7.468196868896484, 47.895729064941406, -5.632274627685547, 26.102615356445312, 179.96395874023438, 64.08827209472656, -18.05670928955078, 162.35540771484375, 69.62655639648438, 142.67373657226562, 39.481475830078125, 16.94481658935547, 9.678150177001953, 34.20880126953125, -18.986000061035156, -17.336963653564453, -41.04475402832031, -34.825592041015625, 39.06800079345703, 49.26633834838867, 6.433170318603516, -48.21441650390625, 45.29449462890625, 0.1626739501953125, 50.539398193359375, -5.048053741455078, -13.30609130859375, 26.488746643066406, 114.91563415527344, 20.24810791015625, 199.68496704101562, -62.18650817871094, 5.199287414550781, 10.50201416015625, 12.920360565185547, 68.1044921875, 7.760894775390625, 1.0684013366699219, -27.911697387695312], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000136.npy"} +{"epoch": 0.20559334845049132, "step": 137, "batch_size": 64, "mean": 11.542734146118164, "std": 44.92219543457031, "min": -183.00592041015625, "p10": -20.857094573974603, "median": 7.331094741821289, "p90": 56.0562610626221, "max": 188.4976043701172, "pos_frac": 0.71875, "sample": [-8.80120849609375, -80.21066284179688, 3.3174781799316406, 45.176116943359375, 7.624982833862305, -0.7058029174804688, 49.11600875854492, 59.03065490722656, 1.7960624694824219, 188.4976043701172, 15.851879119873047, -6.985832214355469, 21.609539031982422, -14.793800354003906, 10.6781005859375, -41.227577209472656, -15.296737670898438, 6.76763916015625, 4.1009063720703125, 12.233835220336914, 48.69060516357422, 16.469970703125, 33.84205627441406, -28.25391387939453, 2.296724319458008, 32.26988983154297, 2.5971221923828125, 62.420494079589844, 15.788625717163086, 28.16193389892578, 30.140052795410156, -56.511322021484375, -10.230537414550781, 20.064170837402344, -11.334762573242188, 5.4397430419921875, 3.780179977416992, 11.881988525390625, 1.6568069458007812, 7.037206649780273, 37.02713394165039, 16.08498764038086, 9.261093139648438, 72.58358764648438, 1.8914375305175781, -14.664970397949219, -7.319095611572266, 17.767324447631836, 107.46556091308594, 70.15745544433594, 2.0806026458740234, -6.749687194824219, -0.7468833923339844, -23.24010467529297, 17.24440574645996, 6.804178237915039, 1.3721981048583984, -39.362060546875, 17.78691864013672, 41.890869140625, 18.22437286376953, 26.779281616210938, -183.00592041015625, 75.41606140136719], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000137.npy"} +{"epoch": 0.20710506424792138, "step": 138, "batch_size": 64, "mean": 10.865236282348633, "std": 56.70857238769531, "min": -156.860595703125, "p10": -40.639443969726564, "median": 11.12377643585205, "p90": 49.67519454956055, "max": 307.71368408203125, "pos_frac": 0.71875, "sample": [23.067298889160156, -58.93083190917969, 23.105899810791016, 0.8958396911621094, 9.041393280029297, -6.240114212036133, -62.758880615234375, -40.0238037109375, -102.58531188964844, 3.4271469116210938, 5.3768768310546875, -156.860595703125, 98.88967895507812, 28.71204376220703, 94.35757446289062, -89.38069152832031, 9.788192749023438, 9.16252326965332, -4.091560363769531, -34.05111312866211, 21.252269744873047, 9.084648132324219, 307.71368408203125, 18.252403259277344, 16.4954833984375, 17.329795837402344, 22.41962432861328, 36.82806396484375, 14.569000244140625, -11.993989944458008, 7.295036315917969, -1.5270309448242188, 17.982402801513672, -13.310379028320312, 10.489477157592773, -9.58856201171875, 2.736043930053711, -82.84785461425781, -1.4921073913574219, 23.132160186767578, 64.03475952148438, 49.914794921875, 25.67780303955078, 31.488861083984375, 2.4599037170410156, -19.937583923339844, 21.82686424255371, 17.115280151367188, 39.24481964111328, 17.041275024414062, 24.472862243652344, 25.346153259277344, 49.116127014160156, 22.70166778564453, 19.94110107421875, 2.411151885986328, 11.758075714111328, 12.11281967163086, -13.953973770141602, 9.936599731445312, 101.26134490966797, 0.9044761657714844, 65.68145751953125, -40.903289794921875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000138.npy"} +{"epoch": 0.20861678004535147, "step": 139, "batch_size": 64, "mean": 8.233301162719727, "std": 44.20016098022461, "min": -126.41476440429688, "p10": -36.98216247558593, "median": 7.810025215148926, "p90": 52.81390304565431, "max": 112.13418579101562, "pos_frac": 0.625, "sample": [-7.342231750488281, 15.814462661743164, 4.114768981933594, 4.047080993652344, -38.6357421875, 49.49406814575195, 46.30769348144531, 9.00537109375, 47.913936614990234, -29.32292366027832, -20.49013900756836, 28.99471092224121, -0.8535022735595703, -9.292112350463867, 13.93414306640625, -7.5399322509765625, 76.62368774414062, 6.614679336547852, 34.915504455566406, 23.1303653717041, -68.72743225097656, -126.41476440429688, -62.37617492675781, 9.008018493652344, 1.7030181884765625, 22.790454864501953, 46.05720520019531, 11.955390930175781, 1.6525688171386719, -10.001907348632812, 23.78284454345703, 39.755035400390625, 10.718482971191406, 112.13418579101562, 5.6819610595703125, 51.03871154785156, -6.643257141113281, -9.125118255615234, 109.26028442382812, 14.43484878540039, 4.29840087890625, -29.276607513427734, 17.991500854492188, -4.019182205200195, 27.63444709777832, -54.939998626708984, 58.23741149902344, -2.3694686889648438, 99.68046569824219, 45.91262435913086, -105.64666748046875, -8.270767211914062, -29.467529296875, -77.818603515625, -33.123809814453125, 14.34823226928711, -19.420555114746094, 17.59223175048828, -3.3808765411376953, 13.701797485351562, 15.207664489746094, 53.57469940185547, 6.406278610229492, 95.96136474609375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000139.npy"} +{"epoch": 0.21012849584278157, "step": 140, "batch_size": 64, "mean": 13.691162109375, "std": 53.95283889770508, "min": -171.59178161621094, "p10": -36.887596893310544, "median": 10.084192276000977, "p90": 74.61408538818361, "max": 147.18231201171875, "pos_frac": 0.609375, "sample": [100.77783203125, 84.38796997070312, 27.965816497802734, -33.63066101074219, -37.07746124267578, -16.15355682373047, -26.156166076660156, -87.10089874267578, 12.666740417480469, 64.61589813232422, -171.59178161621094, 53.940704345703125, -29.928298950195312, 115.72256469726562, 53.08699035644531, -38.54170227050781, -3.946859359741211, 24.69487762451172, 41.15937805175781, 25.951629638671875, -3.7565784454345703, 60.908538818359375, 28.701148986816406, 31.618515014648438, -12.997928619384766, 147.18231201171875, -8.836591720581055, 31.407625198364258, 47.4686279296875, -36.444580078125, -10.8673095703125, 6.550941467285156, 21.38846206665039, -44.93883514404297, -24.01428985595703, 86.31007385253906, 17.010835647583008, 69.51895141601562, 49.26133728027344, 144.34100341796875, 59.45001220703125, 63.994163513183594, 47.33738708496094, 28.66098403930664, 0.19327545166015625, -5.623069763183594, 76.79771423339844, 0.25339508056640625, 64.05142974853516, -20.864778518676758, -33.839717864990234, 10.726699829101562, 8.883785247802734, 8.144388198852539, 13.081489562988281, 0.3036994934082031, -9.965461730957031, -99.63643646240234, -20.421340942382812, -36.41450500488281, 9.44168472290039, 25.114063262939453, -6.368648529052734, -67.72106170654297], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000140.npy"} +{"epoch": 0.21164021164021163, "step": 141, "batch_size": 64, "mean": 14.721707344055176, "std": 47.925148010253906, "min": -118.71551513671875, "p10": -36.78585662841797, "median": 17.156002044677734, "p90": 64.64329528808597, "max": 175.25015258789062, "pos_frac": 0.6875, "sample": [-19.368408203125, -19.526371002197266, 113.85272216796875, 52.10342788696289, 30.14459228515625, 24.547096252441406, 17.07817840576172, 41.44917297363281, 6.32642936706543, -10.66334342956543, -35.312744140625, -44.89521026611328, 0.6807804107666016, -42.86036682128906, 67.831298828125, 17.344913482666016, 5.092632293701172, 2.2717819213867188, -13.810997009277344, -118.71551513671875, -14.386764526367188, -13.311830520629883, -22.14400863647461, 17.879825592041016, 20.167556762695312, 69.02511596679688, 16.73101806640625, -92.02133178710938, 48.7078857421875, -78.90280151367188, 6.406333923339844, -32.85334014892578, 32.09349060058594, 21.295570373535156, 20.072315216064453, 29.394908905029297, 41.145511627197266, 28.158348083496094, 36.8449821472168, 53.16361999511719, -13.032241821289062, 28.600482940673828, 24.233142852783203, 22.290924072265625, 48.50712585449219, 15.714042663574219, 0.41316986083984375, 17.00550079345703, 22.418415069580078, 17.23382568359375, -37.41719055175781, -5.864414215087891, 24.160419464111328, 142.33163452148438, 57.204620361328125, 175.25015258789062, -4.318689346313477, 15.620304107666016, 78.29195404052734, 92.50982666015625, 34.48207092285156, 2.9285926818847656, -61.32438659667969, -16.086441040039062], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000141.npy"} +{"epoch": 0.21315192743764172, "step": 142, "batch_size": 64, "mean": 10.503482818603516, "std": 32.376461029052734, "min": -66.23089599609375, "p10": -19.59601173400879, "median": 4.848202705383301, "p90": 55.554609680175794, "max": 108.90632629394531, "pos_frac": 0.5625, "sample": [19.390113830566406, -51.097991943359375, 0.5883045196533203, 38.175743103027344, -14.14731216430664, 44.60148620605469, 36.52508544921875, 9.871490478515625, -5.016689300537109, -13.353157043457031, 30.364364624023438, -1.4481391906738281, 5.648599624633789, -10.483200073242188, -27.08533477783203, 26.193038940429688, -19.61819839477539, 5.443330764770508, -7.99188232421875, 11.791633605957031, 4.253074645996094, -18.030303955078125, 22.95085334777832, -12.762691497802734, 3.5472869873046875, 69.73564147949219, -17.77655029296875, -25.634170532226562, -9.453125, 37.81553649902344, -13.362236022949219, 56.804931640625, 76.12890625, 77.99497985839844, -20.29930877685547, 29.417236328125, 3.6971511840820312, -5.261653900146484, -5.998851776123047, 27.272960662841797, 69.11483764648438, 63.95082092285156, 13.782608032226562, -15.341712951660156, -19.54424285888672, -15.765914916992188, 22.709453582763672, 52.63719177246094, -7.814117431640625, 21.38665771484375, 25.955379486083984, -5.741903305053711, -66.23089599609375, 108.90632629394531, 19.449764251708984, 45.03497314453125, 23.74371337890625, 13.088289260864258, -46.23980712890625, 19.785654067993164, -8.532661437988281, -3.4216957092285156, -15.674201965332031, 17.59343719482422], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000142.npy"} +{"epoch": 0.2146636432350718, "step": 143, "batch_size": 64, "mean": 3.237919330596924, "std": 43.30398178100586, "min": -155.02450561523438, "p10": -43.50662689208984, "median": 7.115828514099121, "p90": 48.024434280395525, "max": 119.52537536621094, "pos_frac": 0.59375, "sample": [-8.791900634765625, 81.04953002929688, 19.609500885009766, 44.41957473754883, 30.101516723632812, 17.927223205566406, -0.005096435546875, -0.7088241577148438, 33.626441955566406, -11.52676010131836, 14.630285263061523, -42.97589111328125, 119.52537536621094, -15.41278076171875, 42.448402404785156, -7.26580810546875, -3.844165802001953, -81.396728515625, 58.49165344238281, 0.6698684692382812, -69.34431457519531, 16.034053802490234, 36.894195556640625, 9.534801483154297, 11.345495223999023, 8.547571182250977, 37.36415100097656, -141.03704833984375, 3.022724151611328, -31.786258697509766, -43.73408508300781, 5.684085845947266, -62.877166748046875, 4.559452056884766, 49.569374084472656, -15.31839370727539, -18.646072387695312, 3.2953453063964844, 20.669578552246094, 31.05866241455078, 10.932422637939453, -155.02450561523438, 18.712566375732422, 18.600982666015625, -5.034656524658203, -12.443115234375, 55.80536651611328, 58.21388244628906, -0.4541282653808594, 1.7465362548828125, -7.556480407714844, -8.251363754272461, 18.163986206054688, 12.477607727050781, -66.69647979736328, 18.287412643432617, 50.10582733154297, 24.395751953125, 17.131797790527344, 12.264907836914062, 23.126081466674805, -3.932649612426758, -16.328929901123047, -2.4235706329345703], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000143.npy"} +{"epoch": 0.2161753590325019, "step": 144, "batch_size": 64, "mean": 10.374969482421875, "std": 54.66424560546875, "min": -120.20387268066406, "p10": -44.61282043457031, "median": 2.8654651641845703, "p90": 55.16204910278324, "max": 207.78436279296875, "pos_frac": 0.53125, "sample": [23.604766845703125, 23.923206329345703, 19.936737060546875, 0.4356555938720703, -2.4970321655273438, 7.962970733642578, 17.222131729125977, 16.086708068847656, 6.914091110229492, -5.997697830200195, 9.856193542480469, 40.48469543457031, -19.02734375, -8.408681869506836, -12.299636840820312, -5.732032775878906, 59.20436096191406, 6.948356628417969, -27.402725219726562, 45.72998809814453, 157.33529663085938, -55.5254020690918, -73.10792541503906, -54.948822021484375, -11.815597534179688, -33.428924560546875, -50.73168182373047, 61.21832275390625, -1.4453868865966797, -2.8318252563476562, 60.54948425292969, -28.72480010986328, -14.951423645019531, -19.772109985351562, 20.451255798339844, 3.8188629150390625, -2.9882545471191406, 41.44141387939453, 185.73011779785156, 37.849449157714844, 207.78436279296875, 28.221237182617188, -39.03828430175781, 164.59254455566406, -8.318893432617188, -47.00190734863281, -7.267280578613281, -4.390863418579102, 29.26705551147461, 8.886817932128906, 1.9120674133300781, -32.25640869140625, 17.519981384277344, 34.20953369140625, -120.20387268066406, 36.67185974121094, 18.009384155273438, 28.740310668945312, -11.647659301757812, -61.09880065917969, 25.058258056640625, -11.381847381591797, 4.2102813720703125, -13.546607971191406], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000144.npy"} +{"epoch": 0.21768707482993196, "step": 145, "batch_size": 64, "mean": 7.412748336791992, "std": 64.5594711303711, "min": -224.18621826171875, "p10": -55.54498062133789, "median": 9.182653427124023, "p90": 54.252891921997076, "max": 301.4726867675781, "pos_frac": 0.6875, "sample": [8.1121826171875, 21.913463592529297, 3.5169506072998047, 85.10980224609375, -6.206718444824219, -91.44120788574219, 140.6469268798828, -45.721412658691406, 52.54659652709961, -106.39286804199219, 11.273359298706055, 15.510734558105469, 8.773700714111328, 27.30377197265625, 31.229331970214844, 14.768157958984375, 4.0359344482421875, 74.43714141845703, -53.71075439453125, 21.64226531982422, 23.645156860351562, 2.3191986083984375, -21.767963409423828, 54.984161376953125, 24.315031051635742, 4.095069885253906, 39.62623596191406, 91.79598999023438, -13.61260986328125, -13.809375762939453, 43.93821716308594, 0.02169036865234375, 17.48603057861328, 43.619789123535156, -224.18621826171875, 21.216480255126953, 4.591146469116211, 2.486867904663086, 31.851715087890625, 14.698345184326172, -70.57839965820312, 10.733570098876953, 15.0338134765625, 8.999237060546875, 2.7852535247802734, -23.548065185546875, -1.146261215209961, 11.64034652709961, -9.617546081542969, -56.331077575683594, 25.651214599609375, 15.40786361694336, 17.086040496826172, -29.647319793701172, -36.096473693847656, 301.4726867675781, -12.27325439453125, -38.27714538574219, -114.34698486328125, 44.36131286621094, 101.67626953125, 7.426918029785156, -70.02452087402344, 9.366069793701172], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000145.npy"} +{"epoch": 0.21919879062736206, "step": 146, "batch_size": 64, "mean": 7.133736610412598, "std": 37.23088836669922, "min": -79.104248046875, "p10": -42.55087280273437, "median": 7.968650817871094, "p90": 43.48468170166017, "max": 143.2293243408203, "pos_frac": 0.6875, "sample": [36.565338134765625, -12.910751342773438, -24.140480041503906, 6.094001770019531, 10.444084167480469, -16.26909637451172, 13.381942749023438, -24.033416748046875, 5.392181396484375, 18.4619140625, -45.34513854980469, -44.76024627685547, 117.78443908691406, 7.248924255371094, -37.395668029785156, 13.220901489257812, 34.89990234375, -32.24529266357422, 22.559345245361328, -16.11804962158203, 26.34046173095703, -7.990806579589844, -79.104248046875, -4.680065155029297, -20.098480224609375, 12.637516021728516, 11.940900802612305, -16.160560607910156, -63.397544860839844, 23.708389282226562, 24.642499923706055, -47.705810546875, 143.2293243408203, 5.585517883300781, 40.44703674316406, 1.9009017944335938, 23.081703186035156, 8.006393432617188, -58.332122802734375, 5.768623352050781, 33.471397399902344, 5.643474578857422, 1.6007251739501953, 51.692626953125, 12.058723449707031, 48.44129943847656, 14.336977005004883, 12.174976348876953, 17.04399871826172, 5.021965026855469, 11.757692337036133, 44.786529541015625, 23.72386360168457, -50.35260009765625, -7.400045394897461, 7.930908203125, 84.6368408203125, -30.7236328125, 4.5007171630859375, 9.249103546142578, 46.80963897705078, 1.5383033752441406, 11.522560119628906, 34.43865203857422], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000146.npy"} +{"epoch": 0.22071050642479215, "step": 147, "batch_size": 64, "mean": 7.003822326660156, "std": 42.162052154541016, "min": -213.34912109375, "p10": -29.401393508911127, "median": 6.824322700500488, "p90": 46.289954376220706, "max": 136.95550537109375, "pos_frac": 0.609375, "sample": [1.1995658874511719, 21.144948959350586, 28.54287338256836, -20.892379760742188, -13.971603393554688, 12.204658508300781, -34.18901062011719, 21.880447387695312, -17.987594604492188, -6.528600692749023, 30.58001708984375, 23.89480972290039, 79.32199096679688, 61.0062255859375, -16.795639038085938, -24.522727966308594, 26.141403198242188, -213.34912109375, 6.168966293334961, -0.8787498474121094, 0.34676170349121094, 27.37999725341797, -8.507501602172852, 2.81201171875, 21.44548797607422, 11.316696166992188, -14.863426208496094, 9.253164291381836, 6.854583740234375, 7.648628234863281, -15.340333938598633, -21.050315856933594, 81.36590576171875, -36.45355224609375, 45.57695007324219, 31.79834747314453, 19.885534286499023, 40.57879638671875, 59.69775390625, 6.794061660766602, 136.95550537109375, 46.59552764892578, -39.469322204589844, 34.436370849609375, 24.12189483642578, -16.645469665527344, 38.795326232910156, -9.460342407226562, 16.84197235107422, -9.739261627197266, -24.812938690185547, 21.516983032226562, -32.94183349609375, 5.772058486938477, -31.367874145507812, 3.520751953125, -14.91755485534668, -32.755958557128906, 13.074531555175781, 48.59515380859375, -17.180383682250977, 21.88957977294922, 26.62877082824707, -0.718902587890625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000147.npy"} +{"epoch": 0.2222222222222222, "step": 148, "batch_size": 64, "mean": 16.38731575012207, "std": 35.626243591308594, "min": -84.68721008300781, "p10": -19.003978729248043, "median": 10.983685493469238, "p90": 61.47180557250978, "max": 111.57833862304688, "pos_frac": 0.75, "sample": [111.57833862304688, 4.342262268066406, -4.8934783935546875, -11.640531539916992, 56.99006652832031, -11.982002258300781, 5.7590484619140625, 3.927532196044922, 79.30288696289062, 34.433624267578125, 0.11738204956054688, 23.119667053222656, 14.003334045410156, 19.00501251220703, 35.55692672729492, -36.460205078125, 3.356884002685547, 8.529678344726562, 63.17646026611328, 10.418388366699219, -2.951671600341797, -16.687156677246094, 57.49427795410156, 4.341941833496094, 33.35321044921875, 29.1300048828125, 20.56305694580078, 16.567955017089844, 104.01005554199219, -5.56591796875, -5.878681182861328, 75.28120422363281, 33.14165496826172, -37.50830841064453, 3.1734390258789062, 55.507110595703125, 2.8673839569091797, 13.972160339355469, -45.044883728027344, 4.375589370727539, 30.82245635986328, 10.662372589111328, 6.626029968261719, 100.61555480957031, -24.93644142150879, 18.69548988342285, 11.304998397827148, 4.675878524780273, 77.57450103759766, -84.68721008300781, 14.254596710205078, 43.478126525878906, 11.36237907409668, 19.53997802734375, 6.731132507324219, -13.156204223632812, 54.713348388671875, 19.984695434570312, 4.3852386474609375, -40.249351501464844, 13.704448699951172, 46.76209259033203, -12.862594604492188, -19.996902465820312], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000148.npy"} +{"epoch": 0.2237339380196523, "step": 149, "batch_size": 64, "mean": 10.800019264221191, "std": 34.2532844543457, "min": -85.12741088867188, "p10": -24.740995788574217, "median": 7.888480186462402, "p90": 52.122961425781284, "max": 96.86337280273438, "pos_frac": 0.65625, "sample": [-4.952033996582031, 65.900390625, 23.467300415039062, -1.3081207275390625, -85.12741088867188, 33.684120178222656, -26.107898712158203, 13.322698593139648, 4.111011505126953, 20.699687957763672, 2.8575820922851562, -7.2662353515625, 6.670963287353516, 7.022058486938477, -42.08917236328125, -2.1256561279296875, 2.5426597595214844, -17.486480712890625, 8.772672653198242, 39.607269287109375, 9.892044067382812, 12.867805480957031, 27.256784439086914, -21.551555633544922, 37.564002990722656, 11.234037399291992, 4.734086990356445, 0.7515010833740234, 11.227201461791992, -9.841972351074219, -1.4713973999023438, 44.05097961425781, 85.06838989257812, 37.962745666503906, 39.4852294921875, 31.5236759185791, 29.003755569458008, -54.304359436035156, 96.86337280273438, -11.148641586303711, 59.76448059082031, 80.75486755371094, -15.072416305541992, 89.9708251953125, 8.754901885986328, 10.902765274047852, -1.9770431518554688, 3.01263427734375, 2.710214614868164, 26.691635131835938, -4.87774658203125, 18.76921844482422, -28.624832153320312, -3.261383056640625, 4.672924041748047, -79.112060546875, 44.192291259765625, 55.521820068359375, 20.152137756347656, 11.39365005493164, -3.6058349609375, 19.15784454345703, -36.03816223144531, -16.01457977294922], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000149.npy"} +{"epoch": 0.2252456538170824, "step": 150, "batch_size": 64, "mean": 15.787776947021484, "std": 41.78438186645508, "min": -90.46773529052734, "p10": -37.316831207275385, "median": 14.553839683532715, "p90": 64.90417861938477, "max": 121.30941772460938, "pos_frac": 0.671875, "sample": [-12.595317840576172, 6.140203475952148, 107.54791259765625, 63.17583465576172, -90.46773529052734, -2.8680877685546875, 9.564695358276367, -16.731552124023438, 4.5664825439453125, 17.06001091003418, -26.04412078857422, 46.466453552246094, -41.978973388671875, -21.354286193847656, -10.09332275390625, 8.862770080566406, 8.457778930664062, 96.91061401367188, -47.21295166015625, -22.45281982421875, 33.77964782714844, 8.102676391601562, 121.30941772460938, -63.79512023925781, 15.994949340820312, 19.297147750854492, 13.268720626831055, 16.463199615478516, -13.084373474121094, 35.31316375732422, 11.322736740112305, 76.25372314453125, 26.595722198486328, 48.995826721191406, 29.365150451660156, -32.91058349609375, -11.808660507202148, 12.866668701171875, 47.002235412597656, 56.777679443359375, 62.59007263183594, 16.590843200683594, 29.26385498046875, 15.838958740234375, 66.93693542480469, -59.81292724609375, 9.913846969604492, 22.968107223510742, -29.77147674560547, 99.39390563964844, 56.23179626464844, -1.1895828247070312, 31.185569763183594, -41.56676483154297, 0.9353313446044922, 65.6448974609375, -22.28443145751953, -9.879302978515625, 38.245819091796875, -39.205223083496094, 31.556602478027344, 57.39671325683594, 28.617408752441406, 52.75328826904297], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000150.npy"} +{"epoch": 0.22675736961451248, "step": 151, "batch_size": 64, "mean": 20.149696350097656, "std": 39.89313888549805, "min": -44.23045349121094, "p10": -12.44385299682617, "median": 11.531228065490723, "p90": 62.6014991760254, "max": 230.4818115234375, "pos_frac": 0.78125, "sample": [1.7223587036132812, 31.263813018798828, 28.91751480102539, 1.8832435607910156, 14.05527114868164, 1.5859260559082031, 230.4818115234375, -7.676136016845703, 5.5845947265625, -13.167282104492188, 32.70027160644531, 16.26321029663086, 63.27748107910156, -44.23045349121094, 2.1853408813476562, 22.076629638671875, 28.866151809692383, 1.9895896911621094, -2.3052940368652344, 1.5811710357666016, -21.135902404785156, 7.372529983520508, 28.643943786621094, 7.513965606689453, 69.76806640625, -35.426002502441406, 4.064334869384766, 16.36062240600586, -1.0556297302246094, 3.448822021484375, 5.775320053100586, -9.105659484863281, -22.704757690429688, 85.57228088378906, 20.57763671875, 5.2798004150390625, 49.662384033203125, -14.336502075195312, 4.01171875, 133.1123046875, 1.0710582733154297, 20.010833740234375, 1.1074104309082031, 12.785394668579102, 18.900474548339844, 65.46498107910156, 10.911788940429688, 48.36091613769531, 39.32555389404297, 28.381603240966797, -10.755851745605469, 61.024208068847656, 0.3922996520996094, 13.008445739746094, -14.115798950195312, 20.190013885498047, 12.150667190551758, 14.699241638183594, -7.726322174072266, 89.165283203125, 36.829322814941406, 33.995853424072266, -0.9076118469238281, 40.85626220703125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000151.npy"} +{"epoch": 0.22826908541194255, "step": 152, "batch_size": 64, "mean": 14.8113374710083, "std": 36.34917068481445, "min": -64.19361114501953, "p10": -30.225102615356445, "median": 14.97220230102539, "p90": 61.159970855712906, "max": 123.65203857421875, "pos_frac": 0.671875, "sample": [9.575469970703125, 25.455894470214844, 23.56431007385254, -21.583656311035156, -51.02435302734375, 43.66246795654297, 62.96282958984375, 25.4864501953125, -12.317161560058594, -53.89283752441406, 6.403203964233398, 28.29718017578125, 12.3682861328125, -42.86521911621094, 33.54573059082031, 27.79022979736328, 7.336050033569336, 23.907527923583984, -1.2660770416259766, -11.323627471923828, -64.19361114501953, 31.83226776123047, 17.57611846923828, -30.291786193847656, -30.069507598876953, 3.4866504669189453, 23.781593322753906, -10.274581909179688, -5.754274368286133, 33.77198791503906, 6.536779403686523, 29.08668327331543, 3.424936294555664, -16.032028198242188, -14.590095520019531, 123.65203857421875, -1.5028800964355469, -54.95936584472656, 48.18232727050781, -7.552093505859375, -3.8743972778320312, 22.51291275024414, 32.02515411376953, 28.746002197265625, 10.433395385742188, 67.01446533203125, 26.301986694335938, 3.9754295349121094, 87.68761444091797, 65.2447509765625, -42.57257843017578, 34.17008972167969, 10.579862594604492, 83.13487243652344, 35.477622985839844, 28.878955841064453, 53.596961975097656, 56.95330047607422, 30.445953369140625, 89.41800689697266, -5.6236419677734375, -18.753150939941406, 0.8776473999023438, 29.080543518066406], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000152.npy"} +{"epoch": 0.22978080120937264, "step": 153, "batch_size": 64, "mean": 11.96335220336914, "std": 33.810935974121094, "min": -80.27685546875, "p10": -20.902150726318354, "median": 6.534611701965332, "p90": 53.73253326416016, "max": 132.79296875, "pos_frac": 0.609375, "sample": [16.35875129699707, 25.873748779296875, -8.437973022460938, -9.78125, 24.409143447875977, 22.743309020996094, -37.468421936035156, -3.7020225524902344, 10.411544799804688, -1.541412353515625, 2.0045909881591797, 23.319080352783203, 11.449111938476562, 79.85211944580078, 53.90364074707031, -5.5980377197265625, 4.786827087402344, 13.47882080078125, -0.9683761596679688, 39.74847412109375, -5.801078796386719, 13.484542846679688, -12.07373046875, 5.796539306640625, 17.019359588623047, -15.21823501586914, 8.64046859741211, 0.3067588806152344, -11.4942626953125, 4.229755401611328, 34.86205291748047, 23.78583335876465, -17.13506317138672, -3.741058349609375, 31.561935424804688, -4.72613525390625, -36.775596618652344, 28.021583557128906, 132.79296875, 20.156639099121094, -80.27685546875, -0.5280838012695312, 81.31993103027344, -11.966304779052734, 16.12503433227539, -8.074213027954102, 48.87498474121094, -4.790863037109375, 53.333282470703125, -14.73709487915039, -33.16754150390625, 17.9056396484375, 11.527166366577148, -26.796653747558594, 0.04290008544921875, -29.46483612060547, -22.516616821289062, 19.2835636138916, 76.33172607421875, 86.74444580078125, 30.16556167602539, 7.272684097290039, 3.5938453674316406, 70.91793823242188], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000153.npy"} +{"epoch": 0.23129251700680273, "step": 154, "batch_size": 64, "mean": 8.983856201171875, "std": 39.986534118652344, "min": -89.81486511230469, "p10": -45.22833480834961, "median": 9.872314453125, "p90": 43.20933570861817, "max": 163.376220703125, "pos_frac": 0.65625, "sample": [-56.51648712158203, -44.57337188720703, -70.38093566894531, -22.646411895751953, 37.94488525390625, 30.440290451049805, -17.114776611328125, 5.87257194519043, -68.75361633300781, -66.25223541259766, 20.204463958740234, -1.0568256378173828, 15.898847579956055, 82.92901611328125, -8.337928771972656, -0.6893386840820312, -16.198009490966797, 1.67608642578125, 1.5568504333496094, -28.800094604492188, 32.80139923095703, 49.57886505126953, 41.9123420715332, 21.591285705566406, -45.509033203125, 0.6723766326904297, 40.28632354736328, -50.89006805419922, 62.269195556640625, -89.81486511230469, 13.587385177612305, 10.15168571472168, 4.244483947753906, 36.863380432128906, 33.4402961730957, -5.797798156738281, 13.086481094360352, 35.0623779296875, 9.59294319152832, 10.344505310058594, 10.411861419677734, 27.41381072998047, 5.337099075317383, -11.694120407104492, 16.592300415039062, 26.931344985961914, -4.339790344238281, 3.412425994873047, 31.54131317138672, 8.347850799560547, 97.54661560058594, 21.917999267578125, -11.92422866821289, 30.688148498535156, 163.376220703125, 33.16900634765625, 6.9987030029296875, -1.5747356414794922, 43.76519012451172, 51.77223205566406, -25.900001525878906, 14.211603164672852, -2.0694007873535156, 20.358766555786133], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000154.npy"} +{"epoch": 0.2328042328042328, "step": 155, "batch_size": 64, "mean": 12.618437767028809, "std": 40.19599914550781, "min": -113.47488403320312, "p10": -22.098344802856445, "median": 8.32396125793457, "p90": 58.19476318359378, "max": 118.43864440917969, "pos_frac": 0.65625, "sample": [7.789642333984375, -7.5919647216796875, 101.236572265625, 21.59172821044922, 5.2971038818359375, 36.94187927246094, 16.734066009521484, 31.211929321289062, -3.1280155181884766, 37.47391891479492, 40.57444763183594, 15.822074890136719, -18.782699584960938, 44.139556884765625, -11.465190887451172, 8.762870788574219, -2.2317352294921875, 90.95269775390625, -3.3975601196289062, 44.079620361328125, 38.49842071533203, -16.2893123626709, 4.011985778808594, -21.100658416748047, 62.80316162109375, 42.132911682128906, -75.80694580078125, 81.97589111328125, -1.5377883911132812, 34.487037658691406, -113.47488403320312, 22.06350326538086, 31.26631736755371, 6.023155212402344, 118.43864440917969, 68.27906799316406, -6.1075897216796875, 51.857582092285156, -83.78390502929688, -18.345001220703125, 5.34857177734375, -10.758358001708984, -22.525924682617188, 0.1869659423828125, 60.91069793701172, 15.553386688232422, 4.787925720214844, 10.513999938964844, 36.97853088378906, 9.739944458007812, 40.312530517578125, -53.303375244140625, 7.885051727294922, -22.64642333984375, 0.9408817291259766, -7.3076019287109375, 16.94263458251953, -13.329803466796875, -53.63479232788086, 1.2940673828125, 48.680259704589844, 34.36780548095703, 30.228214263916016, -14.987686157226562], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000155.npy"} +{"epoch": 0.23431594860166288, "step": 156, "batch_size": 64, "mean": 20.159339904785156, "std": 37.271636962890625, "min": -60.612152099609375, "p10": -21.951475524902342, "median": 12.471498489379883, "p90": 66.90407257080078, "max": 133.94503784179688, "pos_frac": 0.703125, "sample": [-0.6214141845703125, -23.296192169189453, -5.775676727294922, -31.753280639648438, 12.256668090820312, 67.23667907714844, 28.519018173217773, -7.0848388671875, 56.977264404296875, 12.686328887939453, 8.214263916015625, 35.28065872192383, -27.0511474609375, 91.26246643066406, -34.0799560546875, 37.16858673095703, 66.12799072265625, 45.401710510253906, 3.0554027557373047, 30.90581512451172, 8.420585632324219, 9.252010345458984, 20.762310028076172, -0.6882972717285156, -22.509796142578125, 35.37469482421875, -2.587686538696289, 44.210296630859375, 7.366119384765625, 25.49925994873047, 5.9150390625, 34.508544921875, 27.1322021484375, -20.648727416992188, 78.00648498535156, 114.0169448852539, -28.3592529296875, -60.612152099609375, -19.388107299804688, 47.300811767578125, 31.663192749023438, 57.84855651855469, 11.86456298828125, 8.955036163330078, 13.106805801391602, 10.495643615722656, -17.30551528930664, -1.04498291015625, 12.221702575683594, -8.775482177734375, 15.698646545410156, 36.08258056640625, 27.182086944580078, 16.065940856933594, 22.722984313964844, 2.9970245361328125, -14.980613708496094, 73.80498504638672, 133.94503784179688, 4.806488037109375, 24.471099853515625, 129.8433380126953, 30.902416229248047, -0.7754745483398438], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000156.npy"} +{"epoch": 0.23582766439909297, "step": 157, "batch_size": 64, "mean": 11.376026153564453, "std": 26.38998794555664, "min": -52.42222595214844, "p10": -19.00675239562988, "median": 8.107128143310547, "p90": 45.7853744506836, "max": 88.66059875488281, "pos_frac": 0.703125, "sample": [44.08958435058594, 33.81502151489258, 6.350860595703125, 31.527671813964844, -10.179229736328125, -46.61848449707031, 53.89320373535156, 29.040496826171875, 15.337127685546875, -3.5178604125976562, -21.33050537109375, 3.3518600463867188, 63.406097412109375, 6.4860382080078125, -52.42222595214844, 0.9167518615722656, -23.899429321289062, -20.398300170898438, 34.38683319091797, 23.026891708374023, 7.866752624511719, 53.60977554321289, 88.66059875488281, 12.070928573608398, 2.7181758880615234, 39.254310607910156, -13.622264862060547, -7.004159927368164, 9.181427001953125, 2.6094970703125, 21.471471786499023, -7.172737121582031, 15.735942840576172, 46.35759735107422, 16.613235473632812, 43.042869567871094, -9.156806945800781, 27.27033233642578, 50.20928192138672, 21.7554931640625, 15.904243469238281, 9.792884826660156, -2.201587677001953, 44.45018768310547, -32.579925537109375, -2.1548404693603516, -3.22869873046875, 21.145774841308594, 14.226619720458984, 1.4853992462158203, -14.150192260742188, -15.759807586669922, 6.1136016845703125, 2.171039581298828, 5.671258926391602, 25.572168350219727, 17.196006774902344, 29.095722198486328, -42.696502685546875, 1.8782272338867188, 50.13365173339844, -3.675750732421875, 2.5946197509765625, 8.347503662109375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000157.npy"} +{"epoch": 0.23733938019652306, "step": 158, "batch_size": 64, "mean": 15.548144340515137, "std": 47.088253021240234, "min": -92.35326385498047, "p10": -20.693905639648435, "median": 6.710661888122559, "p90": 48.558840942382815, "max": 205.09375, "pos_frac": 0.6875, "sample": [-49.82526397705078, 29.29120635986328, -34.38462829589844, 11.845733642578125, -5.654052734375, 26.916088104248047, 104.31829833984375, 59.50263977050781, -15.94940185546875, -9.692806243896484, 10.6669921875, 36.60832977294922, -7.4365386962890625, 16.35486602783203, -22.727264404296875, 5.916109085083008, 205.09375, 5.930742263793945, 4.361030578613281, 38.352867126464844, 9.542816162109375, 10.870773315429688, 31.987512588500977, 23.34699249267578, 37.257843017578125, 47.83721160888672, 9.433258056640625, 12.615280151367188, 33.44178771972656, -11.715522766113281, 14.925567626953125, 7.8621063232421875, 44.69084167480469, -92.35326385498047, 35.21135711669922, 48.86811065673828, 29.188026428222656, -8.292953491210938, -10.903867721557617, -58.40910339355469, 112.54621887207031, 46.67326354980469, -34.497764587402344, 137.84414672851562, 3.3675003051757812, 30.140350341796875, -2.5667057037353516, 4.396781921386719, 149.43661499023438, 3.175546646118164, 0.7905158996582031, 4.44378662109375, -12.199913024902344, -5.836860656738281, 7.490581512451172, -0.3094291687011719, -15.797418594360352, -14.542133331298828, 23.173995971679688, 1.0031013488769531, 0.3461570739746094, 5.119159698486328, 2.2166309356689453, -76.22634887695312], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000158.npy"} +{"epoch": 0.23885109599395313, "step": 159, "batch_size": 64, "mean": 15.352785110473633, "std": 34.20684814453125, "min": -89.474853515625, "p10": -15.121523666381835, "median": 14.731132507324219, "p90": 56.67912673950197, "max": 116.33451843261719, "pos_frac": 0.671875, "sample": [37.39726638793945, 16.699077606201172, 28.034103393554688, 72.9377670288086, 17.840484619140625, -62.333396911621094, 77.65780639648438, 36.7366943359375, 25.495567321777344, 116.33451843261719, -34.179931640625, 11.657272338867188, 52.49534606933594, -20.78681182861328, -12.87643051147461, -14.392505645751953, 20.031646728515625, 73.17300415039062, 41.20118713378906, -1.4806232452392578, 13.722938537597656, -12.133804321289062, 6.566873550415039, -14.344268798828125, 41.75648498535156, 7.197412490844727, -2.8043384552001953, -47.55247497558594, 24.110794067382812, 41.34186553955078, -25.286468505859375, 97.36264038085938, 25.575273513793945, -8.510498046875, 23.127723693847656, 9.819696426391602, 21.9891357421875, 25.92064666748047, -1.03045654296875, -3.7483444213867188, 12.870475769042969, 1.5565948486328125, 59.965240478515625, -10.106632232666016, 22.09912109375, 39.48631286621094, -6.169439315795898, 19.514196395874023, -6.3844451904296875, 25.65997314453125, 9.658187866210938, 58.47217559814453, 41.16642761230469, 13.392440795898438, 4.034576416015625, -89.474853515625, 23.55428695678711, -10.28564453125, -15.4339599609375, 4.319799423217773, 51.252159118652344, 16.440277099609375, -3.4712677001953125, 15.739326477050781], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000159.npy"} +{"epoch": 0.24036281179138322, "step": 160, "batch_size": 64, "mean": 15.628469467163086, "std": 34.938018798828125, "min": -45.653167724609375, "p10": -20.554230308532713, "median": 11.853351593017578, "p90": 50.183908843994146, "max": 163.13037109375, "pos_frac": 0.6875, "sample": [2.6330337524414062, -30.147747039794922, 14.889312744140625, 63.8956298828125, -5.448207855224609, 30.271987915039062, 30.3961181640625, -8.736654281616211, 21.338027954101562, 20.57984161376953, 99.58525085449219, -17.416465759277344, 43.03303527832031, 27.554718017578125, 9.975410461425781, 13.822105407714844, 6.754232406616211, -42.97637176513672, 16.21883773803711, -10.378786087036133, -18.683143615722656, -26.49280548095703, 21.106338500976562, 37.243377685546875, 34.92219543457031, -1.1152610778808594, 37.013877868652344, 1.0230693817138672, 2.0239410400390625, 7.397747039794922, 40.05070495605469, 20.18337059020996, 96.16510009765625, 51.23649978637695, 3.161569595336914, -12.40982437133789, 41.73658752441406, -7.992588043212891, -1.2490768432617188, 13.731292724609375, 6.637866973876953, 27.207393646240234, 3.5976486206054688, -19.273040771484375, -9.54571533203125, 40.10779571533203, -41.23661804199219, -21.10331153869629, 29.91720199584961, 31.653480529785156, 48.911415100097656, -6.29576301574707, 8.521446228027344, 39.6681022644043, -17.687850952148438, 30.745162963867188, 6.54315185546875, 30.780181884765625, -40.01763153076172, 7.136821746826172, 50.85157775878906, 50.72926330566406, 163.13037109375, -45.653167724609375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000160.npy"} +{"epoch": 0.2418745275888133, "step": 161, "batch_size": 64, "mean": 15.65213680267334, "std": 30.611291885375977, "min": -61.29161071777344, "p10": -21.79409408569336, "median": 15.648178100585938, "p90": 56.12165069580079, "max": 122.9779052734375, "pos_frac": 0.71875, "sample": [9.663322448730469, -28.505863189697266, 12.288070678710938, 62.081512451171875, 17.162582397460938, 8.483413696289062, 122.9779052734375, 57.8887939453125, 10.314655303955078, -0.8530778884887695, -29.1182861328125, -22.018455505371094, 15.160110473632812, 24.09051513671875, 53.52817153930664, 34.56975555419922, 18.881479263305664, -61.29161071777344, 22.62784194946289, -30.617141723632812, -8.813491821289062, 16.136245727539062, 14.704376220703125, -21.270584106445312, 43.61205291748047, -5.995731353759766, 22.289024353027344, 13.510831832885742, -7.6470184326171875, 76.26392364501953, 31.138935089111328, 21.64714813232422, 22.265947341918945, -37.58222961425781, 17.92772674560547, -24.911529541015625, -17.029462814331055, 76.06282043457031, 25.86724853515625, 41.67190170288086, 18.06497573852539, 3.3912429809570312, -6.340061187744141, 22.875255584716797, 25.18792724609375, -8.725753784179688, 0.8313446044921875, 2.0046024322509766, 61.52742004394531, 33.816505432128906, 0.14923095703125, 27.43310546875, 43.741600036621094, 29.00226593017578, -11.051155090332031, 6.453031539916992, 22.19561767578125, -0.21361732482910156, 24.214691162109375, 54.501747131347656, 56.815895080566406, -12.889236450195312, 1.9419174194335938, 9.64633560180664], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000161.npy"} +{"epoch": 0.24338624338624337, "step": 162, "batch_size": 64, "mean": 22.24054718017578, "std": 52.56031036376953, "min": -161.30690002441406, "p10": -27.10833511352539, "median": 21.53130531311035, "p90": 56.74167633056641, "max": 176.31825256347656, "pos_frac": 0.78125, "sample": [33.035186767578125, -25.795562744140625, -125.97103881835938, -3.1829833984375, 80.24835968017578, 31.726398468017578, 21.63900375366211, 38.1884765625, -79.1907958984375, 47.34886932373047, 11.130409240722656, 19.228618621826172, 9.15852165222168, 176.31825256347656, 40.86254119873047, 27.19478416442871, 157.11419677734375, 34.28175354003906, 57.785491943359375, 33.28334426879883, 43.93633270263672, 47.93766403198242, 18.76538848876953, -34.49562072753906, 22.465362548828125, 24.362197875976562, 155.66635131835938, 54.30610656738281, -47.360618591308594, 107.37051391601562, 21.423606872558594, 16.8287353515625, 19.750526428222656, 35.1027946472168, -161.30690002441406, 15.283843994140625, 32.60804748535156, 31.322507858276367, -8.552606582641602, -6.34332275390625, -43.80255126953125, -4.8125762939453125, 115.9570541381836, 33.402374267578125, 34.70756530761719, 11.5819091796875, -0.6750106811523438, 9.866424560546875, 12.871482849121094, 12.366958618164062, 1.7952537536621094, 4.194061279296875, 47.45420837402344, 37.408714294433594, 48.22210693359375, 33.883399963378906, 14.045961380004883, -27.67095184326172, 13.2745361328125, 13.558429718017578, -2.584115982055664, 22.16106414794922, 50.98121643066406, 11.732719421386719], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000162.npy"} +{"epoch": 0.24489795918367346, "step": 163, "batch_size": 64, "mean": 21.724830627441406, "std": 35.09114456176758, "min": -58.681800842285156, "p10": -22.19576187133789, "median": 18.163969039916992, "p90": 61.06978607177735, "max": 111.6259765625, "pos_frac": 0.78125, "sample": [1.1745023727416992, 45.167205810546875, 10.8040771484375, 24.455230712890625, 15.064826965332031, 15.986934661865234, -23.136783599853516, 109.55459594726562, 58.749786376953125, -20.567062377929688, 5.2994384765625, 81.7043228149414, 88.59261322021484, 26.182701110839844, 28.694053649902344, 2.10589599609375, 16.13369369506836, -36.654930114746094, 111.6259765625, 49.94390869140625, 2.0035858154296875, 52.76807403564453, 33.804962158203125, -19.175559997558594, -2.946636199951172, 46.04485321044922, 19.388103485107422, 67.9686279296875, 2.772066116333008, -26.626068115234375, 28.14446449279785, 14.238197326660156, 52.26734924316406, -8.070587158203125, 57.31037902832031, 53.15339660644531, -13.073688507080078, 74.60997009277344, -22.893775939941406, -0.65362548828125, 28.127269744873047, 56.1895866394043, 16.939834594726562, 34.12093734741211, 46.829925537109375, -8.808713912963867, 2.7916793823242188, 10.146282196044922, 6.012306213378906, -50.099510192871094, -58.681800842285156, 43.184547424316406, 28.84650421142578, 2.861745834350586, 9.25600814819336, -40.968231201171875, 26.76019287109375, 62.06407165527344, 52.99253845214844, 33.62771224975586, 31.956275939941406, 9.988077163696289, 0.510467529296875, 23.826313018798828], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000163.npy"} +{"epoch": 0.24640967498110355, "step": 164, "batch_size": 64, "mean": 24.983139038085938, "std": 40.3116569519043, "min": -63.330909729003906, "p10": -17.710041046142578, "median": 20.839577674865723, "p90": 69.7843246459961, "max": 176.6715087890625, "pos_frac": 0.75, "sample": [6.171028137207031, 0.3795127868652344, -16.843772888183594, 7.977001190185547, 8.707649230957031, 77.95525360107422, 65.64097595214844, 62.103736877441406, 37.400596618652344, 28.440845489501953, 16.00506591796875, -18.081298828125, 23.286270141601562, -2.6832351684570312, 21.06887435913086, 92.81202697753906, 11.428115844726562, 20.6706485748291, 129.1844482421875, 0.7290306091308594, 12.006401062011719, -58.09153747558594, 16.842247009277344, 64.17739868164062, -0.9931697845458984, 21.478004455566406, 48.8875617980957, -4.0312347412109375, 12.737762451171875, 2.891551971435547, 38.88648223876953, 49.423072814941406, -14.952978134155273, 33.482791900634766, -6.112495422363281, -20.555740356445312, 75.822509765625, 35.65150451660156, 48.9776496887207, -63.330909729003906, 26.27923011779785, -18.923805236816406, 8.62990951538086, 49.466590881347656, 35.705997467041016, 16.253278732299805, 63.311241149902344, -3.5466060638427734, 176.6715087890625, 38.32664108276367, 72.65753173828125, -0.13478469848632812, 40.46267318725586, -32.66919708251953, 5.682119369506836, -46.1033935546875, 5.442718505859375, 69.35617065429688, 60.574127197265625, -5.3703460693359375, 69.96781921386719, 54.21955871582031, 26.103816986083984, 21.008506774902344], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000164.npy"} +{"epoch": 0.24792139077853365, "step": 165, "batch_size": 64, "mean": 21.854755401611328, "std": 39.527862548828125, "min": -66.84976196289062, "p10": -24.722153663635247, "median": 22.575064659118652, "p90": 66.04389801025391, "max": 136.55227661132812, "pos_frac": 0.75, "sample": [75.40045928955078, 8.086114883422852, 12.161951065063477, 41.221920013427734, 32.488319396972656, -6.501129150390625, 25.472164154052734, -66.84976196289062, 1.4555015563964844, 3.7744388580322266, 14.900840759277344, 25.337421417236328, 66.39462280273438, -10.642065048217773, -27.18292236328125, 11.89971923828125, 25.546371459960938, 11.522018432617188, -40.45079040527344, -28.653480529785156, -11.648948669433594, -16.537704467773438, 49.34087371826172, -1.8623676300048828, 63.86114501953125, 18.59410285949707, 23.76125144958496, 45.681846618652344, 50.29682159423828, 51.25385284423828, 30.95465850830078, 2.0251903533935547, 80.53958129882812, 54.47669982910156, 17.197021484375, -60.301605224609375, 9.796028137207031, 24.612491607666016, 48.028751373291016, 35.16450500488281, -18.98036003112793, -16.78143310546875, 65.22554016113281, 34.914306640625, 24.560537338256836, -8.300926208496094, 6.303829193115234, 13.849098205566406, 37.42338562011719, -9.95452880859375, 117.72634887695312, 60.047874450683594, -65.91484069824219, 40.28512191772461, 12.583147048950195, -50.91242599487305, 54.750953674316406, 13.325510025024414, 32.68999481201172, 42.991729736328125, 90.99848175048828, 73.31588745117188, 136.55227661132812, 21.388877868652344], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000165.npy"} +{"epoch": 0.2494331065759637, "step": 166, "batch_size": 64, "mean": 21.415264129638672, "std": 38.87615203857422, "min": -55.95924377441406, "p10": -23.52275161743164, "median": 14.288116455078125, "p90": 63.41234512329102, "max": 142.8706817626953, "pos_frac": 0.6875, "sample": [62.486412048339844, 10.571399688720703, 13.424957275390625, 19.929420471191406, 48.3857421875, 13.01171875, -23.550865173339844, 82.59831237792969, 45.58232116699219, -23.4571533203125, 15.151275634765625, 6.183332443237305, 38.51066970825195, -24.34485626220703, 53.302215576171875, 43.67561340332031, 5.397247314453125, 29.587034225463867, -28.09796905517578, 4.669532775878906, -0.20879173278808594, 39.6749153137207, -3.4716110229492188, 11.055534362792969, 55.95606231689453, -1.745046615600586, -15.803009033203125, 132.62396240234375, 23.011383056640625, 54.750648498535156, -55.95924377441406, -41.84956741333008, 6.763145446777344, 36.04682159423828, 77.41846466064453, 1.8328437805175781, 63.809173583984375, -17.958709716796875, 42.22509765625, 33.32000732421875, 19.361862182617188, 62.310028076171875, -35.7098388671875, -24.250076293945312, 103.12210083007812, -11.653350830078125, -14.012107849121094, -15.406246185302734, 18.8481388092041, 55.56321716308594, 49.87530517578125, -17.353179931640625, 142.8706817626953, 21.186370849609375, -16.05942153930664, -4.4265899658203125, 36.0546875, -1.041341781616211, 65.52395629882812, 5.1139984130859375, 50.63941955566406, 38.998016357421875, 0.17809295654296875, 6.334705352783203], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000166.npy"} +{"epoch": 0.2509448223733938, "step": 167, "batch_size": 64, "mean": 22.31304168701172, "std": 34.235740661621094, "min": -69.79014587402344, "p10": -17.776457977294918, "median": 24.510845184326172, "p90": 63.69577865600587, "max": 100.80227661132812, "pos_frac": 0.71875, "sample": [32.79270935058594, 25.045597076416016, 6.279092788696289, 39.622615814208984, 10.788406372070312, 52.38359069824219, 59.32714080810547, -4.079002380371094, 12.013006210327148, 58.267356872558594, 40.53618621826172, -0.97027587890625, 29.596920013427734, -5.538787841796875, -9.39023208618164, -3.2144508361816406, 52.817718505859375, 97.62220764160156, 53.93827819824219, -39.71675109863281, -31.838279724121094, 5.564830780029297, 4.837835311889648, -38.383262634277344, 32.86590576171875, -69.79014587402344, -9.141441345214844, 1.873941421508789, -5.7609405517578125, 50.90068054199219, 39.626434326171875, 53.93928527832031, 8.667509078979492, 45.83499526977539, 26.53734588623047, 15.464569091796875, 64.83488464355469, 100.80227661132812, 11.554489135742188, 24.672569274902344, 5.338294982910156, 80.26363372802734, -14.984367370605469, 18.785167694091797, 39.60905456542969, -28.62469482421875, -7.040641784667969, 61.037864685058594, -10.888862609863281, 66.52046966552734, 30.42612075805664, 4.157283782958984, 31.824392318725586, 73.31837463378906, 12.738676071166992, 46.20799255371094, -22.58367919921875, 38.73158264160156, 29.851276397705078, 83.12085723876953, 45.95164489746094, -2.286588668823242, 24.34912109375, -18.973068237304688], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000167.npy"} +{"epoch": 0.25245653817082386, "step": 168, "batch_size": 64, "mean": 22.150875091552734, "std": 48.526275634765625, "min": -68.59466552734375, "p10": -26.634132385253903, "median": 13.133010864257812, "p90": 62.676322174072276, "max": 191.59930419921875, "pos_frac": 0.609375, "sample": [20.67291259765625, -8.751472473144531, 58.85470199584961, -17.223243713378906, 4.187566757202148, 43.11686706542969, 48.250762939453125, -10.215065002441406, -52.51014709472656, -68.59466552734375, -3.6304244995117188, -5.435918807983398, 42.18513488769531, 61.07539367675781, -18.294326782226562, -0.37322235107421875, 34.20673751831055, 28.141220092773438, 49.99366760253906, 41.76496124267578, 59.84113311767578, 1.9698562622070312, -7.486686706542969, 16.652450561523438, -9.291061401367188, 46.34711837768555, 57.76359939575195, 2.3784446716308594, -31.78685760498047, 42.80751037597656, -44.21308898925781, -9.720458984375, 142.5550537109375, 55.892852783203125, 57.67599105834961, -17.123092651367188, 181.61212158203125, -33.7711181640625, -6.610633850097656, 13.152671813964844, 4.711294174194336, 63.36243438720703, -23.042221069335938, 53.34473419189453, 14.59466552734375, -28.17352294921875, 109.61674499511719, -22.0128173828125, 36.820411682128906, 10.227485656738281, 66.796875, 26.706680297851562, 44.49972152709961, -11.152059555053711, -12.012397766113281, 43.95262145996094, -7.1302947998046875, -18.267593383789062, -38.46631622314453, 12.536354064941406, 46.72709655761719, 73.23625946044922, 191.59930419921875, 13.113349914550781], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000168.npy"} +{"epoch": 0.25396825396825395, "step": 169, "batch_size": 64, "mean": 19.29210090637207, "std": 45.666378021240234, "min": -112.84326171875, "p10": -22.405795288085937, "median": 14.449321746826172, "p90": 71.22001419067384, "max": 146.61749267578125, "pos_frac": 0.703125, "sample": [9.192947387695312, 46.370948791503906, 68.39612579345703, 1.0090484619140625, 26.931177139282227, -8.062324523925781, 20.014911651611328, 0.31229400634765625, 17.006248474121094, 37.529151916503906, -22.630592346191406, -19.898948669433594, 100.39752197265625, 54.18553161621094, -22.526947021484375, -17.44755744934082, 30.737567901611328, -1.8028945922851562, 49.80763244628906, 1.5759658813476562, -13.287384033203125, 78.18919372558594, 52.98345184326172, 40.286231994628906, -0.7357177734375, 113.2142562866211, 19.311866760253906, -9.131187438964844, -15.52572250366211, 31.548038482666016, 11.125802993774414, -29.906707763671875, 24.59891128540039, -4.674795150756836, 48.90635681152344, 10.638914108276367, 61.84618377685547, -111.1026611328125, 41.445457458496094, 42.392333984375, 2.7492198944091797, 44.21551513671875, 2.7733078002929688, 11.89239501953125, 3.123109817504883, 28.819313049316406, 38.337425231933594, -22.12310791015625, 110.07485961914062, 56.78874206542969, 3.168243408203125, 31.821897506713867, 32.194923400878906, -112.84326171875, -82.32484436035156, 146.61749267578125, 88.22465515136719, 7.270774841308594, -18.465341567993164, 72.43025207519531, -28.82464599609375, 50.108795166015625, -6.0165557861328125, 11.460639953613281], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000169.npy"} +{"epoch": 0.25547996976568405, "step": 170, "batch_size": 64, "mean": 21.57918930053711, "std": 46.06220245361328, "min": -79.02304077148438, "p10": -41.01193695068359, "median": 25.357257843017578, "p90": 70.73103408813478, "max": 133.42845153808594, "pos_frac": 0.734375, "sample": [22.486328125, -4.878959655761719, 31.143903732299805, 13.980682373046875, -1.0903282165527344, 40.65277862548828, 13.499200820922852, 29.985898971557617, -10.577186584472656, 63.53166198730469, -63.15320587158203, 5.096757888793945, 25.32556915283203, 122.58393859863281, 66.63922882080078, -42.96562194824219, -13.034383773803711, 27.54638671875, 1.8149490356445312, -34.20433807373047, 4.471675872802734, 59.51197814941406, 29.43068504333496, 9.286882400512695, -60.23712921142578, 40.93925476074219, -79.02304077148438, 41.72576141357422, -68.40242767333984, 101.6666259765625, 61.66256332397461, 91.71029663085938, 57.298194885253906, 31.5210018157959, -11.157461166381836, 41.04808044433594, -36.453338623046875, 26.667320251464844, 66.15259552001953, 5.616424560546875, 42.173583984375, 2.285043716430664, -8.547119140625, 47.12483215332031, 33.32817840576172, 133.42845153808594, 72.48466491699219, 116.74893188476562, 10.885059356689453, 11.059226989746094, 1.3686904907226562, -24.30582618713379, 17.189247131347656, 53.54820251464844, -69.13345336914062, 25.388946533203125, 46.59638214111328, 43.519142150878906, -6.415828704833984, 9.576650619506836, 50.195953369140625, 73.37942504882812, 58.69934844970703, -67.32884979248047], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000170.npy"} +{"epoch": 0.25699168556311414, "step": 171, "batch_size": 64, "mean": 27.42411231994629, "std": 58.09111785888672, "min": -123.17868041992188, "p10": -38.90941925048827, "median": 27.058337211608887, "p90": 85.49695281982423, "max": 206.85394287109375, "pos_frac": 0.734375, "sample": [44.16044616699219, 6.642726898193359, 51.28096008300781, 29.990768432617188, 47.46687316894531, 77.85124206542969, 83.04649353027344, 72.11170959472656, 4.520009994506836, 5.759744644165039, -2.378316879272461, -81.13444519042969, 69.08159637451172, 36.17369842529297, 86.54714965820312, 17.80071258544922, 55.44131851196289, 25.043781280517578, 35.62675857543945, 1.7508544921875, 5.301155090332031, 25.862478256225586, 21.272632598876953, 206.85394287109375, 55.07558059692383, 0.330230712890625, -27.701248168945312, -16.67304229736328, 105.34662628173828, -55.994850158691406, 63.422691345214844, 51.359130859375, 67.8847427368164, -43.712921142578125, 29.122913360595703, 52.556827545166016, -72.45442962646484, -53.51664733886719, 76.72720336914062, 117.11276245117188, -101.15596771240234, 3.105012893676758, 72.41667175292969, 20.909805297851562, 59.3677978515625, 116.44725036621094, 124.19296264648438, 28.254196166992188, 3.7008285522460938, 60.45269775390625, 14.288589477539062, 50.71599578857422, 54.65131378173828, -18.160079956054688, -16.171470642089844, 40.28839111328125, 167.94802856445312, 2.9625091552734375, -20.702838897705078, -123.17868041992188, -11.88629150390625, -20.477951049804688, -25.577957153320312, -2.2074737548828125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000171.npy"} +{"epoch": 0.2585034013605442, "step": 172, "batch_size": 64, "mean": 26.919471740722656, "std": 49.15607833862305, "min": -52.537109375, "p10": -32.03645286560058, "median": 20.526268005371094, "p90": 78.404061126709, "max": 254.56405639648438, "pos_frac": 0.6875, "sample": [-33.43614196777344, 23.09961700439453, -9.402557373046875, 40.33708190917969, -46.493316650390625, 0.42180633544921875, -0.4515056610107422, 37.45815658569336, 85.19256591796875, 73.50621032714844, 254.56405639648438, 48.147979736328125, 23.261962890625, 17.952919006347656, 60.21636962890625, 77.26036071777344, 75.03874206542969, -18.41473388671875, -13.454269409179688, 37.47868347167969, 66.14212036132812, -1.44915771484375, -34.9490966796875, 12.524505615234375, 9.819084167480469, 8.182195663452148, -35.66138458251953, 85.3968505859375, 32.623809814453125, -45.19841003417969, 32.18256759643555, 72.28336334228516, 53.9283447265625, 111.98217010498047, -7.146636962890625, 5.575254440307617, 66.62957763671875, 45.59728240966797, 73.21234130859375, -3.9064102172851562, -4.95667839050293, 25.200782775878906, 0.6792182922363281, 51.00569152832031, 34.97557830810547, -28.770511627197266, -27.5816650390625, -35.499839782714844, 12.029067993164062, -3.9147987365722656, -21.114273071289062, 81.24066162109375, 78.89421844482422, -52.537109375, 8.083379745483398, 55.69148254394531, 9.328941345214844, 12.244361877441406, 4.222227096557617, 65.33172607421875, 23.677520751953125, -7.453824996948242, 75.34432220458984, 86.67327880859375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000172.npy"} +{"epoch": 0.2600151171579743, "step": 173, "batch_size": 64, "mean": 33.819602966308594, "std": 62.85457992553711, "min": -82.49879455566406, "p10": -26.856155204772946, "median": 20.070772171020508, "p90": 125.15487518310549, "max": 217.4893798828125, "pos_frac": 0.6875, "sample": [-25.666879653930664, -60.63880920410156, -13.26284408569336, 43.876461029052734, 54.22899627685547, 90.01744079589844, 1.8051776885986328, 3.2119293212890625, -0.3193473815917969, 52.912376403808594, 5.276660919189453, 42.24163055419922, 217.4893798828125, -8.682334899902344, -0.12235832214355469, 21.334701538085938, 143.24659729003906, 4.0919952392578125, 61.70111083984375, -1.3816642761230469, 104.15005493164062, 54.3123779296875, -11.144855499267578, 121.3985595703125, -66.99732971191406, -82.49879455566406, 29.243934631347656, 36.462890625, 18.806842803955078, -9.115665435791016, -14.547760009765625, 143.8408966064453, -19.490772247314453, 84.82646179199219, 60.4356689453125, 5.335416793823242, 55.1204948425293, 13.125713348388672, 54.58782196044922, 126.76472473144531, 32.54902648925781, 3.8102951049804688, 2.4336605072021484, 94.66381072998047, 47.46660232543945, -46.84368896484375, 15.975288391113281, -45.55687713623047, 185.7393798828125, 46.20567321777344, 74.55963134765625, -72.89482879638672, 32.90338134765625, 151.16217041015625, 60.5980110168457, 14.577201843261719, -16.540252685546875, -11.299739837646484, 46.887184143066406, 188.88372802734375, 6.6027069091796875, 57.966678619384766, -14.005535125732422, -27.3658447265625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000173.npy"} +{"epoch": 0.2615268329554044, "step": 174, "batch_size": 64, "mean": 37.283050537109375, "std": 50.09931182861328, "min": -74.63858032226562, "p10": -18.131761169433595, "median": 35.76353454589844, "p90": 96.6236427307129, "max": 167.67251586914062, "pos_frac": 0.734375, "sample": [66.15091705322266, 76.51426696777344, 33.95063018798828, -0.0006618499755859375, 19.571077346801758, 56.209503173828125, 25.82341766357422, 73.02471923828125, 37.576438903808594, 33.22731399536133, 70.32840728759766, 90.56693267822266, 18.86194610595703, -17.880401611328125, 121.74481201171875, -2.1796092987060547, 71.9410400390625, 69.16217041015625, 69.8826904296875, -72.18706512451172, 66.55012512207031, 12.805976867675781, -5.993747711181641, 69.1156997680664, 99.36458587646484, -1.3934345245361328, 167.67251586914062, 44.9288330078125, 67.51942443847656, 67.69989013671875, 19.35138702392578, 101.05075073242188, 94.67768096923828, -1.7774848937988281, 65.36375427246094, 143.02392578125, 81.95777893066406, -61.9798583984375, -0.1255950927734375, 97.45762634277344, -19.172348022460938, 118.19358825683594, 9.092426300048828, 5.654834747314453, 4.708351135253906, 84.58023834228516, 72.28683471679688, -47.68744659423828, 59.87294006347656, 40.29231262207031, -15.479042053222656, 55.0421142578125, 32.819297790527344, -3.1878929138183594, 58.83125305175781, 10.852546691894531, 57.11267852783203, -43.861671447753906, 33.92731857299805, -74.63858032226562, 1.015188217163086, 1.2721099853515625, -18.239486694335938, -6.730644226074219], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000174.npy"} +{"epoch": 0.26303854875283444, "step": 175, "batch_size": 64, "mean": 35.31770706176758, "std": 63.79440689086914, "min": -107.29620361328125, "p10": -35.046549987792964, "median": 32.70631790161133, "p90": 110.50601577758793, "max": 257.4674377441406, "pos_frac": 0.6875, "sample": [-66.6393051147461, 60.67713165283203, -15.129806518554688, -3.0275516510009766, -34.20256805419922, 66.94706726074219, 0.9662361145019531, 151.70162963867188, 61.74871826171875, -6.981147766113281, -17.855796813964844, 92.3233413696289, 51.32933044433594, 73.99343872070312, -16.368457794189453, 76.28661346435547, 77.36332702636719, 51.13970184326172, 65.8334732055664, -14.295097351074219, 30.402420043945312, 15.310630798339844, 27.482421875, 146.63279724121094, 99.63850402832031, 27.63812255859375, -21.022640228271484, -31.482177734375, 78.57630157470703, -89.91702270507812, 58.17274475097656, -36.918617248535156, 88.37889862060547, -35.40825653076172, 14.907546997070312, 12.162403106689453, 46.594818115234375, 70.07866668701172, 2.656953811645508, -76.80503845214844, 35.010215759277344, -107.29620361328125, 13.136062622070312, 41.36094665527344, -39.134521484375, 48.86235046386719, 76.29783630371094, -20.5125732421875, 115.16352081298828, 86.14958953857422, 8.312183380126953, 70.31954956054688, 41.784629821777344, -24.43378448486328, 30.35137176513672, 119.50609588623047, -15.158647537231445, 116.44735717773438, 88.67613220214844, 257.4674377441406, 10.451980590820312, 150.8972930908203, -11.509437561035156, 85.29399108886719], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000175.npy"} +{"epoch": 0.26455026455026454, "step": 176, "batch_size": 64, "mean": 23.626522064208984, "std": 71.45909881591797, "min": -155.1552276611328, "p10": -59.82547454833984, "median": 13.462091445922852, "p90": 110.5086624145508, "max": 295.76251220703125, "pos_frac": 0.65625, "sample": [132.0876007080078, -92.2824478149414, 93.26232147216797, 28.33600616455078, 1.95538330078125, -5.886932373046875, -42.760520935058594, 22.30426788330078, 0.3431415557861328, 1.55914306640625, -105.98049926757812, -47.02001190185547, -16.294612884521484, 31.242431640625, -62.65422821044922, 75.17874145507812, -10.656116485595703, 54.49857711791992, -13.944046020507812, -6.0828094482421875, 20.575027465820312, 21.626441955566406, 23.732465744018555, 114.05128479003906, 176.18014526367188, 53.787452697753906, -63.791526794433594, 81.3596420288086, 35.340599060058594, 82.44772338867188, -53.99699401855469, 4.5738677978515625, 9.426177978515625, 113.37361145019531, 47.208900451660156, 87.7454833984375, 13.361465454101562, 112.16009521484375, 13.56271743774414, -50.42822265625, -63.16210174560547, 131.10195922851562, 68.80628204345703, 82.80953979492188, 48.48738098144531, 6.236278533935547, -52.567657470703125, -31.714149475097656, 36.41899108886719, 106.65531921386719, -20.611190795898438, 64.7765884399414, 1.371246337890625, 66.62922668457031, 10.172134399414062, 13.01529312133789, 101.213134765625, -2.7142181396484375, -34.05244445800781, 295.76251220703125, -7.541984558105469, -155.1552276611328, 28.982215881347656, -62.323394775390625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000176.npy"} +{"epoch": 0.2660619803476946, "step": 177, "batch_size": 64, "mean": 49.94912338256836, "std": 51.39891815185547, "min": -38.092010498046875, "p10": -13.51575012207031, "median": 51.96393585205078, "p90": 111.3998474121094, "max": 193.10079956054688, "pos_frac": 0.828125, "sample": [-23.238235473632812, 22.095726013183594, 64.49439239501953, 137.4227752685547, 49.373565673828125, -3.218151092529297, 79.93108367919922, 79.76806640625, 158.27574157714844, 67.16001892089844, 121.1505355834961, 82.98756408691406, 25.12368392944336, -15.944255828857422, 64.65016174316406, -32.85436248779297, 84.16819763183594, 20.365875244140625, -8.298141479492188, 9.036102294921875, 9.387161254882812, 7.3558502197265625, 7.7823486328125, 67.2486801147461, 36.77969741821289, 61.10902404785156, 33.82042694091797, 85.35167694091797, -10.815877914428711, 12.172168731689453, 98.11508178710938, 5.832977294921875, 65.06227111816406, -14.34259033203125, 30.000627517700195, 89.5029296875, 85.7671890258789, 6.986351013183594, 50.842315673828125, 68.4251480102539, 29.95362091064453, 72.60206604003906, 105.80300903320312, 148.59263610839844, 98.72097778320312, -11.586456298828125, -35.191009521484375, 53.102752685546875, 113.79849243164062, 15.200981140136719, 169.27294921875, 20.666601181030273, -38.092010498046875, 89.9891357421875, 46.18083190917969, 57.409156799316406, 72.05911254882812, 193.10079956054688, 58.20001220703125, 53.08555603027344, 33.748199462890625, 86.34124755859375, -20.927703857421875, 5.879341125488281], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000177.npy"} +{"epoch": 0.2675736961451247, "step": 178, "batch_size": 64, "mean": 34.32612609863281, "std": 54.11427307128906, "min": -63.19164276123047, "p10": -21.23418350219726, "median": 23.001148223876953, "p90": 103.51495819091798, "max": 223.653564453125, "pos_frac": 0.6875, "sample": [55.602256774902344, 78.36959838867188, 10.015403747558594, -15.338424682617188, 65.4514389038086, -15.855737686157227, 12.114112854003906, 71.30032348632812, 92.46124267578125, 31.67721939086914, -22.471099853515625, 223.653564453125, 99.40144348144531, -51.916221618652344, 23.550430297851562, 12.545623779296875, 82.51152038574219, 1.8016204833984375, 15.666603088378906, -6.075370788574219, 88.23583984375, 34.346168518066406, 36.30052185058594, -5.969024658203125, -24.175662994384766, 90.81787109375, -18.348045349121094, 87.91323852539062, -16.51685333251953, 12.838289260864258, 43.10699462890625, -0.5365409851074219, 21.55181121826172, 114.17337036132812, 32.50196838378906, 22.451866149902344, -12.199554443359375, 16.96910858154297, 117.54386138916016, 18.683013916015625, -25.918304443359375, 38.51112365722656, 79.39645385742188, 10.787227630615234, -53.50788879394531, 105.27789306640625, -16.399307250976562, -52.26554870605469, 41.662841796875, -3.0841140747070312, 74.27647399902344, 130.23106384277344, 25.687978744506836, 68.34204864501953, 39.72388458251953, 14.209693908691406, 145.4435577392578, -15.312522888183594, -63.19164276123047, -8.050491333007812, 40.00294494628906, 109.49555206298828, 89.22132110595703, -1.8220653533935547], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000178.npy"} +{"epoch": 0.2690854119425548, "step": 179, "batch_size": 64, "mean": 35.03150177001953, "std": 65.55333709716797, "min": -92.59703063964844, "p10": -44.54313354492187, "median": 29.571091651916504, "p90": 113.02062149047853, "max": 218.15504455566406, "pos_frac": 0.703125, "sample": [62.594207763671875, -62.46722412109375, -27.356277465820312, 73.83251953125, 10.479103088378906, 48.85865783691406, 31.54203224182129, 41.78155517578125, 85.45376586914062, 65.55021667480469, 87.72977447509766, 63.05638122558594, 85.50885772705078, -56.75395202636719, 6.541051864624023, -73.74828338623047, -91.6487808227539, -45.33574676513672, -20.618988037109375, 7.980476379394531, -6.751211166381836, 83.786865234375, -24.005142211914062, 149.19723510742188, 56.088653564453125, -92.59703063964844, -25.08330535888672, 94.35823059082031, 27.60015106201172, 76.2948989868164, 38.39893341064453, 123.38813018798828, 9.288558959960938, 25.238983154296875, 3.7008590698242188, 1.9321022033691406, -26.141647338867188, 205.6206512451172, 114.62762451171875, 6.124015808105469, 38.90556335449219, 107.40856170654297, 80.06249237060547, -3.9233055114746094, -60.39818572998047, 109.88609313964844, 155.97494506835938, -26.340545654296875, 114.3639907836914, 218.15504455566406, 101.15994262695312, 66.080810546875, 90.1495361328125, -6.165397644042969, 48.12813949584961, 45.879608154296875, 38.28650665283203, 7.9312286376953125, 20.926223754882812, -17.10405731201172, -42.693702697753906, 0.4167137145996094, 24.26612091064453, -3.386964797973633], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000179.npy"} +{"epoch": 0.2705971277399849, "step": 180, "batch_size": 64, "mean": 25.78931427001953, "std": 56.636104583740234, "min": -119.43909454345703, "p10": -25.79564132690429, "median": 10.824772834777832, "p90": 101.43201904296878, "max": 159.32601928710938, "pos_frac": 0.625, "sample": [36.327911376953125, 8.744621276855469, 19.374670028686523, 22.62763214111328, -2.0700225830078125, 31.26502227783203, -7.7909698486328125, 54.15555191040039, -11.386449813842773, -4.8048858642578125, 76.6760025024414, -4.200115203857422, -1.11474609375, -10.432340621948242, 34.305633544921875, 10.818107604980469, -59.856231689453125, 137.6186065673828, -30.01943588256836, -113.32122802734375, 133.4422607421875, 39.08074188232422, 0.26056671142578125, 143.6569061279297, 0.7589645385742188, 61.283897399902344, -13.723278045654297, -11.543113708496094, 10.831438064575195, 73.15167236328125, 27.49017333984375, 77.17617797851562, -13.729179382324219, 66.66485595703125, 14.787578582763672, 28.903839111328125, 9.63211441040039, 159.32601928710938, -30.138471603393555, -5.138574600219727, 4.67411994934082, 95.18510437011719, 88.5910873413086, -15.319786071777344, -15.940120697021484, -1.3378582000732422, 136.5593719482422, 8.507698059082031, 104.10926818847656, 40.18470001220703, 73.05191802978516, -32.53485870361328, 64.09053039550781, -13.691463470458984, 42.101112365722656, -5.318531036376953, 105.33045196533203, -119.43909454345703, 69.3953857421875, -85.86880493164062, 2.8317413330078125, 64.6441879272461, 82.01589965820312, -0.3979053497314453], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000180.npy"} +{"epoch": 0.272108843537415, "step": 181, "batch_size": 64, "mean": 42.71110916137695, "std": 60.33169937133789, "min": -103.15669250488281, "p10": -11.316823577880859, "median": 32.947853088378906, "p90": 124.15504913330079, "max": 242.18252563476562, "pos_frac": 0.734375, "sample": [22.75926971435547, 4.870853424072266, -11.476516723632812, -0.27523040771484375, 49.999141693115234, -8.015533447265625, 48.46800994873047, -33.934844970703125, 13.05606460571289, -3.240589141845703, 76.54306030273438, 12.03036117553711, 39.48904037475586, 19.640792846679688, 47.386444091796875, -3.5255813598632812, 95.50764465332031, -36.102569580078125, 103.32695770263672, -103.15669250488281, 134.77853393554688, 125.44479370117188, 60.39899826049805, 16.31842041015625, 141.722900390625, 114.75202941894531, 17.377029418945312, 141.27345275878906, -81.33688354492188, -33.617584228515625, 70.89581298828125, -9.04302978515625, 242.18252563476562, -4.574176788330078, 121.14564514160156, 13.477302551269531, -11.59872817993164, 149.7716827392578, 6.963829040527344, 39.916282653808594, 83.46958923339844, -5.341724395751953, 118.95997619628906, 121.0578842163086, 77.37820434570312, 10.945291519165039, 45.79405212402344, 49.942138671875, 15.728385925292969, 9.393575668334961, 17.00104522705078, 93.6646728515625, 133.0586395263672, -10.944206237792969, 57.43070983886719, 72.20317840576172, 32.3594970703125, 33.53620910644531, 8.932863235473633, -10.128620147705078, 98.61558532714844, -2.381288528442383, 57.76898193359375, 35.467315673828125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000181.npy"} +{"epoch": 0.273620559334845, "step": 182, "batch_size": 64, "mean": 44.49578094482422, "std": 68.44499206542969, "min": -103.9338607788086, "p10": -31.023181915283203, "median": 38.55175018310547, "p90": 143.84914703369145, "max": 241.97503662109375, "pos_frac": 0.734375, "sample": [95.77049255371094, -69.64408111572266, -39.51163864135742, 38.29137420654297, 13.295928955078125, 92.04154205322266, 44.826934814453125, -54.94489288330078, -1.51171875, 60.14198303222656, 61.089542388916016, 4.447784423828125, 155.17239379882812, 50.5101318359375, 99.19384002685547, 195.72610473632812, -2.9920501708984375, 6.769676208496094, 83.42387390136719, 33.204017639160156, 74.71925354003906, 4.111118316650391, 43.754302978515625, -7.76336669921875, 67.40209197998047, 24.752578735351562, 92.65703582763672, 1.8156013488769531, 241.97503662109375, 109.7376708984375, 187.87957763671875, 44.98927307128906, 34.29710388183594, 18.93838882446289, 172.30865478515625, -29.711669921875, -26.683334350585938, 25.403244018554688, 190.955078125, -0.42820167541503906, 26.565818786621094, -81.36795043945312, 130.25128173828125, 54.59156799316406, 85.13998413085938, 149.6768035888672, 1.65362548828125, -31.989715576171875, 53.06172180175781, 38.81212615966797, 68.79755401611328, 64.21136474609375, -16.86119842529297, 78.85042572021484, 87.1909408569336, -0.18826866149902344, -14.392852783203125, 9.56843376159668, 73.20533752441406, 11.683366775512695, 59.568885803222656, -31.58525848388672, -1.190908432006836, -103.9338607788086], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000182.npy"} +{"epoch": 0.2751322751322751, "step": 183, "batch_size": 64, "mean": 18.806425094604492, "std": 70.02229309082031, "min": -179.90621948242188, "p10": -60.158447265625, "median": 7.735899925231934, "p90": 120.2717338562012, "max": 180.7825164794922, "pos_frac": 0.625, "sample": [22.123443603515625, 114.60195922851562, -42.31651306152344, -16.208324432373047, 8.592777252197266, 31.046653747558594, -88.44844055175781, 148.16351318359375, -61.04582214355469, -46.0650634765625, -93.6608657836914, -9.1954345703125, 80.69913482666016, -34.05406188964844, 67.02611541748047, 9.196552276611328, 2.5733795166015625, 88.87483215332031, 86.74432373046875, 46.518577575683594, 97.45079040527344, -58.08790588378906, 94.7197494506836, -9.52916145324707, 16.1494140625, -16.97869873046875, 87.51240539550781, 10.034400939941406, -129.81993103027344, -21.527908325195312, 70.0568618774414, -90.55926513671875, 1.6917495727539062, 61.72119140625, -21.910621643066406, -1.2990188598632812, 36.16168975830078, 124.590087890625, -179.90621948242188, 65.79055786132812, -15.60763168334961, 0.8221282958984375, 132.05276489257812, -27.773296356201172, 6.649744033813477, 6.510313034057617, 6.879022598266602, 0.5792560577392578, -91.25547790527344, -18.45478630065918, 92.55033111572266, 46.1556396484375, 3.348712921142578, 11.079399108886719, 153.3714599609375, -20.58069610595703, 122.7016372680664, -12.848541259765625, -5.504997253417969, 11.86248779296875, 17.707130432128906, 180.7825164794922, 25.246997833251953, 125.91020202636719], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000183.npy"} +{"epoch": 0.2766439909297052, "step": 184, "batch_size": 64, "mean": 37.276283264160156, "std": 56.81777572631836, "min": -73.6332015991211, "p10": -13.499717712402344, "median": 24.25734233856201, "p90": 106.55242614746095, "max": 188.70071411132812, "pos_frac": 0.6875, "sample": [-3.0226898193359375, 74.7368392944336, 95.93359375, 7.046913146972656, 38.00495147705078, 28.959625244140625, -4.528079986572266, -12.762199401855469, 19.374122619628906, -39.187644958496094, -2.637744903564453, 107.26371765136719, -0.42668724060058594, 42.70557403564453, 86.55760192871094, -15.739002227783203, 44.764251708984375, 17.502704620361328, 43.99013900756836, 186.4839630126953, -73.6332015991211, 93.67474365234375, 100.00767517089844, 104.89274597167969, -66.76702880859375, 133.54823303222656, 71.59574127197266, 85.12062072753906, 78.2841796875, 17.30535888671875, 178.0833740234375, 10.307113647460938, 16.82293701171875, -64.45263671875, -8.117313385009766, 51.488037109375, 112.46522521972656, 26.208478927612305, -24.714393615722656, 126.08429718017578, 48.713722229003906, 33.569061279296875, 87.34949493408203, 1.3920516967773438, 47.52619934082031, -0.18408584594726562, 46.58135986328125, 59.3818359375, -2.188444137573242, 98.6117935180664, -12.976015090942383, -9.582687377929688, 22.30620574951172, 6.2330169677734375, 188.70071411132812, 56.13328552246094, -3.727853775024414, -13.287429809570312, 2.7373485565185547, 16.47461700439453, -13.5906982421875, 45.45330047607422, 0.44078636169433594, -3.6093368530273438], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000184.npy"} +{"epoch": 0.2781557067271353, "step": 185, "batch_size": 64, "mean": 40.499412536621094, "std": 76.19145202636719, "min": -168.89915466308594, "p10": -48.683709335327144, "median": 51.60733604431152, "p90": 128.08662719726564, "max": 194.22171020507812, "pos_frac": 0.640625, "sample": [8.292377471923828, 106.2852554321289, 9.342796325683594, 95.95611572265625, 169.4317169189453, -49.336490631103516, 17.58407211303711, 51.14065170288086, -49.016571044921875, -10.689346313476562, 135.0822296142578, 88.22216033935547, -24.86937713623047, -22.248092651367188, 76.64714050292969, -0.33516693115234375, 129.777587890625, 31.403518676757812, 100.29335021972656, 99.54530334472656, 1.79901123046875, 115.73023986816406, 77.39236450195312, 34.41669464111328, -4.38630485534668, 110.19126892089844, 73.95189666748047, 60.654869079589844, 52.07402038574219, 194.22171020507812, -44.86338424682617, -36.48594665527344, 81.08515167236328, 124.14105224609375, -101.4336929321289, 105.88248443603516, 99.470947265625, 58.75691223144531, 101.79002380371094, -8.499019622802734, -123.99918365478516, 89.19278717041016, -15.436473846435547, 111.50669860839844, 136.71237182617188, -168.89915466308594, 100.12118530273438, -5.899003982543945, 173.66009521484375, -13.174615859985352, 148.3736114501953, -68.76678466796875, 103.39558410644531, -19.73261260986328, -34.121307373046875, 85.95126342773438, -47.90703201293945, -57.7109375, 16.53958511352539, -35.0050048828125, -43.30649948120117, 29.76595687866211, 66.10597229003906, 106.19642639160156], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000185.npy"} +{"epoch": 0.2796674225245654, "step": 186, "batch_size": 64, "mean": 43.943119049072266, "std": 87.81253051757812, "min": -168.51388549804688, "p10": -47.74979743957518, "median": 27.71319580078125, "p90": 130.65536193847657, "max": 332.3638916015625, "pos_frac": 0.6875, "sample": [54.48963928222656, 118.0555419921875, -56.97064971923828, 3.006084442138672, 98.30150604248047, 18.708023071289062, 24.84703826904297, -64.92281341552734, -13.977378845214844, 135.0538330078125, 47.51652526855469, -25.270259857177734, 2.6273651123046875, 22.956836700439453, 332.3638916015625, 100.19873046875, 0.5317611694335938, 25.828582763671875, 42.08183288574219, -166.55543518066406, 14.49129867553711, 98.28136444091797, 94.63829803466797, 132.07174682617188, -168.51388549804688, -17.520050048828125, -10.688095092773438, -10.656673431396484, -18.27893829345703, -27.402652740478516, -18.16644287109375, 18.96195411682129, 114.77276611328125, -83.02719116210938, -26.347440719604492, -13.38082504272461, 88.11919403076172, 126.15990447998047, 101.19440460205078, -32.38651657104492, -19.970077514648438, -71.23406219482422, 31.460025787353516, -4.68841552734375, 125.18330383300781, 3.4564666748046875, 90.63884735107422, 114.82858276367188, -54.33406066894531, 43.35047149658203, 200.08042907714844, 4.039739608764648, 160.55154418945312, 288.51953125, 107.4983901977539, 53.034881591796875, 171.63902282714844, 97.64511108398438, 94.6041259765625, 79.28791046142578, 25.251007080078125, 127.3504638671875, 29.597808837890625, 53.3758430480957], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000186.npy"} +{"epoch": 0.2811791383219955, "step": 187, "batch_size": 64, "mean": 38.2210693359375, "std": 79.06280517578125, "min": -154.24038696289062, "p10": -62.21356506347654, "median": 34.89251708984375, "p90": 130.45771026611328, "max": 249.51756286621094, "pos_frac": 0.703125, "sample": [22.87377166748047, -106.30062866210938, 186.7967987060547, 134.050537109375, 249.51756286621094, 60.48710632324219, 77.54926300048828, 35.30951690673828, 54.53558349609375, 82.51456451416016, -106.02999877929688, 53.0562744140625, -22.946186065673828, 97.69413757324219, 56.033355712890625, 6.413442611694336, -2.210935592651367, 47.61854553222656, 63.42301940917969, 4.041055679321289, -109.43240356445312, -85.426025390625, 124.00995635986328, 3.353893280029297, 8.465988159179688, -38.97611999511719, 12.272632598876953, 59.94122314453125, -5.9703369140625, 130.48959350585938, 76.20394897460938, 130.38331604003906, -27.035606384277344, -22.765727996826172, 67.657470703125, 50.83709716796875, 7.045848846435547, 34.47551727294922, -78.17557525634766, 67.61727905273438, -18.15994644165039, -72.17247009277344, 88.093017578125, 5.368268966674805, -16.895164489746094, 101.90592193603516, 173.13084411621094, -5.410652160644531, 18.47900390625, 102.60809326171875, 81.91181945800781, -154.24038696289062, 124.0555648803711, 243.350341796875, 5.187360763549805, 79.84675598144531, 160.874755859375, 69.13687133789062, -12.617012023925781, 43.63105010986328, -20.393760681152344, -17.90523910522461, 32.49102783203125, 34.47373962402344], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000187.npy"} +{"epoch": 0.28269085411942557, "step": 188, "batch_size": 64, "mean": 38.517242431640625, "std": 70.37803649902344, "min": -149.92120361328125, "p10": -23.963814926147457, "median": 22.23183822631836, "p90": 125.53130111694337, "max": 300.50762939453125, "pos_frac": 0.71875, "sample": [53.65612030029297, -17.98320960998535, 99.81993103027344, -17.19472312927246, -19.13030242919922, 175.98561096191406, 141.72035217285156, 14.617715835571289, 101.92758178710938, 74.86172485351562, -10.934524536132812, 300.50762939453125, 121.947021484375, 31.37751007080078, -10.017688751220703, 9.657493591308594, 71.43721771240234, 109.87450408935547, -11.17776107788086, 1.0398693084716797, 54.46781921386719, 21.206954956054688, -35.608734130859375, -15.71527099609375, 27.479698181152344, 15.958030700683594, -51.9299430847168, 70.80537414550781, 1.8146400451660156, 107.07554626464844, 18.479717254638672, 26.067413330078125, 35.532073974609375, 186.944091796875, 164.44140625, -7.296741485595703, 88.27693176269531, 39.17958068847656, 20.37725067138672, 20.500694274902344, 138.9781494140625, -50.04034423828125, 3.641040802001953, -4.419189453125, 58.78472137451172, 39.73530578613281, -1.8580684661865234, 82.95288848876953, 23.25672149658203, 17.082508087158203, 40.57511901855469, -149.92120361328125, -26.035320281982422, 2.2602691650390625, 42.83331298828125, 8.162261962890625, 3.2152137756347656, -93.96318054199219, 78.82170104980469, 107.87954711914062, 127.06742095947266, 48.25354766845703, -13.890886306762695, -28.316558837890625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000188.npy"} +{"epoch": 0.2842025699168556, "step": 189, "batch_size": 64, "mean": 21.56586456298828, "std": 71.5008544921875, "min": -149.3134765625, "p10": -62.85268249511717, "median": 11.581321716308594, "p90": 115.22103729248047, "max": 183.85983276367188, "pos_frac": 0.65625, "sample": [-28.326873779296875, 9.537364959716797, -1.0637550354003906, 89.16867065429688, -30.369491577148438, 171.86505126953125, 129.10293579101562, 23.20869255065918, 9.333600997924805, -46.96498107910156, -133.9799346923828, 130.6317138671875, 9.992965698242188, 24.13744354248047, -12.596244812011719, -7.754280090332031, 115.44365692138672, 25.077388763427734, -4.509838104248047, 9.950366973876953, 18.977928161621094, 72.68359375, 22.23554229736328, 77.70791625976562, 24.77471923828125, 5.177989959716797, 70.11559295654297, 62.59652328491211, 3.6596298217773438, 8.605308532714844, 13.169677734375, -143.17921447753906, 39.28870391845703, 114.70159149169922, 74.45682525634766, 45.566162109375, -5.1403656005859375, -2.4421234130859375, 64.29842376708984, 28.29454803466797, 89.79499816894531, -149.3134765625, 98.80227661132812, 40.20318603515625, 20.771987915039062, -4.922611236572266, -33.13465881347656, -89.34760284423828, -41.2314453125, -1.5340194702148438, -69.66169738769531, 27.42673683166504, -87.53369140625, 183.85983276367188, 176.97808837890625, 18.366069793701172, -32.509788513183594, -94.74728393554688, 6.46934700012207, 7.334136962890625, 5.576021194458008, -24.416595458984375, 168.67794799804688, 86.87407684326172], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000189.npy"} +{"epoch": 0.2857142857142857, "step": 190, "batch_size": 64, "mean": 64.08396911621094, "std": 72.68651580810547, "min": -73.75971984863281, "p10": -18.410417556762688, "median": 47.28064155578613, "p90": 176.17110900878907, "max": 196.78680419921875, "pos_frac": 0.765625, "sample": [8.574359893798828, 134.28073120117188, 165.41656494140625, -34.73832702636719, 90.23431396484375, 164.2245635986328, 33.92707824707031, 11.661956787109375, -24.625812530517578, 143.75099182128906, 83.3538589477539, 129.92369079589844, 29.274139404296875, 19.857749938964844, 60.06415557861328, -0.6212387084960938, -12.18612289428711, 187.39981079101562, 39.69996643066406, -2.835643768310547, 123.89195251464844, 94.61618041992188, -5.457733154296875, 19.939083099365234, -1.8614463806152344, 64.38434600830078, -73.75971984863281, 46.41862106323242, 16.170333862304688, 88.47064208984375, 48.142662048339844, -61.9396858215332, 182.6646270751953, 194.28125, 108.60344696044922, 4.315074920654297, 128.04080200195312, -23.676969528198242, -1.7091350555419922, 94.45509338378906, -21.077972412109375, 90.79464721679688, 148.08914184570312, 179.52423095703125, 22.618276596069336, -35.67278289794922, -7.356115341186523, 51.6485595703125, 54.870208740234375, -5.293601989746094, 42.23253631591797, 13.324264526367188, 6.576852798461914, 195.01004028320312, 42.73626708984375, 174.36605834960938, 196.78680419921875, 13.427116394042969, 96.72265625, 141.36639404296875, 24.111167907714844, 176.9447021484375, 152.2772216796875, 74.72114562988281], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000190.npy"} +{"epoch": 0.2872260015117158, "step": 191, "batch_size": 64, "mean": 37.89714050292969, "std": 96.86518096923828, "min": -289.71630859375, "p10": -58.264878845214824, "median": 35.21876907348633, "p90": 161.00877990722665, "max": 340.79071044921875, "pos_frac": 0.65625, "sample": [175.29107666015625, -13.923545837402344, -12.427383422851562, 138.45144653320312, 102.96316528320312, -25.50841522216797, 195.48062133789062, 170.67620849609375, 50.385162353515625, 34.56230163574219, -73.38764953613281, 133.97201538085938, 83.28575897216797, -67.09400939941406, 35.87523651123047, 7.998584747314453, -125.00968933105469, 37.13079833984375, 340.79071044921875, 87.56128692626953, 51.50074768066406, 95.163330078125, 46.83256530761719, 116.91170501708984, -85.41613006591797, 86.83712005615234, 55.392822265625, 46.08492660522461, -37.66357421875, -2.3439579010009766, 38.90158462524414, -16.826171875, -1.761117935180664, 21.440990447998047, 191.0492706298828, -168.08909606933594, 6.783130645751953, -151.06431579589844, -5.649188995361328, 78.01785278320312, 44.79513168334961, 33.79471969604492, -5.409912109375, -6.803863525390625, 84.90974426269531, 12.822774887084961, 28.246231079101562, 122.67791748046875, -9.055585861206055, -0.144744873046875, -7.166387557983398, 52.04993438720703, 254.88531494140625, 1.0045242309570312, -12.60409164428711, 177.42312622070312, 110.17913818359375, -289.71630859375, 48.023780822753906, 37.528602600097656, 26.606369018554688, 104.0040283203125, -26.13219451904297, 0.32254791259765625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000191.npy"} +{"epoch": 0.2887377173091459, "step": 192, "batch_size": 64, "mean": 20.622892379760742, "std": 60.880653381347656, "min": -140.75430297851562, "p10": -46.22601737976074, "median": 10.679533004760742, "p90": 100.41837005615234, "max": 156.0648193359375, "pos_frac": 0.65625, "sample": [-140.75430297851562, -54.569740295410156, -16.317752838134766, 97.13536071777344, 33.96554946899414, 9.82673454284668, -13.009490966796875, 66.93060302734375, 103.03361511230469, -117.74472045898438, -16.726146697998047, -20.201026916503906, 17.77758026123047, 2.628652572631836, 52.814422607421875, -48.05448913574219, 110.52156066894531, -65.47813415527344, -35.36497116088867, 31.38152313232422, 99.02906036376953, 100.47423553466797, 4.614095687866211, -22.609909057617188, 58.62609100341797, 8.582162857055664, 83.95265197753906, 113.58895874023438, -16.48908233642578, 6.536960601806641, 29.11553955078125, 156.0648193359375, 97.94684600830078, 100.28801727294922, -19.314847946166992, -41.9595832824707, 22.111663818359375, -15.83798599243164, -91.26768493652344, 51.14775848388672, 10.387514114379883, -40.00197219848633, -101.55451965332031, 112.82222747802734, 88.18736267089844, 10.971551895141602, 5.344449996948242, 1.1574268341064453, 50.48882293701172, -29.262039184570312, 32.1558837890625, 83.54157257080078, 20.53343963623047, 45.65129852294922, -0.7129192352294922, 83.13957977294922, 8.277427673339844, -0.5071334838867188, 105.96485137939453, 76.92621612548828, 47.01734161376953, -26.806264877319336, 13.092300415039062, 0.6560955047607422], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000192.npy"} +{"epoch": 0.29024943310657597, "step": 193, "batch_size": 64, "mean": 41.164764404296875, "std": 62.0535888671875, "min": -125.18783569335938, "p10": -21.439883804321287, "median": 32.22795104980469, "p90": 123.16378021240234, "max": 153.86231994628906, "pos_frac": 0.703125, "sample": [8.241561889648438, 32.56207275390625, 102.01242065429688, 30.99896240234375, 9.541078567504883, -35.08604431152344, 87.06826782226562, 70.82794952392578, -98.25558471679688, -7.161443710327148, 121.91088104248047, 7.450019836425781, 39.792930603027344, 87.003173828125, 126.93446350097656, 8.388862609863281, 80.06660461425781, -17.0009765625, 87.04830169677734, 77.45487976074219, -3.246124267578125, 94.53343200683594, 94.67742919921875, -20.142932891845703, 107.08899688720703, 153.86231994628906, 1.6692123413085938, -7.3507843017578125, -13.526611328125, 38.379066467285156, 81.0731201171875, 79.02104187011719, -13.92669677734375, 31.893829345703125, 1.1335906982421875, 18.154983520507812, -10.164207458496094, -2.0371246337890625, 79.66681671142578, 49.22113800048828, -30.183868408203125, -15.364826202392578, 116.7801513671875, 151.55902099609375, 1.6206932067871094, -57.78831481933594, 78.13508605957031, -10.433574676513672, 61.60488510131836, 144.96945190429688, 123.70073699951172, 5.19288444519043, 139.1219482421875, 109.79884338378906, 91.07781982421875, -34.14710998535156, 129.54078674316406, -125.18783569335938, 74.96337127685547, 112.90399169921875, -21.99571990966797, 12.8541259765625, -6.134735107421875, 2.1781272888183594], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000193.npy"} +{"epoch": 0.29176114890400606, "step": 194, "batch_size": 64, "mean": 34.7567138671875, "std": 84.09516906738281, "min": -142.3514404296875, "p10": -63.92478256225586, "median": 14.550912857055664, "p90": 119.63060150146487, "max": 293.42987060546875, "pos_frac": 0.6875, "sample": [136.69192504882812, -61.25282287597656, 59.829124450683594, -17.305631637573242, 80.2972183227539, 23.020511627197266, -142.3514404296875, -3.0562591552734375, 13.507392883300781, 66.22653198242188, 0.7385406494140625, 69.03372192382812, 30.374954223632812, 96.72186279296875, -35.2650146484375, -60.47766876220703, 69.16297149658203, -74.9899673461914, 15.594432830810547, 3.5512161254882812, 88.57394409179688, 9.714302062988281, 60.61292266845703, 88.90184020996094, -14.597570419311523, 80.7101821899414, -65.06990814208984, 149.23345947265625, 9.222343444824219, 51.01581573486328, -0.77435302734375, 109.27162170410156, -68.53235626220703, 0.18799209594726562, -103.33247375488281, -6.682651519775391, 108.16742706298828, 240.39303588867188, 23.484844207763672, 111.98841094970703, -111.90016174316406, 8.811027526855469, 1.2579059600830078, 6.292694091796875, 96.48285675048828, 121.53569030761719, 9.089824676513672, -137.2374267578125, 274.3239440917969, 88.14474487304688, 25.739952087402344, -18.459518432617188, 53.67607116699219, 115.18539428710938, 150.1780548095703, 293.42987060546875, 10.38953971862793, 6.714441299438477, 64.49893188476562, 76.48233032226562, -3.2645320892333984, -26.975536346435547, -20.720600128173828, -1.7859859466552734], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000194.npy"} +{"epoch": 0.29327286470143615, "step": 195, "batch_size": 64, "mean": 39.91116714477539, "std": 81.48878479003906, "min": -138.49957275390625, "p10": -67.76695327758789, "median": 31.25967025756836, "p90": 133.40184173583984, "max": 230.573486328125, "pos_frac": 0.703125, "sample": [24.673538208007812, 27.131546020507812, -5.080718994140625, 28.079818725585938, 118.35464477539062, 133.76998901367188, -12.488029479980469, 51.481597900390625, 39.73155212402344, 93.69419860839844, 48.31520080566406, 112.32679748535156, -37.86480712890625, -127.63034057617188, -70.74162292480469, -131.19033813476562, -9.770740509033203, 22.801685333251953, 10.460687637329102, 115.17729949951172, -99.32591247558594, -39.00855255126953, 15.100852966308594, 110.4735107421875, 127.9193344116211, 81.9679183959961, 111.12521362304688, 121.58650207519531, 24.600135803222656, 132.54283142089844, 5.661195755004883, -14.376251220703125, 110.88633728027344, 29.342674255371094, 141.64129638671875, 6.875358581542969, -57.82731246948242, 128.7718505859375, 150.40927124023438, 128.0830535888672, 40.73194885253906, -89.97545623779297, 19.088600158691406, -48.168548583984375, -1.515869140625, 18.351293563842773, -84.54281616210938, 161.2515869140625, -138.49957275390625, 20.944387435913086, -60.82605743408203, -30.681472778320312, 120.71994018554688, 100.68391418457031, 84.00132751464844, 66.67985534667969, 230.573486328125, 57.71897888183594, -50.52607727050781, 165.5316925048828, 72.89656829833984, 33.176666259765625, 147.6505126953125, 71.36859130859375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000195.npy"} +{"epoch": 0.2947845804988662, "step": 196, "batch_size": 64, "mean": 39.7891960144043, "std": 65.32875061035156, "min": -133.0670623779297, "p10": -17.484213829040524, "median": 27.27680015563965, "p90": 121.7770477294922, "max": 266.21685791015625, "pos_frac": 0.75, "sample": [74.96409606933594, -133.0670623779297, -1.892782211303711, -41.51557922363281, 62.788787841796875, -18.549821853637695, -45.27159881591797, 38.867576599121094, 66.22391510009766, -18.743072509765625, 6.527587890625, 126.7160415649414, 87.62980651855469, 33.06132507324219, 12.099571228027344, 50.44379425048828, -130.9453125, 139.1888427734375, 19.390174865722656, 36.517913818359375, -10.218433380126953, 9.749359130859375, 111.58413696289062, -2.8625221252441406, -14.774463653564453, 30.384178161621094, 120.66452026367188, 43.62999725341797, 85.49746704101562, 266.21685791015625, 25.34897232055664, 14.239742279052734, 39.69874572753906, -9.241792678833008, 4.091056823730469, 29.204627990722656, 5.868169784545898, 95.37799072265625, -10.760391235351562, 71.5916748046875, 46.511566162109375, 77.162353515625, 110.01658630371094, -4.269985198974609, 59.25334930419922, -25.329742431640625, 12.493085861206055, 134.67083740234375, 109.3418960571289, 45.10748291015625, 103.982421875, 106.7435302734375, 24.271087646484375, -14.997795104980469, 1.2078990936279297, 122.25384521484375, 18.822744369506836, 133.57183837890625, 9.641948699951172, -0.1708526611328125, 10.605949401855469, 10.396610260009766, 7.6437225341796875, 177.85406494140625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000196.npy"} +{"epoch": 0.2962962962962963, "step": 197, "batch_size": 64, "mean": 46.079002380371094, "std": 87.25184631347656, "min": -196.93048095703125, "p10": -39.57163391113281, "median": 30.407302856445312, "p90": 146.83631134033206, "max": 271.3497619628906, "pos_frac": 0.78125, "sample": [1.3830299377441406, 174.24810791015625, -6.056781768798828, 42.348663330078125, 271.3497619628906, 110.87647247314453, -23.333621978759766, -196.93048095703125, 5.675762176513672, 19.96959686279297, 36.48970031738281, 139.0091094970703, -90.1159439086914, -26.362041473388672, 15.396373748779297, 158.61770629882812, 127.83838653564453, 49.338130950927734, 119.92437744140625, 165.17080688476562, 48.682456970214844, 10.721221923828125, 47.19646072387695, 91.91780853271484, 24.324905395507812, 14.431182861328125, 66.04457092285156, -34.38958740234375, 96.43132781982422, 63.819313049316406, 123.5932846069336, 249.08489990234375, -187.75399780273438, 100.11658477783203, -13.179996490478516, -139.10923767089844, 23.19363784790039, 24.146240234375, 79.94500732421875, 17.626556396484375, -8.242799758911133, 135.42825317382812, 113.94873046875, -41.792510986328125, 5.8098297119140625, 115.74737548828125, 22.862380981445312, -1.1684417724609375, 6.767017364501953, 94.552490234375, -50.91949462890625, 20.17095184326172, 80.32097625732422, 79.76786804199219, 8.08739185333252, -99.53878021240234, 176.04876708984375, 110.29817199707031, 6.5333404541015625, 97.20198059082031, 18.625930786132812, 150.19082641601562, 8.620416641235352, 98.05599975585938], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000197.npy"} +{"epoch": 0.29780801209372637, "step": 198, "batch_size": 64, "mean": 42.00470733642578, "std": 70.55123138427734, "min": -149.5695037841797, "p10": -42.119448661804185, "median": 36.84178161621094, "p90": 116.32732315063477, "max": 209.26458740234375, "pos_frac": 0.734375, "sample": [-70.06671142578125, 114.76839447021484, 4.883188247680664, 116.83154296875, 65.92564392089844, 38.792259216308594, -102.43109130859375, 27.17742156982422, 89.33871459960938, 34.3721923828125, 93.22233581542969, 68.00184631347656, 2.7185325622558594, -87.83468627929688, 120.70030975341797, 90.21502685546875, 83.63409423828125, 209.26458740234375, 178.92282104492188, -26.857690811157227, 98.23968505859375, 93.4330062866211, 136.34188842773438, -24.017560958862305, 112.84941101074219, -149.5695037841797, -52.28453063964844, 5.118946075439453, 75.68594360351562, 110.08264923095703, 108.61201477050781, 7.78082275390625, -48.66020202636719, -124.18687438964844, 21.67929458618164, -2.9064483642578125, 9.181026458740234, -17.913131713867188, -19.927047729492188, 79.69268798828125, 29.042388916015625, 119.84209442138672, 98.91815185546875, 35.25184631347656, 50.356224060058594, 38.43171691894531, -16.815963745117188, 115.15081024169922, 108.29653930664062, 15.092880249023438, -12.795722961425781, 24.542938232421875, 136.6207275390625, 102.24162292480469, -0.0261993408203125, 54.340179443359375, 76.09555053710938, 24.971189498901367, -9.732316970825195, 13.351554870605469, 106.63917541503906, 22.76390838623047, -7.108856201171875, 92.01998138427734], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000198.npy"} +{"epoch": 0.29931972789115646, "step": 199, "batch_size": 64, "mean": 37.502113342285156, "std": 75.60433959960938, "min": -128.31517028808594, "p10": -55.2022819519043, "median": 28.783645629882812, "p90": 135.95650787353517, "max": 204.3214569091797, "pos_frac": 0.71875, "sample": [46.399288177490234, 102.44415283203125, 28.995162963867188, 5.116355895996094, -56.220130920410156, -111.95722961425781, 85.7291030883789, 94.98189544677734, 66.92594146728516, 67.75387573242188, 137.947509765625, -29.272804260253906, 163.7886199951172, 23.85956573486328, 120.79379272460938, 20.6318359375, 25.484264373779297, 89.7929916381836, 45.134521484375, 131.3108367919922, -21.516403198242188, -26.40926742553711, 49.936466217041016, 143.0484161376953, 1.0181140899658203, 75.9061050415039, 28.572128295898438, -63.00291442871094, 30.37757110595703, 122.9646224975586, 106.15583801269531, 66.44996643066406, 11.801067352294922, -26.40998077392578, -7.153048515319824, 112.16267395019531, 125.54837036132812, -32.35033416748047, -36.98200225830078, 56.39884948730469, -128.31517028808594, 192.98190307617188, 63.11201477050781, 105.59355926513672, -117.5091552734375, 15.990699768066406, 2.779050827026367, 10.815010070800781, -18.369461059570312, 63.792022705078125, 2.4127063751220703, -103.97126770019531, -52.827301025390625, 28.28237533569336, -35.79045104980469, 5.063056945800781, 110.05235290527344, -74.90717315673828, 149.4567413330078, 2.9903526306152344, 139.531494140625, -12.764602661132812, 204.3214569091797, 71.25923156738281], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000199.npy"} +{"epoch": 0.30083144368858655, "step": 200, "batch_size": 64, "mean": 61.18167495727539, "std": 94.56924438476562, "min": -187.9285125732422, "p10": -46.9897819519043, "median": 49.66714096069336, "p90": 165.48570556640627, "max": 300.1242370605469, "pos_frac": 0.75, "sample": [4.721839904785156, 41.92685317993164, 300.1242370605469, 104.68730926513672, 115.50529479980469, -4.131660461425781, 143.63587951660156, 59.54291534423828, 4.650909423828125, -47.48616027832031, 69.84744262695312, 278.3159484863281, -59.79738235473633, 158.52825927734375, 22.29975128173828, -25.265254974365234, 43.15595245361328, -22.168182373046875, 168.46746826171875, 97.69384765625, 45.23088073730469, -92.14592742919922, 175.21047973632812, 150.97021484375, 0.3236198425292969, 102.87565612792969, 124.4051284790039, -66.25917053222656, 102.06527709960938, -15.584857940673828, 109.32881927490234, 51.22382354736328, 96.44452667236328, 281.8202819824219, 143.07232666015625, -13.699277877807617, -187.9285125732422, 110.70173645019531, 153.28848266601562, 124.13013458251953, 140.8040771484375, 100.70858764648438, 175.2624053955078, 50.295814514160156, 27.5133056640625, -126.19584655761719, -2.68572998046875, 36.385650634765625, 2.3209667205810547, 157.0722198486328, -45.831565856933594, 27.554359436035156, 49.03846740722656, -43.931907653808594, 114.65454864501953, 60.35597229003906, -24.1589412689209, -64.71826171875, 36.27948760986328, 40.31377029418945, 8.136098861694336, 3.4308176040649414, 123.30760192871094, 219.98635864257812], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000200.npy"} +{"epoch": 0.30234315948601664, "step": 201, "batch_size": 64, "mean": 39.440101623535156, "std": 83.38401794433594, "min": -236.60531616210938, "p10": -62.917276763916, "median": 35.480873107910156, "p90": 127.33885650634768, "max": 201.5816650390625, "pos_frac": 0.734375, "sample": [19.263809204101562, 104.62890625, 33.3177375793457, 99.5823745727539, 38.69551086425781, 2.0663223266601562, 67.12808227539062, 2.1036434173583984, 93.7730484008789, -12.896957397460938, 113.2991714477539, 97.30784606933594, 110.0009536743164, 84.43072509765625, -3.4152698516845703, -236.60531616210938, 197.64767456054688, 91.72431182861328, -8.092315673828125, 115.93949890136719, -48.93346405029297, 5.720100402832031, 65.51750946044922, -102.21672058105469, 10.233039855957031, 112.7086181640625, 36.89075469970703, 34.935768127441406, -13.234018325805664, 139.2265625, 15.501575469970703, 129.83522033691406, 20.011411666870117, -68.91033935546875, 98.5899658203125, 138.7324676513672, -4.7098846435546875, -126.06343078613281, 146.759765625, 6.3928375244140625, 119.85474395751953, 32.386722564697266, -0.28907203674316406, 35.44102478027344, -75.44052124023438, 31.14928436279297, 75.6592788696289, -95.34814453125, 40.62310791015625, 201.5816650390625, -6.405420303344727, 34.61383056640625, 103.71888732910156, 110.03189086914062, 94.75448608398438, 172.83038330078125, 88.31114959716797, -34.80815887451172, 25.290489196777344, 40.65403747558594, 121.51400756835938, -47.51954650878906, 35.520721435546875, -186.8458251953125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000201.npy"} +{"epoch": 0.30385487528344673, "step": 202, "batch_size": 64, "mean": 46.16645431518555, "std": 72.90229797363281, "min": -146.1041259765625, "p10": -21.2311388015747, "median": 36.1922607421875, "p90": 130.00836181640625, "max": 288.03350830078125, "pos_frac": 0.765625, "sample": [-7.993459701538086, 91.03419494628906, 129.80093383789062, 288.03350830078125, 114.27665710449219, -9.387535095214844, 99.72396850585938, 55.05021667480469, 97.81572723388672, 59.79801940917969, 17.805133819580078, 34.85866928100586, 37.52585220336914, 22.16609764099121, 3.33349609375, 77.96800231933594, 7.2159271240234375, 0.16358184814453125, 98.87482452392578, -34.795616149902344, 23.762535095214844, 50.74797821044922, 84.60359191894531, 18.39659881591797, 2.629232406616211, -23.8258056640625, 14.858404159545898, 168.5104217529297, -8.856391906738281, 84.41216278076172, 3.0013961791992188, -5.635015487670898, 99.12385559082031, -11.107070922851562, 79.01750183105469, -146.1041259765625, 78.41249084472656, 9.817062377929688, 71.31198120117188, -97.68427276611328, 79.87164306640625, 48.11680603027344, 99.5225601196289, 100.503173828125, 130.09725952148438, 31.14783477783203, 140.25405883789062, -24.8667049407959, 108.64256286621094, 39.082557678222656, -0.5418491363525391, -85.77792358398438, 1.3917179107666016, -15.176916122436523, 25.197921752929688, 155.54440307617188, 211.12831115722656, 15.030624389648438, -1.8699836730957031, 93.97502136230469, -98.95166015625, 50.390159606933594, 17.86182975769043, 155.41900634765625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000202.npy"} +{"epoch": 0.30536659108087677, "step": 203, "batch_size": 64, "mean": 45.926876068115234, "std": 73.51480865478516, "min": -110.45990753173828, "p10": -28.784179687499993, "median": 24.462379455566406, "p90": 128.5413757324219, "max": 246.22543334960938, "pos_frac": 0.734375, "sample": [-19.98302459716797, -17.846885681152344, -62.67778015136719, 62.209228515625, 6.4258575439453125, 104.99635314941406, 18.039207458496094, 246.22543334960938, 58.286590576171875, 4.9959869384765625, 181.63099670410156, 11.026443481445312, 93.95674896240234, 223.01895141601562, 8.511259078979492, 199.42893981933594, 95.01663208007812, -11.021156311035156, 77.44463348388672, 116.77328491210938, -85.7457275390625, 11.935081481933594, 56.229393005371094, 4.862674713134766, -19.613943099975586, 4.190704345703125, 18.258438110351562, 204.3363800048828, 13.098533630371094, 108.43619537353516, 25.540367126464844, 8.890968322753906, 122.73303985595703, 37.85853958129883, 77.48825073242188, 14.610031127929688, 123.61357879638672, 66.8107681274414, 68.4444580078125, 92.31153869628906, -2.8567123413085938, 113.43569946289062, 36.041481018066406, 13.790496826171875, 86.27840423583984, -8.15020751953125, 129.864990234375, 158.19973754882812, -53.70661163330078, 125.45294189453125, 95.33451843261719, -9.708297729492188, -31.69122314453125, 11.7913818359375, -22.409454345703125, -110.45990753173828, 23.38439178466797, -5.9720611572265625, 36.51127624511719, 42.83116912841797, 46.65544891357422, -31.516204833984375, -52.70266342163086, -1.8254375457763672], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000203.npy"} +{"epoch": 0.30687830687830686, "step": 204, "batch_size": 64, "mean": 45.888301849365234, "std": 68.19896697998047, "min": -159.99794006347656, "p10": -21.49314289093017, "median": 41.267791748046875, "p90": 136.06441497802734, "max": 187.55950927734375, "pos_frac": 0.75, "sample": [185.9351348876953, 121.96035766601562, -2.686420440673828, -2.43438720703125, 20.813629150390625, 142.9927978515625, 59.95733642578125, 50.56146240234375, -56.29991912841797, 174.15933227539062, 187.55950927734375, -55.03948211669922, 5.568605422973633, 6.282543182373047, 57.730613708496094, 73.49551391601562, 75.27288055419922, 7.3329925537109375, -2.3235015869140625, 19.696510314941406, 87.71993255615234, -8.512046813964844, 68.6607437133789, 114.99201202392578, -24.44734764099121, 19.92498016357422, 136.57003784179688, 58.83158493041992, 14.272041320800781, 24.19123077392578, 134.88462829589844, -12.137252807617188, 2.747722625732422, 47.452667236328125, 23.935497283935547, -56.237667083740234, 86.0393295288086, 126.78694152832031, 1.396505355834961, 93.22177124023438, -5.207355499267578, 43.43741989135742, 102.68927764892578, 125.0133285522461, -14.599998474121094, 84.86035919189453, 41.89411926269531, -0.49201011657714844, 68.51702880859375, 29.497509002685547, 139.36244201660156, 133.63436889648438, -26.651710510253906, 4.36279296875, 72.46176147460938, 169.2354278564453, 85.44763946533203, 6.036247253417969, 6.365581512451172, 40.64146423339844, -90.9630126953125, -4.150390625, 74.62810516357422, -159.99794006347656], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000204.npy"} +{"epoch": 0.30839002267573695, "step": 205, "batch_size": 64, "mean": 47.16590118408203, "std": 88.971435546875, "min": -169.8841552734375, "p10": -41.34978485107422, "median": 34.85503387451172, "p90": 164.86118774414066, "max": 300.1700439453125, "pos_frac": 0.671875, "sample": [300.1700439453125, 17.355859756469727, -4.141611099243164, 38.81947326660156, 92.40953063964844, 121.11782836914062, 102.288330078125, 57.22266387939453, -139.0430908203125, 234.3450927734375, 21.641740798950195, -7.835536956787109, 218.72457885742188, 73.6067123413086, 74.20652770996094, -31.571456909179688, 4.072967529296875, 39.62947082519531, 23.5816650390625, 24.77190399169922, 13.801872253417969, 100.51526641845703, -1.6821613311767578, -10.428466796875, 126.02891540527344, -19.002288818359375, -41.87055969238281, -1.1072769165039062, 7.130649566650391, 207.40347290039062, -15.244529724121094, -85.10977172851562, 135.94427490234375, 58.519195556640625, 30.890594482421875, 93.792236328125, 23.268667221069336, 69.66952514648438, -29.509422302246094, 40.034751892089844, -169.8841552734375, 154.58950805664062, 91.45099639892578, 107.40675354003906, 216.98422241210938, 169.26333618164062, -40.1346435546875, 60.35370635986328, 118.12956237792969, -11.862014770507812, -8.50054931640625, 30.882095336914062, 92.59300231933594, 27.25891876220703, -67.5728759765625, 171.28292846679688, 102.96955871582031, -9.50761604309082, 127.50759887695312, -25.399829864501953, -59.77813720703125, -126.03388977050781, 44.514305114746094, 57.687198638916016], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000205.npy"} +{"epoch": 0.30990173847316704, "step": 206, "batch_size": 64, "mean": 33.35157012939453, "std": 77.93977355957031, "min": -124.1749267578125, "p10": -53.260765075683594, "median": 12.806089401245117, "p90": 146.4884460449219, "max": 228.37265014648438, "pos_frac": 0.640625, "sample": [-13.466619491577148, -22.704269409179688, 10.980560302734375, -27.13695526123047, -11.675741195678711, 3.4130096435546875, 45.61973571777344, 25.353946685791016, 102.4597396850586, 14.63161849975586, -55.729461669921875, 175.314697265625, 65.24702453613281, 6.670736312866211, -17.701210021972656, 228.37265014648438, -1.7081527709960938, 144.17080688476562, 60.775604248046875, 7.266668319702148, 147.48171997070312, 51.6700325012207, 36.032249450683594, 6.437255859375, 6.8252105712890625, 151.07931518554688, 23.412647247314453, -99.82669067382812, 106.41697692871094, 58.96458435058594, 109.96389770507812, 143.91323852539062, -28.977500915527344, 10.556652069091797, 114.17498779296875, -39.999725341796875, -124.1749267578125, -95.98729705810547, -12.377267837524414, -15.433517456054688, 25.415420532226562, -8.606700897216797, 17.13067626953125, -13.827468872070312, -21.118606567382812, -90.45481872558594, 122.9566650390625, 191.20281982421875, 1.9308700561523438, 36.3541259765625, 165.03250122070312, 135.6128692626953, 30.70062255859375, -35.6973876953125, 90.97381591796875, 186.389404296875, 41.63931655883789, -3.4758377075195312, 49.24684143066406, 106.95732116699219, -85.68465423583984, -53.67662048339844, -52.290435791015625, 7.483451843261719], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000206.npy"} +{"epoch": 0.31141345427059713, "step": 207, "batch_size": 64, "mean": 42.942047119140625, "std": 78.88887786865234, "min": -100.50311279296875, "p10": -45.8644229888916, "median": 21.841050148010254, "p90": 156.80584106445315, "max": 257.1612243652344, "pos_frac": 0.75, "sample": [234.27073669433594, 22.373743057250977, -23.045269012451172, 41.16948699951172, 32.99601745605469, 7.624851226806641, 24.433502197265625, -39.03501892089844, 3.5205612182617188, 166.25582885742188, 0.11177825927734375, 41.265586853027344, 18.61456298828125, 132.85910034179688, 0.8699245452880859, -26.547744750976562, 56.105377197265625, 6.156585693359375, 50.79296875, 165.01124572753906, 88.65950012207031, 52.97746276855469, 14.023696899414062, -90.659423828125, -72.89325714111328, -17.060455322265625, -8.424154281616211, 159.95254516601562, -40.165184020996094, 21.30835723876953, 118.4809799194336, 33.13729476928711, 257.1612243652344, -47.98735809326172, 8.616043090820312, -40.91090774536133, -80.80050659179688, 56.09092712402344, -100.50311279296875, 6.169788360595703, 72.76564025878906, 133.11607360839844, 11.900123596191406, 48.64655303955078, 20.65729522705078, 129.08615112304688, 78.02365112304688, -86.50125885009766, 118.86556243896484, 98.76475524902344, -54.00110626220703, 105.47506713867188, 129.24398803710938, 16.539642333984375, 3.3900604248046875, 168.89724731445312, 117.66856384277344, 18.36083221435547, 177.5548858642578, 89.477294921875, -13.727729797363281, 10.953140258789062, -29.306243896484375, 149.46353149414062], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000207.npy"} +{"epoch": 0.3129251700680272, "step": 208, "batch_size": 64, "mean": 19.594409942626953, "std": 68.01295471191406, "min": -154.0918731689453, "p10": -57.57189826965332, "median": 12.449504852294922, "p90": 103.26523361206056, "max": 154.65591430664062, "pos_frac": 0.59375, "sample": [5.975011825561523, 3.5114002227783203, 82.64204406738281, -22.382497787475586, 90.263916015625, 90.40430450439453, 71.10671997070312, -8.239303588867188, 26.151397705078125, -98.7121810913086, -55.44493103027344, 94.62411499023438, 82.9664077758789, 66.07365417480469, 96.23233032226562, -3.6939353942871094, 18.07231903076172, -27.376251220703125, -5.016191482543945, -143.8295440673828, 64.57498168945312, -26.920265197753906, 0.1345233917236328, -79.43167877197266, 97.61199188232422, 13.077543258666992, 103.9017562866211, -58.483455657958984, 25.551185607910156, 82.74127960205078, -20.036239624023438, 2.4829559326171875, -41.11846923828125, 101.78001403808594, -154.0918731689453, 47.469573974609375, 129.58929443359375, 112.47016906738281, 61.75712585449219, -12.766668319702148, 154.65591430664062, 66.11906433105469, 17.126197814941406, -52.02803039550781, 108.41629791259766, -42.22235870361328, 116.78433227539062, -13.756961822509766, 11.821466445922852, 56.43629837036133, -41.35889434814453, 35.100868225097656, -8.140419006347656, 58.018043518066406, -38.609249114990234, 0.286163330078125, 39.659912109375, -6.5865936279296875, -103.23261260986328, -30.188339233398438, -79.3868637084961, -20.62297821044922, 94.22952270507812, 117.89908599853516], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000208.npy"} +{"epoch": 0.3144368858654573, "step": 209, "batch_size": 64, "mean": 48.06984329223633, "std": 98.64625549316406, "min": -309.392822265625, "p10": -51.29801788330077, "median": 44.654762268066406, "p90": 160.63648529052736, "max": 304.8506774902344, "pos_frac": 0.734375, "sample": [146.11050415039062, 111.34273529052734, 35.16413116455078, 43.49966812133789, -12.07830810546875, 284.219970703125, 45.030418395996094, 108.70710754394531, 44.27910614013672, 304.8506774902344, 46.318138122558594, -0.86334228515625, -112.21170043945312, 86.06800842285156, 51.46458435058594, 58.7669677734375, -56.059295654296875, 17.823707580566406, 21.312137603759766, 226.32579040527344, 8.677593231201172, -6.927886962890625, 8.668228149414062, 58.50592041015625, 37.63395690917969, -15.898651123046875, 69.71361541748047, 78.31527709960938, 154.138916015625, 96.30497741699219, 68.25733184814453, 129.8494873046875, 24.617488861083984, 153.37445068359375, 163.42115783691406, -25.559432983398438, 166.4267578125, 76.59724426269531, -168.223388671875, 26.578125, -40.18836975097656, -4.800725936889648, -34.06022644042969, -29.108078002929688, 169.69573974609375, 15.423238754272461, 206.20977783203125, 84.34259033203125, 63.29510498046875, 97.76921081542969, 39.87135314941406, -114.65801239013672, 9.998453140258789, -309.392822265625, -63.369049072265625, 1.0172157287597656, 73.8785400390625, -94.351318359375, 21.464279174804688, 73.06787109375, 140.24249267578125, 119.25648498535156, 106.06124877929688, -9.73687744140625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000209.npy"} +{"epoch": 0.31594860166288735, "step": 210, "batch_size": 64, "mean": 57.38462829589844, "std": 90.6273193359375, "min": -109.36109161376953, "p10": -67.24890518188477, "median": 47.65961456298828, "p90": 166.49384002685548, "max": 300.8780517578125, "pos_frac": 0.796875, "sample": [91.26642608642578, 18.738540649414062, 8.32989501953125, 36.117767333984375, 3.060546875, 59.990081787109375, 161.96974182128906, 85.40347290039062, 164.34640502929688, 224.02529907226562, 14.25582504272461, 281.54278564453125, 174.23638916015625, 300.8780517578125, 167.41416931152344, -8.093978881835938, -96.34688568115234, -102.90681457519531, 84.64846801757812, -109.36109161376953, 69.40376281738281, 49.306236267089844, 14.473756790161133, -92.110107421875, 143.85214233398438, 55.72032165527344, 3.758859634399414, 24.271835327148438, 162.36636352539062, 4.581886291503906, 168.0712890625, 123.3521499633789, 46.01299285888672, 89.16281127929688, 8.282787322998047, -15.728157043457031, -1.5027427673339844, -81.31008911132812, 23.589962005615234, 138.7286376953125, -4.824552536010742, 270.8165283203125, 35.731353759765625, 6.253974914550781, 101.31396484375, -3.7836532592773438, 12.310422897338867, 78.0391616821289, 66.95320892333984, 12.962455749511719, 88.98951721191406, 107.75870513916016, 59.63998031616211, -103.5010986328125, 23.26158905029297, 142.1661376953125, 83.70291900634766, 18.603187561035156, 113.41401672363281, -65.24760437011719, 14.79536247253418, -68.10660552978516, 82.49777221679688, 105.06973266601562], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000210.npy"} +{"epoch": 0.31746031746031744, "step": 211, "batch_size": 64, "mean": 57.12138366699219, "std": 83.56978607177734, "min": -85.96954345703125, "p10": -27.511414527893066, "median": 22.63066291809082, "p90": 174.70261688232426, "max": 292.7169494628906, "pos_frac": 0.71875, "sample": [-20.801393508911133, -53.528663635253906, 61.989967346191406, 92.99279022216797, 17.644615173339844, -26.975427627563477, -75.71441650390625, 166.13140869140625, 203.30862426757812, 2.454965591430664, 100.8280029296875, -3.420391082763672, 103.8072509765625, 138.94216918945312, 24.00942611694336, 100.37252807617188, 7.09160041809082, -27.74112319946289, 130.31185913085938, -38.19771957397461, 67.00199890136719, -2.11376953125, 33.71285629272461, -85.96954345703125, 13.357158660888672, -4.600248336791992, -7.454368591308594, 18.170578002929688, 127.31007385253906, 10.904945373535156, 66.63694763183594, 15.70672607421875, 7.925481796264648, -13.929611206054688, 178.37599182128906, 25.305225372314453, 21.25189971923828, 129.68296813964844, -47.78291702270508, 45.60846710205078, -3.279754638671875, 292.7169494628906, 275.9542541503906, 20.319005966186523, 7.980384826660156, 106.43812561035156, 119.8368148803711, 107.50479125976562, 17.972854614257812, 183.4684600830078, -38.36865997314453, 79.25773620605469, 230.2285919189453, 127.0320053100586, 124.1549072265625, 123.0886001586914, 98.1937255859375, 93.90361785888672, -10.834249496459961, 211.3766632080078, -9.062324523925781, 6.668037414550781, 12.903785705566406, -24.29278564453125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000211.npy"} +{"epoch": 0.31897203325774753, "step": 212, "batch_size": 64, "mean": 57.13621520996094, "std": 95.35843658447266, "min": -235.03404235839844, "p10": -57.121701812744135, "median": 47.84019088745117, "p90": 139.9503158569336, "max": 323.23992919921875, "pos_frac": 0.734375, "sample": [108.93572235107422, 121.83488464355469, 117.66371154785156, 118.76950073242188, -63.283653259277344, -4.130516052246094, 126.83273315429688, 48.54966735839844, -85.10608673095703, 131.14202880859375, -26.787761688232422, -235.03404235839844, 125.5121841430664, 312.8283996582031, 34.996971130371094, 92.89678955078125, 126.21955871582031, 92.89586639404297, -79.1157455444336, 91.57359313964844, 47.130714416503906, 11.798931121826172, 6.790571212768555, 89.66792297363281, -60.45252990722656, -7.8605194091796875, 323.23992919921875, 20.178037643432617, 107.39552307128906, 196.20945739746094, 95.60957336425781, 93.42729187011719, 125.02295684814453, 35.805625915527344, 31.467681884765625, 39.90642547607422, -27.941577911376953, 64.11896514892578, 6.54258918762207, 33.76766586303711, 40.11875915527344, 41.4622802734375, -48.14570999145508, 14.136001586914062, 76.07666778564453, 62.243141174316406, 79.30701446533203, 135.61241149902344, -28.060161590576172, -70.79296875, -7.2362518310546875, -15.474937438964844, 79.77305603027344, -10.749290466308594, 153.25131225585938, 141.80941772460938, 240.47329711914062, 286.0293884277344, 104.36949157714844, 10.548233032226562, -63.88934326171875, 51.70457458496094, -49.349769592285156, 44.48241424560547], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000212.npy"} +{"epoch": 0.3204837490551776, "step": 213, "batch_size": 64, "mean": 54.76559066772461, "std": 93.61469268798828, "min": -206.67672729492188, "p10": -64.71389198303223, "median": 55.794654846191406, "p90": 168.08329772949222, "max": 256.1842041015625, "pos_frac": 0.765625, "sample": [120.95054626464844, 67.34915161132812, 26.72283935546875, 163.0304718017578, 70.07533264160156, 24.372364044189453, -125.75527954101562, -0.4573020935058594, -206.67672729492188, 102.39069366455078, 193.79013061523438, 2.0388355255126953, 78.32063293457031, 9.379234313964844, -118.3265609741211, -65.65469360351562, 110.54859161376953, 256.1842041015625, -26.854026794433594, 109.89491271972656, 109.61705017089844, -109.45645141601562, 241.76348876953125, 208.30224609375, 83.37495422363281, 42.53527069091797, -38.356449127197266, -57.528282165527344, 16.18254852294922, 82.0422592163086, -62.5186882019043, 46.279052734375, 47.288719177246094, 40.61522674560547, 121.35060119628906, -91.74887084960938, 10.802783966064453, 83.21021270751953, 116.82121276855469, 26.586151123046875, 18.921531677246094, 95.60030364990234, 122.67045593261719, 5.729700088500977, 108.92579650878906, 50.513458251953125, -11.627082824707031, 100.55056762695312, 170.24879455566406, 27.74530792236328, -57.14232635498047, 110.86466217041016, -86.92935180664062, 61.07585144042969, 7.723518371582031, 85.20448303222656, 218.6064453125, 212.294189453125, 155.82518005371094, -14.139511108398438, 42.707489013671875, 120.4610824584961, 128.17520141601562, 122.5057373046875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000213.npy"} +{"epoch": 0.3219954648526077, "step": 214, "batch_size": 64, "mean": 45.01720428466797, "std": 101.39070129394531, "min": -193.5103759765625, "p10": -65.7470489501953, "median": 40.3525333404541, "p90": 137.32393188476564, "max": 409.12451171875, "pos_frac": 0.6875, "sample": [141.4114990234375, 94.62430572509766, -52.34959411621094, 38.93752670288086, 52.41868591308594, 130.76312255859375, 32.909297943115234, 26.95024871826172, 82.4670639038086, 87.2728500366211, 120.51739501953125, 107.63330078125, 42.54761505126953, -144.96707153320312, 112.15864562988281, 131.40480041503906, 104.21758270263672, -77.02813720703125, 38.701683044433594, 28.734966278076172, -23.84770965576172, 69.92691040039062, 45.53178405761719, -6.551513671875, -5.0611724853515625, 84.63467407226562, -68.98127746582031, 13.405080795288086, 139.86070251464844, 109.80870056152344, 62.109657287597656, -22.768692016601562, -24.116445541381836, -48.403350830078125, 76.3301773071289, 129.24002075195312, -150.16473388671875, -26.989288330078125, 127.49415588378906, 227.31617736816406, -58.20051574707031, -24.115894317626953, 107.62605285644531, 43.71180725097656, 147.00291442871094, 409.12451171875, 23.163318634033203, 6.895423889160156, 41.767539978027344, -12.100933074951172, -193.5103759765625, 170.0591583251953, 11.15797233581543, 0.135833740234375, 2.0643062591552734, -4.691768646240234, 334.31390380859375, 12.06341552734375, -139.14427185058594, 85.93399047851562, 93.69445037841797, 126.26513671875, -15.186817169189453, -95.02783203125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000214.npy"} +{"epoch": 0.3235071806500378, "step": 215, "batch_size": 64, "mean": 60.51953887939453, "std": 100.4614028930664, "min": -120.08200073242188, "p10": -67.15739517211914, "median": 53.19662094116211, "p90": 162.50755157470704, "max": 373.50531005859375, "pos_frac": 0.734375, "sample": [29.73630142211914, -103.33795928955078, -117.5320053100586, 90.5931396484375, 59.650794982910156, -87.12383270263672, 5.654296875, 4.549324035644531, 130.48817443847656, 132.2781219482422, 40.84259033203125, -22.348751068115234, -62.049171447753906, 48.21476745605469, 69.82807922363281, 152.44378662109375, 12.996978759765625, 119.67142486572266, 18.689041137695312, -14.984451293945312, 97.27613830566406, 4.700592041015625, -20.00936508178711, 4.144004821777344, 125.95647430419922, 3.3628578186035156, -8.487480163574219, 82.37893676757812, 134.11453247070312, -56.3575439453125, 65.85942077636719, 135.29177856445312, 9.724445343017578, -26.843475341796875, -120.08200073242188, -69.34663391113281, 108.45614624023438, 101.66244506835938, 35.377593994140625, 272.81011962890625, 11.330535888671875, 36.16381072998047, 264.9728088378906, -12.825279235839844, 187.1663818359375, 161.1128387451172, 163.10528564453125, 127.04550170898438, 120.29926300048828, 283.38885498046875, 121.36418151855469, 105.30850982666016, 105.99933624267578, -85.53901672363281, 58.17847442626953, 15.085960388183594, 247.72015380859375, 373.50531005859375, 100.7790756225586, -1.9108829498291016, 125.105712890625, -1.5698966979980469, 60.40599822998047, -81.19189453125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000215.npy"} +{"epoch": 0.3250188964474679, "step": 216, "batch_size": 64, "mean": 62.77414321899414, "std": 81.60292053222656, "min": -142.09158325195312, "p10": -24.97222290039062, "median": 52.064109802246094, "p90": 175.02953186035165, "max": 255.89532470703125, "pos_frac": 0.78125, "sample": [85.257080078125, 152.2562255859375, 125.2962646484375, 192.8894805908203, 66.54405212402344, 30.90795135498047, 33.169761657714844, 24.41986083984375, 117.60758972167969, 117.69651794433594, 101.65050506591797, 65.92820739746094, 16.257164001464844, -14.490188598632812, 25.050498962402344, -54.55120849609375, 31.821487426757812, 43.41734313964844, -27.684417724609375, 141.26126098632812, 54.87774658203125, 227.86288452148438, 103.8951644897461, 141.04293823242188, 73.98909759521484, 63.4206657409668, 135.6981658935547, 2.867626190185547, -4.140388488769531, 120.05133056640625, 215.50912475585938, -142.09158325195312, 74.95574951171875, 85.94209289550781, 255.89532470703125, 184.43328857421875, -62.34584045410156, 1.8705329895019531, 59.63718795776367, -10.965608596801758, 3.2266921997070312, 43.661285400390625, 153.08743286132812, -103.3349838256836, 41.616539001464844, -78.04288482666016, 35.98365020751953, 9.591609954833984, -18.643768310546875, 49.25047302246094, 151.46861267089844, -28.507823944091797, -2.7364959716796875, -1.5655956268310547, 207.252197265625, 5.077831268310547, 41.79930114746094, 116.3518295288086, 69.94347381591797, 204.96102905273438, -6.679676055908203, 117.55917358398438, 49.244842529296875, 99.86936950683594], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000216.npy"} +{"epoch": 0.32653061224489793, "step": 217, "batch_size": 64, "mean": 64.74864196777344, "std": 95.94368743896484, "min": -185.86404418945312, "p10": -34.66939620971679, "median": 55.35505676269531, "p90": 188.20010528564455, "max": 349.9578857421875, "pos_frac": 0.765625, "sample": [130.16229248046875, 117.00348663330078, -47.357749938964844, -1.1547622680664062, -0.29000282287597656, -15.449115753173828, 4.906642913818359, 348.3104248046875, -185.86404418945312, 227.57659912109375, 218.55519104003906, 178.47792053222656, 21.80736541748047, -38.99063491821289, 55.23115539550781, 55.47895812988281, 183.23846435546875, 78.63577270507812, 39.49397277832031, 86.61592102050781, 33.867706298828125, 59.447357177734375, 48.374664306640625, 121.8375244140625, 4.000526428222656, 123.53355407714844, -107.19386291503906, -21.835060119628906, 13.374664306640625, 69.52296447753906, 129.6582794189453, 54.98773956298828, 349.9578857421875, -54.90911102294922, 72.22795104980469, -24.586505889892578, 30.043716430664062, 19.69118309020996, 60.63348388671875, -83.40097045898438, -83.30853271484375, 122.18862915039062, 31.583389282226562, 9.65948486328125, -13.967742919921875, -19.81218719482422, 61.443756103515625, 96.8314208984375, 104.89486694335938, 197.22743225097656, 67.97422790527344, 109.4246597290039, 40.59669494628906, 9.63807487487793, 193.63198852539062, 190.32652282714844, 176.28201293945312, 113.94998168945312, 77.6376953125, 109.66081237792969, 23.510452270507812, 140.61337280273438, 40.6381950378418, -12.333847045898438], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000217.npy"} +{"epoch": 0.328042328042328, "step": 218, "batch_size": 64, "mean": 30.4877986907959, "std": 85.8984146118164, "min": -213.99810791015625, "p10": -91.19821090698241, "median": 34.35157012939453, "p90": 131.26505050659182, "max": 250.70240783691406, "pos_frac": 0.625, "sample": [-25.8072509765625, 5.067329406738281, 75.9569091796875, 76.62486267089844, 55.273284912109375, -8.176589965820312, 38.28961181640625, -23.19525146484375, 7.1387176513671875, 134.0426025390625, 48.12046813964844, 30.413528442382812, 135.87106323242188, -120.51516723632812, -73.46190643310547, -110.22579956054688, 54.598976135253906, 123.8009033203125, -153.81515502929688, -13.605857849121094, 59.234046936035156, 75.97439575195312, -32.464881896972656, 68.91969299316406, 103.65946960449219, 107.87295532226562, 100.59369659423828, 250.70240783691406, 85.75519561767578, 111.73562622070312, -11.055233001708984, -32.791229248046875, 23.403350830078125, -65.61085510253906, -4.140895843505859, -32.61536407470703, -9.41754150390625, 74.46483612060547, 15.516143798828125, 88.87147521972656, 136.7301025390625, 98.1226806640625, -4.515239715576172, -3.475067138671875, 21.799755096435547, -99.04196166992188, 124.78409576416016, -20.105405807495117, -98.79948425292969, 0.668609619140625, 119.44061279296875, -139.92724609375, 41.12256622314453, 2.3667163848876953, 100.35005950927734, 150.55328369140625, -4.025196075439453, 111.0660629272461, -213.99810791015625, -22.348587036132812, 40.19671630859375, 48.99298095703125, 180.35458374023438, 145.90402221679688], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000218.npy"} +{"epoch": 0.3295540438397581, "step": 219, "batch_size": 64, "mean": 36.991607666015625, "std": 70.43046569824219, "min": -112.57991790771484, "p10": -43.874532318115214, "median": 23.847177505493164, "p90": 124.59117736816407, "max": 259.0220642089844, "pos_frac": 0.703125, "sample": [-6.9328765869140625, 123.10455322265625, 103.46048736572266, 17.964235305786133, 27.97028350830078, -10.516563415527344, 7.793773651123047, 0.9625320434570312, 16.04759979248047, -27.694610595703125, 1.9442386627197266, 201.30410766601562, 259.0220642089844, -79.4880599975586, -66.72758483886719, 68.36016845703125, 27.383079528808594, -7.297760009765625, 85.4886474609375, 9.39512825012207, 7.439208984375, -73.19863891601562, 4.6592559814453125, 26.616107940673828, -16.057052612304688, -3.771331787109375, -15.893508911132812, -5.861225128173828, 125.22830200195312, 15.774337768554688, 82.7545166015625, 106.41311645507812, 105.13594055175781, 132.6813201904297, 58.385047912597656, 159.54441833496094, -20.474365234375, 1.9051456451416016, -50.80878448486328, 69.2054443359375, 101.20299530029297, 57.51741027832031, -112.57991790771484, 43.07903289794922, 31.004127502441406, -16.45166015625, 42.90571594238281, -52.06187438964844, 103.54749298095703, 112.46190643310547, 84.23568725585938, 55.28105163574219, 9.92294692993164, -18.62969207763672, -107.930419921875, 33.6296501159668, 68.8758773803711, -8.766319274902344, 153.55320739746094, 13.159191131591797, 129.158447265625, 78.41375732421875, 83.63516235351562, 21.0782470703125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000219.npy"} +{"epoch": 0.3310657596371882, "step": 220, "batch_size": 64, "mean": 49.80740737915039, "std": 73.3545913696289, "min": -124.17169189453125, "p10": -31.392801666259764, "median": 40.658185958862305, "p90": 138.7029922485352, "max": 235.81399536132812, "pos_frac": 0.734375, "sample": [88.2646713256836, -20.86346435546875, 94.1923599243164, 25.084571838378906, 40.81766891479492, 44.81999206542969, 235.81399536132812, 75.97477722167969, 8.702384948730469, 28.22461700439453, 2.6498260498046875, 163.049072265625, 3.1111011505126953, -92.9118881225586, -57.68708038330078, 3.1196060180664062, 141.91946411132812, -28.32146453857422, -17.880268096923828, 31.91815948486328, 116.53485107421875, 65.81641387939453, 0.4522666931152344, -49.38078308105469, -79.29106903076172, -33.60169982910156, 18.47403335571289, -3.962432861328125, 129.91253662109375, 69.9447021484375, 40.49870300292969, 70.03850555419922, 115.25328063964844, 98.38175201416016, 122.56149291992188, -2.838329315185547, 172.9993133544922, 113.40623474121094, 98.13816833496094, -31.568374633789062, 43.166709899902344, 97.42117309570312, -30.983131408691406, 106.5008544921875, 192.6997528076172, -11.288803100585938, 150.47174072265625, -3.847003936767578, 15.243417739868164, 129.65402221679688, 86.6015396118164, -5.684513092041016, 131.19789123535156, 4.866291046142578, 43.69664764404297, 35.48957061767578, -124.17169189453125, 25.028289794921875, 124.93458557128906, 5.677362442016602, 162.4185791015625, -11.570310592651367, 118.97052001953125, 99.41295623779297], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000220.npy"} +{"epoch": 0.3325774754346183, "step": 221, "batch_size": 64, "mean": 57.523223876953125, "std": 87.9946517944336, "min": -136.72598266601562, "p10": -20.861550903320303, "median": 30.34050750732422, "p90": 158.95791320800782, "max": 359.4940490722656, "pos_frac": 0.71875, "sample": [8.147314071655273, -65.26730346679688, 69.20784759521484, 60.91558837890625, -9.202981948852539, 91.77085876464844, -0.4452342987060547, 161.00967407226562, 70.91641998291016, -77.6666488647461, -1.3357276916503906, 22.524398803710938, 75.40231323242188, 41.91368103027344, -63.251197814941406, 64.73575592041016, -1.7573089599609375, -4.499956130981445, 40.41526794433594, 93.52176666259766, 129.43353271484375, 154.17047119140625, 23.038578033447266, 105.392578125, 359.4940490722656, -12.45123291015625, 131.75303649902344, 139.3076629638672, -24.465972900390625, -5.965703964233398, 76.17304992675781, 142.44497680664062, -10.295520782470703, 98.72276306152344, 186.60008239746094, 31.612220764160156, 168.828369140625, -63.319419860839844, 71.4560317993164, 216.63250732421875, 92.081787109375, 7.585105895996094, 29.06879425048828, 1.046152114868164, 7.188447952270508, 19.84107208251953, -3.165924072265625, 140.2844696044922, 4.479820251464844, 0.10443496704101562, -2.8311920166015625, -136.72598266601562, 16.013702392578125, 183.07763671875, 129.3684539794922, -45.780067443847656, 27.776275634765625, 139.37850952148438, 317.93524169921875, 98.04591369628906, 15.749473571777344, 24.9334716796875, 120.78946685791016, -0.37534332275390625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000221.npy"} +{"epoch": 0.3340891912320484, "step": 222, "batch_size": 64, "mean": 70.68612670898438, "std": 97.96466064453125, "min": -141.71304321289062, "p10": -38.15948028564453, "median": 71.59761428833008, "p90": 177.34142608642583, "max": 401.8404846191406, "pos_frac": 0.75, "sample": [-0.7107448577880859, -97.82849884033203, 32.658878326416016, -40.490234375, 118.14511108398438, 112.3412094116211, 80.92449951171875, -14.524486541748047, 119.70365905761719, 146.45535278320312, 236.49569702148438, 15.812328338623047, -141.71304321289062, 12.059837341308594, 59.0902099609375, -32.72105407714844, 68.22228240966797, 21.495769500732422, 30.542194366455078, 126.04058837890625, 41.169410705566406, 1.9572772979736328, 134.23211669921875, 164.05267333984375, -0.24839019775390625, 141.15386962890625, 25.21141815185547, 401.8404846191406, -42.129150390625, 89.21187591552734, 73.82061004638672, -3.4139556884765625, 207.27978515625, 111.4556884765625, 84.00364685058594, 74.75270080566406, 131.07992553710938, -7.296775817871094, -18.5478458404541, 69.37461853027344, -112.08659362792969, 92.52039337158203, 78.07502746582031, -0.7404861450195312, 21.12667465209961, 368.42694091796875, 56.100074768066406, -49.10645294189453, 43.60894775390625, 82.25243377685547, 82.3482437133789, -12.460315704345703, 209.2076416015625, 156.77581787109375, 93.10317993164062, 37.91289520263672, 103.97669982910156, 147.5203857421875, -79.3653564453125, 183.03660583496094, 126.89601135253906, 120.2023696899414, 221.27346801757812, 22.348098754882812], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000222.npy"} +{"epoch": 0.3356009070294785, "step": 223, "batch_size": 64, "mean": 59.26352310180664, "std": 81.85189056396484, "min": -78.36720275878906, "p10": -36.3345287322998, "median": 52.488685607910156, "p90": 158.26481018066406, "max": 271.44512939453125, "pos_frac": 0.671875, "sample": [75.41584014892578, 16.519145965576172, 93.98570251464844, 16.141082763671875, -24.781845092773438, 81.60890197753906, -78.36720275878906, 25.047958374023438, -48.318206787109375, 114.71437072753906, -0.001964569091796875, 204.6830596923828, 104.71959686279297, 139.267822265625, 47.105892181396484, -3.4102134704589844, -15.336767196655273, 50.85356903076172, 156.29779052734375, -36.16346740722656, 123.55526733398438, 271.44512939453125, -36.407840728759766, 109.52521514892578, -9.921958923339844, -29.374267578125, -5.060731887817383, 226.78367614746094, -10.09664535522461, -43.47986602783203, 83.78767395019531, 21.482751846313477, -70.57508850097656, 1.1306343078613281, 104.82077026367188, 117.10391998291016, 132.25018310546875, 76.02120971679688, -23.655303955078125, -37.7368278503418, -34.0267333984375, 27.626306533813477, 64.2552490234375, 94.85713958740234, 219.93341064453125, 82.82856750488281, 207.74636840820312, 127.7942123413086, 51.048004150390625, 137.4974365234375, 159.10781860351562, -59.48432159423828, 180.77256774902344, 148.1396484375, 53.92936706542969, 95.42124938964844, 131.45278930664062, 130.08978271484375, 69.49987030029297, -14.66850471496582, -14.773040771484375, -14.823333740234375, 14.675605773925781, 12.387069702148438], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000223.npy"} +{"epoch": 0.3371126228269085, "step": 224, "batch_size": 64, "mean": 23.75539207458496, "std": 78.8778305053711, "min": -149.48001098632812, "p10": -87.75616455078125, "median": 17.716376304626465, "p90": 118.71300048828128, "max": 279.2500915527344, "pos_frac": 0.609375, "sample": [89.24635314941406, 193.517578125, 24.96828842163086, 78.896240234375, 40.544097900390625, 153.32579040527344, 43.89353942871094, 93.6920394897461, -12.012550354003906, 76.88983154296875, 85.81976318359375, 16.555234909057617, 107.94355773925781, 135.21397399902344, -82.07913208007812, -4.849676132202148, 11.779134750366211, 29.234962463378906, 24.86199951171875, 55.77915954589844, -15.647117614746094, 31.61181640625, -25.192306518554688, -105.2786865234375, -5.208992004394531, 18.932964324951172, -93.50630950927734, -18.484695434570312, -90.18917846679688, -26.731552124023438, 104.95075225830078, -134.52487182617188, -95.4784164428711, -46.629974365234375, -23.404571533203125, -0.7558155059814453, -10.351358413696289, -149.48001098632812, 11.938064575195312, 84.49360656738281, -36.05242919921875, 65.13704681396484, 111.31431579589844, 4.4185333251953125, -130.07261657714844, 37.21697998046875, 11.800552368164062, 27.496456146240234, -68.8801040649414, -9.68682861328125, -1.6011486053466797, 27.979787826538086, 87.30293273925781, 123.47254180908203, 149.34213256835938, 65.98328399658203, 12.628707885742188, 55.114654541015625, -7.879583358764648, 121.88386535644531, -12.81976318359375, 279.2500915527344, 18.877517700195312, 13.834636688232422], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000224.npy"} +{"epoch": 0.3386243386243386, "step": 225, "batch_size": 64, "mean": 62.525047302246094, "std": 86.08789825439453, "min": -175.6297607421875, "p10": -12.244130706787104, "median": 46.50002479553223, "p90": 177.72565460205078, "max": 346.0932922363281, "pos_frac": 0.8125, "sample": [19.89874267578125, 30.974441528320312, 10.909553527832031, -14.481109619140625, 209.21820068359375, 170.775390625, 8.802982330322266, -3.1802978515625, 58.33049011230469, 35.35936737060547, 41.763458251953125, 205.56640625, 23.746986389160156, 178.57688903808594, 3.816883087158203, 44.85503005981445, 12.597381591796875, 21.077857971191406, 113.77694702148438, 110.54829406738281, 136.27291870117188, 28.27180290222168, 346.0932922363281, 179.55914306640625, 203.23812866210938, 12.568115234375, 60.349029541015625, 117.53935241699219, 141.35919189453125, -1.6328601837158203, -7.024513244628906, -129.27603149414062, 73.4980697631836, 8.451805114746094, 139.73095703125, 182.23875427246094, 6.232643127441406, -35.226593017578125, 175.73944091796875, 66.42413330078125, -3.6256179809570312, 55.88641357421875, 132.94161987304688, -37.840965270996094, 123.79139709472656, -43.2332763671875, -175.6297607421875, 3.8224105834960938, 74.87179565429688, 48.14501953125, 114.00850677490234, 2.066061019897461, 19.520896911621094, 144.95169067382812, 136.13284301757812, -4.075584411621094, 79.02810668945312, 37.48328399658203, 139.4613494873047, 60.65751647949219, 59.436614990234375, 6.8006439208984375, 99.66930389404297, -60.00807189941406], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000225.npy"} +{"epoch": 0.3401360544217687, "step": 226, "batch_size": 64, "mean": 43.08776092529297, "std": 83.70706176757812, "min": -128.9068603515625, "p10": -76.24788513183593, "median": 42.580448150634766, "p90": 144.50977325439456, "max": 218.12936401367188, "pos_frac": 0.6875, "sample": [41.435325622558594, -80.67739868164062, 43.72557067871094, 34.08434295654297, 71.51811218261719, 114.116455078125, 146.83697509765625, 6.661491394042969, 99.69134521484375, -12.673336029052734, 111.8013687133789, 75.19127655029297, 25.196311950683594, 118.70903015136719, -78.20350646972656, -26.529380798339844, -5.772199630737305, -114.80494689941406, 129.2578125, 50.692237854003906, 119.18136596679688, -22.8350830078125, 63.59119415283203, 2.8502197265625, 62.748016357421875, 1.4611759185791016, -1.4552383422851562, 74.56319427490234, 191.81890869140625, 74.18158721923828, -38.529327392578125, 111.17173767089844, 63.8087272644043, 59.96754455566406, 150.08998107910156, 71.60769653320312, 95.17548370361328, -123.96366882324219, -21.20829200744629, 1.6459636688232422, -8.211776733398438, 139.0796356201172, -52.30608367919922, -116.25961303710938, 118.97560119628906, 199.46923828125, -71.68476867675781, 87.50450134277344, -128.9068603515625, 51.04546356201172, 25.46310806274414, 8.037460327148438, 27.69388198852539, 25.3972110748291, -99.72708129882812, 218.12936401367188, 215.03668212890625, 161.865234375, 14.196868896484375, -0.34649085998535156, 134.03311157226562, -1.036844253540039, -7.731555938720703, 131.77215576171875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000226.npy"} +{"epoch": 0.3416477702191988, "step": 227, "batch_size": 64, "mean": 42.79435729980469, "std": 81.88642120361328, "min": -242.68316650390625, "p10": -48.43519744873046, "median": 33.55092239379883, "p90": 140.3714797973633, "max": 187.53793334960938, "pos_frac": 0.703125, "sample": [131.81568908691406, 95.2822036743164, 16.865943908691406, 177.50421142578125, -30.43231201171875, 14.090892791748047, 129.011962890625, 131.61175537109375, -84.98146057128906, -4.832679748535156, 169.06063842773438, -50.942161560058594, -11.48823356628418, 140.2472686767578, 28.367233276367188, 41.51097106933594, 2.0015201568603516, -20.747108459472656, 109.00009155273438, -43.36082458496094, 139.0443572998047, 133.33184814453125, -19.58953857421875, 14.224864959716797, 150.08416748046875, 34.05669403076172, -5.450660705566406, 56.76580810546875, 9.377218246459961, -5.4808349609375, 141.6710968017578, 123.07524108886719, 92.73191833496094, 58.61445617675781, 9.688323974609375, 122.82681274414062, 187.53793334960938, 39.16901397705078, 11.252616882324219, 166.78329467773438, 24.747169494628906, 90.20359802246094, -27.39757537841797, 54.06196594238281, 86.21578979492188, -11.374710083007812, 22.740570068359375, 6.54052734375, -49.31993103027344, 52.30086898803711, -119.78716278076172, 112.79027557373047, -46.370819091796875, 91.34991455078125, 22.206205368041992, -242.68316650390625, 38.41802978515625, -19.60455322265625, 114.67513275146484, 33.04515075683594, -103.765869140625, -50.14624786376953, 120.26871490478516, 140.42471313476562], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000227.npy"} +{"epoch": 0.3431594860166289, "step": 228, "batch_size": 64, "mean": 55.199974060058594, "std": 84.93769836425781, "min": -114.70399475097656, "p10": -35.91047897338867, "median": 50.03862190246582, "p90": 168.2085891723633, "max": 307.960205078125, "pos_frac": 0.75, "sample": [-8.573774337768555, 151.02398681640625, 25.600250244140625, 159.1151580810547, 11.795486450195312, 52.007965087890625, 64.12738800048828, 145.5587615966797, 9.0574951171875, -93.3749008178711, 24.65858268737793, 22.03271484375, 73.63763427734375, -114.70399475097656, 46.19416809082031, 12.574695587158203, 118.29710388183594, -36.29325866699219, -1.8186988830566406, 98.38510131835938, 16.208120346069336, 204.80630493164062, -100.6964111328125, 18.46747589111328, -98.20841979980469, 132.21778869628906, 122.2806396484375, 178.48045349121094, 80.63768005371094, 105.36127471923828, -9.233768463134766, -28.72100067138672, 74.19935607910156, 33.2655029296875, 86.35459899902344, 3.920572280883789, 51.477420806884766, 1.8853416442871094, -89.664794921875, -23.842052459716797, 64.58523559570312, 60.55780029296875, 172.10577392578125, -20.76670265197754, 183.70465087890625, 17.554494857788086, 200.11871337890625, 151.31263732910156, -22.513893127441406, -43.022186279296875, -6.924842834472656, 10.252147674560547, 207.62208557128906, 81.19548797607422, -35.01732635498047, 24.19831085205078, 307.960205078125, 129.2441864013672, 136.51168823242188, 55.02545928955078, 48.599822998046875, 77.89970397949219, 103.5825424194336, 110.51441192626953], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000228.npy"} +{"epoch": 0.34467120181405897, "step": 229, "batch_size": 64, "mean": 48.13549041748047, "std": 90.708740234375, "min": -147.5291290283203, "p10": -26.896654510498045, "median": 34.75052833557129, "p90": 140.49569702148438, "max": 458.2738037109375, "pos_frac": 0.75, "sample": [63.38063049316406, 112.33154296875, 85.52661895751953, 65.49466705322266, 12.401500701904297, 27.799163818359375, 133.2144317626953, 27.652944564819336, 80.15904235839844, -28.113250732421875, 75.17829132080078, 151.30255126953125, 128.36984252929688, 33.27973175048828, 86.4766845703125, 35.192352294921875, 48.458709716796875, 20.851619720458984, 143.46002197265625, 133.53985595703125, 34.56521987915039, 5.693363189697266, 85.84745025634766, 52.580223083496094, -147.5291290283203, 4.632762908935547, -81.53028106689453, -7.084268569946289, 6.683774948120117, -24.05792999267578, 34.93583679199219, 458.2738037109375, -146.26895141601562, 111.2955093383789, 2.4554290771484375, 18.483976364135742, 147.27389526367188, -81.86326599121094, -10.867622375488281, 77.3177490234375, 128.90069580078125, 72.35536193847656, -2.0544986724853516, -8.964824676513672, 56.38480758666992, 1.9566497802734375, 143.23594665527344, 46.455833435058594, 109.04132080078125, 11.297908782958984, 2.3669662475585938, -14.545419692993164, 80.31149291992188, -48.02595520019531, 141.01878356933594, 9.748687744140625, -6.572349548339844, -4.545820236206055, 139.27516174316406, 247.97299194335938, 130.18321228027344, -7.326774597167969, 5.335018157958984, -129.92852783203125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000229.npy"} +{"epoch": 0.34618291761148906, "step": 230, "batch_size": 64, "mean": 41.6307487487793, "std": 88.58717346191406, "min": -283.7603759765625, "p10": -19.84974288940429, "median": 25.333486557006836, "p90": 129.45039672851564, "max": 360.69482421875, "pos_frac": 0.6875, "sample": [-12.255443572998047, 82.64299011230469, 125.584716796875, 6.400417327880859, 47.09198760986328, -283.7603759765625, 23.477235794067383, 15.945556640625, -22.037933349609375, 131.10711669921875, 34.92204284667969, 115.33124542236328, 135.48590087890625, 109.6165771484375, 24.400543212890625, 35.30329895019531, 84.01692962646484, 134.76930236816406, -85.25118255615234, -1.9873809814453125, -147.763916015625, 8.961639404296875, 179.56558227539062, 39.65964889526367, 108.68665313720703, -2.67431640625, 119.1028060913086, -2.8491439819335938, 109.5738525390625, 102.82162475585938, 103.1377182006836, -5.618404388427734, -28.56671714782715, 26.677127838134766, -10.561325073242188, -14.743965148925781, 25.814918518066406, 38.972137451171875, 360.69482421875, 77.97563171386719, 0.2967987060546875, 12.806892395019531, -4.321224212646484, 53.53668975830078, 237.89093017578125, 99.11458587646484, -37.89073944091797, 120.70355987548828, 6.1360015869140625, -6.931545257568359, 115.93263244628906, -119.90003967285156, 113.98301696777344, 62.54895782470703, 24.852054595947266, -2.9286880493164062, 15.038301467895508, -0.9288959503173828, 32.952720642089844, -8.448554992675781, 6.690675735473633, 149.54360961914062, -0.7232666015625, 4.743556976318359], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000230.npy"} +{"epoch": 0.3476946334089191, "step": 231, "batch_size": 64, "mean": 64.99821472167969, "std": 78.74359130859375, "min": -126.68353271484375, "p10": -23.180092620849607, "median": 63.688629150390625, "p90": 161.55739746093752, "max": 267.5600280761719, "pos_frac": 0.828125, "sample": [168.6326141357422, 37.490516662597656, 109.27935791015625, 93.14519500732422, 30.860626220703125, 25.537307739257812, 35.08677291870117, 200.9560546875, 25.35535430908203, 28.09735107421875, 117.24258422851562, 26.973800659179688, 109.65017700195312, 11.803524017333984, -95.25064086914062, 88.32809448242188, 30.412582397460938, 91.88531494140625, 101.15528869628906, -7.433799743652344, 85.78526306152344, -21.10379409790039, 72.21794891357422, 94.53372192382812, 14.856864929199219, 225.5425567626953, 56.91351318359375, -99.16961669921875, 138.865478515625, 115.37138366699219, 154.433837890625, -67.36117553710938, 58.91749572753906, 102.99078369140625, 129.1339874267578, -96.04293823242188, 13.749298095703125, 64.72653198242188, 62.650726318359375, -46.05652618408203, 188.76258850097656, -9.783554077148438, 39.304931640625, 52.544952392578125, 129.30783081054688, 44.37554168701172, 95.20240020751953, -24.069934844970703, 8.17923355102539, -8.777652740478516, 122.07984924316406, 267.5600280761719, 70.4350814819336, 84.64702606201172, 116.43463897705078, 164.6103515625, 10.153076171875, 91.47935485839844, 36.99873352050781, 204.50050354003906, -126.68353271484375, 57.007362365722656, 121.8551025390625, 133.5986328125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000231.npy"} +{"epoch": 0.3492063492063492, "step": 232, "batch_size": 64, "mean": 42.90275192260742, "std": 99.59446716308594, "min": -299.2731628417969, "p10": -48.513617706298824, "median": 34.04011917114258, "p90": 140.03483734130862, "max": 377.60693359375, "pos_frac": 0.671875, "sample": [0.7874069213867188, 124.40438842773438, 93.82179260253906, 208.59970092773438, -0.4623260498046875, -104.28900146484375, 32.68006134033203, 126.68610382080078, -25.846664428710938, -50.866973876953125, -29.987110137939453, 35.400177001953125, 1.8094940185546875, 15.411434173583984, 50.63125991821289, 141.46115112304688, 10.041801452636719, -66.16963958740234, 76.61692810058594, 40.20107650756836, -43.02245330810547, 89.81855010986328, 84.0584716796875, 55.318267822265625, 16.598220825195312, 60.379058837890625, -24.01487922668457, 136.70677185058594, -23.95859146118164, 38.495460510253906, 54.81682586669922, 183.6980438232422, -17.512937545776367, -4.090421676635742, 48.39459228515625, -9.430572509765625, 3.743907928466797, -70.60749053955078, 193.8822479248047, 41.95552062988281, -7.989959716796875, 170.76715087890625, -52.061622619628906, 377.515625, 102.14710998535156, 377.60693359375, -12.702415466308594, -39.11200714111328, -21.571151733398438, 52.047386169433594, 48.523109436035156, 119.94580078125, 75.55169677734375, 17.121437072753906, 0.9107627868652344, -52.75358581542969, 2.212158203125, 126.0445556640625, 16.744674682617188, 94.10511779785156, -17.08673858642578, -299.2731628417969, 93.98165893554688, 76.94195556640625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000232.npy"} +{"epoch": 0.3507180650037793, "step": 233, "batch_size": 64, "mean": 36.47694396972656, "std": 79.56900787353516, "min": -122.94329833984375, "p10": -50.46698570251464, "median": 18.280662536621094, "p90": 143.47256469726565, "max": 217.27728271484375, "pos_frac": 0.640625, "sample": [217.27728271484375, -18.478607177734375, 158.76873779296875, -53.5305290222168, 14.372013092041016, 94.84197998046875, 23.866050720214844, 19.330120086669922, -33.21831130981445, -43.31871795654297, 9.933151245117188, 200.9925994873047, -5.745113372802734, -17.295455932617188, 13.110542297363281, 59.51918029785156, 100.78341674804688, -0.7887248992919922, 36.19879913330078, 18.159500122070312, 166.58396911621094, 18.401824951171875, 138.041748046875, 109.69667053222656, -122.94329833984375, 51.90672302246094, -22.418670654296875, 90.47185516357422, -111.67726135253906, -102.55722045898438, 107.94251251220703, 20.20832061767578, 102.89781188964844, 126.85667419433594, 76.08511352539062, -107.25021362304688, 69.33094787597656, 15.992380142211914, 12.814132690429688, -2.4293289184570312, 45.78974914550781, -82.6711654663086, 139.34344482421875, -17.006103515625, 78.50287628173828, -25.11072540283203, -2.3580093383789062, 27.423187255859375, 116.47171020507812, 89.30965423583984, -34.276851654052734, -29.213281631469727, 1.4529895782470703, 145.2421875, 190.33995056152344, 171.36634826660156, 10.557823181152344, -34.40586471557617, 125.30760192871094, -10.827682495117188, 69.92000579833984, 10.962074279785156, -82.19705963134766, -2.1310653686523438], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000233.npy"} +{"epoch": 0.35222978080120937, "step": 234, "batch_size": 64, "mean": 35.21763610839844, "std": 73.72842407226562, "min": -141.59515380859375, "p10": -59.13631172180175, "median": 28.660672187805176, "p90": 120.97908172607423, "max": 184.23760986328125, "pos_frac": 0.671875, "sample": [-33.330604553222656, -46.202362060546875, -5.98321533203125, -141.59515380859375, 78.01564025878906, -6.470760345458984, 28.681772232055664, -17.985748291015625, 105.63555908203125, -18.04296875, 86.61322021484375, 108.99537658691406, 102.6596450805664, 48.85028076171875, 138.7108612060547, 117.12025451660156, -56.190025329589844, -60.39900588989258, 98.47190856933594, -6.054410934448242, 2.7000389099121094, -1.8777656555175781, 117.7992172241211, 184.23760986328125, -1.489370346069336, -11.300430297851562, 126.94343566894531, 13.082056045532227, -113.04985809326172, 3.206146240234375, 102.09597778320312, 158.34255981445312, 122.34188079833984, 102.19914245605469, 3.794891357421875, -103.31423950195312, 17.066314697265625, 28.639572143554688, 6.04609489440918, -84.98385620117188, 117.23252868652344, 79.71194458007812, 45.2391471862793, 114.7275390625, 132.426513671875, 45.49134063720703, 15.940193176269531, 71.61178588867188, 106.35565185546875, -51.117347717285156, 15.6358642578125, 107.18087005615234, -78.3720474243164, 97.55372619628906, -74.4155502319336, -3.684162139892578, 50.43360900878906, 30.160743713378906, 29.375587463378906, 10.706657409667969, 14.961906433105469, 158.0866241455078, 80.41238403320312, -55.706451416015625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000234.npy"} +{"epoch": 0.35374149659863946, "step": 235, "batch_size": 64, "mean": 52.3753776550293, "std": 64.85591125488281, "min": -104.57485961914062, "p10": -24.03941230773925, "median": 43.837406158447266, "p90": 133.77388763427734, "max": 185.21263122558594, "pos_frac": 0.796875, "sample": [3.8991661071777344, 56.5322265625, 22.54418182373047, 67.6928939819336, 132.62721252441406, 12.705192565917969, 43.341949462890625, 141.43167114257812, 149.30316162109375, 86.88636779785156, -29.777175903320312, 121.2376937866211, 24.00176239013672, 18.88655662536621, 99.22175598144531, 102.57199096679688, 55.54204559326172, 9.658313751220703, 116.04440307617188, 112.92263793945312, 21.384719848632812, 16.974807739257812, -72.74874877929688, 95.51406860351562, 130.03347778320312, 119.65592956542969, -9.950172424316406, -38.7626953125, 137.9931640625, -104.57485961914062, 71.11830139160156, -0.41594886779785156, 23.123432159423828, 0.929718017578125, 134.30116271972656, 107.46481323242188, 44.979129791259766, 27.1221923828125, 178.10064697265625, -47.76869201660156, -18.628149032592773, 43.62786865234375, 134.26531982421875, 19.612796783447266, 68.285888671875, 26.325130462646484, -3.9043350219726562, 112.00775146484375, 185.21263122558594, 23.84182357788086, 44.04694366455078, -4.0149993896484375, 107.98966979980469, 91.04666137695312, 8.363021850585938, -77.35227966308594, 119.52762603759766, 117.0735092163086, 129.6597442626953, 31.88105010986328, 49.71842956542969, -26.113780975341797, -19.19921875, 7.002653121948242], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000235.npy"} +{"epoch": 0.35525321239606955, "step": 236, "batch_size": 64, "mean": 43.097129821777344, "std": 87.78821563720703, "min": -248.48500061035156, "p10": -37.84533081054686, "median": 33.84633445739746, "p90": 140.41542205810546, "max": 274.74566650390625, "pos_frac": 0.671875, "sample": [122.24295043945312, -25.90650177001953, 9.473289489746094, 106.31658935546875, 32.337608337402344, 64.16743469238281, 160.74951171875, 138.80799865722656, -13.693801879882812, 42.32530975341797, 89.40422058105469, 92.27606201171875, 1.154245376586914, -47.00914001464844, -17.997285842895508, -17.178977966308594, 99.26253509521484, 83.28660583496094, 155.7261962890625, 138.8921356201172, 8.583831787109375, -27.585250854492188, 48.465789794921875, -11.067768096923828, 1.3480453491210938, 56.83551025390625, 100.9646987915039, 112.98223876953125, 124.35850524902344, 84.84892272949219, 120.35206604003906, -26.748092651367188, -26.941177368164062, 25.354717254638672, 148.3578338623047, -15.254440307617188, -42.24250793457031, 274.74566650390625, -2.919269561767578, 6.554380416870117, -76.99957275390625, 35.35506057739258, 141.05374145507812, 154.35821533203125, 23.684722900390625, 71.73162841796875, 27.93939208984375, -94.21822357177734, 138.92601013183594, -7.237438201904297, 8.050922393798828, -202.33053588867188, -248.48500061035156, 120.16261291503906, 173.22378540039062, 128.21530151367188, 117.93682861328125, -6.88856315612793, -12.901718139648438, -13.17380142211914, 120.85455322265625, -69.7494125366211, 41.655242919921875, 11.421928405761719], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000236.npy"} +{"epoch": 0.35676492819349964, "step": 237, "batch_size": 64, "mean": 48.854244232177734, "std": 76.14690399169922, "min": -164.65182495117188, "p10": -27.21184844970703, "median": 49.04531478881836, "p90": 132.63792419433597, "max": 228.16383361816406, "pos_frac": 0.75, "sample": [136.69137573242188, 85.50666046142578, 62.917449951171875, 139.76242065429688, 32.55235290527344, -4.760467529296875, 21.505081176757812, 24.421730041503906, -2.7753028869628906, 123.17987060546875, 106.50761413574219, 94.88676452636719, -27.91241455078125, -25.577194213867188, 117.3080825805664, 62.08686065673828, -7.560115814208984, 78.1978988647461, -29.29754638671875, 70.2886962890625, -151.49964904785156, 107.07421875, 85.66642761230469, 80.62680053710938, 228.16383361816406, -19.101757049560547, 42.046875, 92.12043762207031, -74.96858215332031, 52.476593017578125, 113.69025421142578, 49.356483459472656, 8.895759582519531, -164.65182495117188, 122.28491973876953, -7.629642486572266, 12.751060485839844, -118.04827117919922, 220.00985717773438, 20.1906681060791, 45.558380126953125, 59.73627471923828, 6.8920135498046875, 66.23377990722656, 21.347023010253906, -0.2521800994873047, 77.31623077392578, 123.12727355957031, 43.72845458984375, 107.24857330322266, 118.89253997802734, -14.495925903320312, 114.52534484863281, 212.7636260986328, 1.337493896484375, 9.52376937866211, -1.6788959503173828, 144.35165405273438, 23.088714599609375, 51.28852844238281, 1.5812797546386719, 48.73414611816406, -34.42803955078125, 142.8673858642578], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000237.npy"} +{"epoch": 0.35827664399092973, "step": 238, "batch_size": 64, "mean": 49.93061828613281, "std": 87.34123229980469, "min": -111.36724090576172, "p10": -62.61242332458496, "median": 43.13732719421387, "p90": 141.85916137695315, "max": 393.26092529296875, "pos_frac": 0.703125, "sample": [-63.257816314697266, 25.121376037597656, 89.85516357421875, -60.85460662841797, 120.25423431396484, 129.9261474609375, 5.980602264404297, 9.100677490234375, 164.82711791992188, -73.19403839111328, -12.218132019042969, -24.335098266601562, 121.52167510986328, 32.974769592285156, 104.3671875, 66.45938110351562, -3.6601638793945312, -47.87055206298828, 44.35386657714844, 145.9744873046875, 77.79206085205078, 179.5796356201172, 109.25306701660156, 102.49047088623047, 24.818729400634766, 122.40631103515625, 39.39757537841797, -111.36724090576172, -64.00215148925781, 94.64134216308594, 115.32026672363281, 94.43447875976562, 29.368558883666992, 144.067138671875, -9.234500885009766, 41.9207878112793, 393.26092529296875, 8.040889739990234, 118.39940643310547, 81.03677368164062, -2.5444297790527344, -1.2628021240234375, -20.070560455322266, 26.236373901367188, 95.07085418701172, 74.68963623046875, 198.8333282470703, 104.40274047851562, 51.33588409423828, 46.56939697265625, 61.29693603515625, 73.39592742919922, 136.70721435546875, -68.41857147216797, -17.262964248657227, 3.8400535583496094, -109.7216796875, 40.74036407470703, 132.93115234375, -108.42774963378906, 183.10797119140625, -61.10650634765625, -31.641284942626953, 19.90763282775879], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000238.npy"} +{"epoch": 0.35978835978835977, "step": 239, "batch_size": 64, "mean": 58.59601974487305, "std": 91.04385375976562, "min": -126.38873291015625, "p10": -47.57139053344725, "median": 47.6013069152832, "p90": 179.36812133789064, "max": 291.98236083984375, "pos_frac": 0.703125, "sample": [36.425880432128906, -3.4597320556640625, 102.43075561523438, -5.438098907470703, 166.98548889160156, 15.03955078125, -33.215606689453125, 37.991737365722656, -61.01275634765625, 76.38875579833984, -78.28081512451172, 41.471221923828125, 12.680774688720703, 146.06651306152344, 75.35279846191406, 47.680999755859375, -30.29444122314453, -125.13358306884766, 235.6129150390625, 291.98236083984375, -21.391429901123047, 59.49884796142578, 131.2734375, 97.62352752685547, 176.77371215820312, 38.13214111328125, 78.55252075195312, 107.95515441894531, 100.03507232666016, 7.9880218505859375, 130.75392150878906, 93.49799346923828, -53.72386932373047, -29.369741439819336, 58.90520477294922, 146.69793701171875, -6.971202850341797, 28.699270248413086, 97.67337036132812, 93.18821716308594, -0.24961471557617188, 70.81836700439453, 72.49134063720703, 133.2989044189453, -116.73976135253906, 46.84077453613281, 180.48001098632812, 201.0401611328125, 196.81776428222656, -126.38873291015625, -3.0786075592041016, -6.9811859130859375, -117.17658233642578, 109.43165588378906, 214.3253173828125, 136.76129150390625, 126.27456665039062, -6.960674285888672, 44.35868835449219, 236.30056762695312, -0.6248130798339844, 47.52161407470703, 5.638099670410156, 20.87920379638672], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000239.npy"} +{"epoch": 0.36130007558578986, "step": 240, "batch_size": 64, "mean": 63.530635833740234, "std": 90.05450439453125, "min": -135.33544921875, "p10": -33.016103363037104, "median": 58.46670150756836, "p90": 165.94866180419922, "max": 326.3729553222656, "pos_frac": 0.78125, "sample": [106.52682495117188, -77.22529602050781, 326.3729553222656, -64.27490234375, 114.74392700195312, 267.73626708984375, 5.92669677734375, 72.47669219970703, -62.67396545410156, -35.613433837890625, -26.955665588378906, -14.286100387573242, 118.06785583496094, 47.069984436035156, 40.035091400146484, 91.0337905883789, 116.36573791503906, -77.21630859375, 140.80819702148438, 62.68601989746094, -6.848079681396484, 165.86134338378906, 65.58087158203125, -131.15625, 89.79530334472656, 72.2306137084961, 144.7401885986328, 164.45774841308594, 206.35238647460938, -11.428703308105469, 135.56549072265625, 224.26693725585938, 24.474777221679688, 27.20269775390625, 26.60223388671875, 82.19923400878906, 61.70726776123047, 204.40335083007812, 149.4783935546875, 2.8712635040283203, 154.11578369140625, 9.777667999267578, 19.28628921508789, 89.17182159423828, 173.94573974609375, -135.33544921875, 0.2992382049560547, -17.72048568725586, 87.27787780761719, -6.6634368896484375, 6.951732635498047, 130.74009704589844, -13.577590942382812, 55.22613525390625, 6.896335601806641, 13.270332336425781, 131.68333435058594, 165.986083984375, 114.74337005615234, 54.61241912841797, 117.59209442138672, 17.472755432128906, 15.570075988769531, 24.677169799804688], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000240.npy"} +{"epoch": 0.36281179138321995, "step": 241, "batch_size": 64, "mean": 41.13740539550781, "std": 84.26365661621094, "min": -151.43634033203125, "p10": -48.22347145080566, "median": 22.08804988861084, "p90": 147.27003021240236, "max": 258.16461181640625, "pos_frac": 0.671875, "sample": [-29.01995849609375, -12.831062316894531, 34.97533416748047, -92.22821807861328, 116.35761260986328, 3.754638671875, 13.085319519042969, 103.60436248779297, 105.04114532470703, 154.2748565673828, 15.050765991210938, 54.426231384277344, -21.719615936279297, 37.10884094238281, 93.1557846069336, 85.53172302246094, -0.5764846801757812, 18.08647346496582, 22.25438690185547, 135.3728790283203, 28.05352783203125, 127.874755859375, -9.262008666992188, -19.773704528808594, 58.707435607910156, -11.022005081176758, 68.36491394042969, -79.49381256103516, -48.734336853027344, -39.506690979003906, 140.1071319580078, 210.6009521484375, 177.46180725097656, 30.921287536621094, -26.53124237060547, -93.07742309570312, -6.968101501464844, 150.33984375, 62.11543273925781, -4.046932220458984, -142.24102783203125, 20.659141540527344, -3.704500198364258, 187.61961364746094, 8.533945083618164, 19.863807678222656, -151.43634033203125, 206.42425537109375, 25.036148071289062, -92.05001831054688, 258.16461181640625, 9.2061767578125, 82.03736877441406, 94.34222412109375, 127.94110107421875, -47.03145217895508, 2.8432464599609375, 137.4324188232422, 93.6217269897461, 105.09400177001953, 21.92171287536621, 113.79443359375, 4.092397689819336, -1.2069683074951172], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000241.npy"} +{"epoch": 0.36432350718065004, "step": 242, "batch_size": 64, "mean": 50.35685348510742, "std": 74.38533020019531, "min": -144.66360473632812, "p10": -20.536041641235343, "median": 28.952366828918457, "p90": 139.36548309326173, "max": 270.3138122558594, "pos_frac": 0.765625, "sample": [-109.67802429199219, 41.344940185546875, 89.1590805053711, 47.38854217529297, 18.16547393798828, -3.1320533752441406, 167.65826416015625, 87.05086517333984, 19.4862060546875, 68.9669418334961, 95.58370971679688, 52.549190521240234, 3.3715362548828125, 176.91156005859375, 106.56871032714844, -26.754135131835938, 139.71072387695312, -41.30615234375, 2.6890182495117188, 19.524608612060547, 34.33995819091797, 48.62451934814453, 138.04017639160156, 29.5685977935791, 7.407073974609375, 28.336135864257812, 24.5001220703125, 0.1577911376953125, 137.71673583984375, 27.192459106445312, 126.44660949707031, -0.11939620971679688, 200.5245361328125, 17.456151962280273, 126.57298278808594, 17.812583923339844, -0.17788314819335938, -6.467174530029297, 270.3138122558594, 8.080142974853516, -23.598907470703125, 75.4933853149414, 92.56956481933594, 3.597423553466797, 80.77994537353516, -0.7324066162109375, 98.1269760131836, -38.278038024902344, 162.05992126464844, 126.57859802246094, 138.55992126464844, 23.52616310119629, 191.23838806152344, 5.476024627685547, -9.780431747436523, 95.92655181884766, 107.63084411621094, -5.731409072875977, 4.467048645019531, -144.66360473632812, 81.98604583740234, -13.389354705810547, -53.290428161621094, 32.701629638671875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000242.npy"} +{"epoch": 0.36583522297808013, "step": 243, "batch_size": 64, "mean": 41.56847381591797, "std": 72.43811798095703, "min": -156.49002075195312, "p10": -36.71948547363281, "median": 40.93012046813965, "p90": 134.32522277832032, "max": 170.62049865722656, "pos_frac": 0.734375, "sample": [19.068458557128906, 59.953033447265625, 47.13140106201172, -51.48777770996094, 120.53446197509766, 10.236787796020508, -156.49002075195312, 6.407661437988281, 150.72357177734375, -17.3939208984375, 85.35060119628906, 39.136756896972656, 21.6387939453125, 37.47625732421875, 4.078155517578125, 136.5897979736328, 8.290828704833984, 23.33740997314453, 99.5545654296875, 10.634857177734375, -34.54035949707031, 132.84715270996094, 49.302703857421875, -6.392942428588867, 59.120849609375, 108.02235412597656, 49.88481903076172, 140.6890869140625, 127.17817687988281, 60.640647888183594, -124.60052490234375, -55.56320571899414, 77.03968811035156, -24.694124221801758, 120.31200408935547, 46.00364685058594, 122.52171325683594, 129.06396484375, -150.87734985351562, 33.05589294433594, 170.62049865722656, 81.55065155029297, 63.508506774902344, 155.99166870117188, -15.189727783203125, -8.4613037109375, -8.14106559753418, -1.238260269165039, 134.69644165039062, 133.45904541015625, -23.674575805664062, 40.89430236816406, 23.257369995117188, 94.42887115478516, -73.81510925292969, 82.58648681640625, 91.64566040039062, 40.965938568115234, 6.765712738037109, -37.65339660644531, -28.251625061035156, 138.06863403320312, 50.1337890625, 34.447906494140625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000243.npy"} +{"epoch": 0.3673469387755102, "step": 244, "batch_size": 64, "mean": 59.724334716796875, "std": 71.23269653320312, "min": -91.85418701171875, "p10": -15.119539642333985, "median": 45.53956985473633, "p90": 166.9539276123047, "max": 210.08291625976562, "pos_frac": 0.8125, "sample": [69.76677703857422, 160.42391967773438, 90.62785339355469, 18.433326721191406, -1.3812713623046875, 17.34444236755371, 9.625089645385742, 110.54704284667969, 64.36482238769531, -35.139129638671875, 9.253704071044922, 78.39009094238281, -15.233612060546875, 171.6679229736328, 110.8884048461914, -91.85418701171875, 108.42163848876953, 145.03671264648438, -59.38758087158203, 136.82179260253906, 104.0897216796875, 38.597312927246094, -74.17296600341797, 90.26976776123047, 12.964263916015625, 133.070068359375, 52.48182678222656, 88.27879333496094, 169.75250244140625, 11.524194717407227, 210.08291625976562, 57.430076599121094, 5.79069709777832, -14.853370666503906, -32.51652526855469, 16.8630313873291, -4.579841613769531, 75.5841293334961, 135.51858520507812, 187.68304443359375, 18.607898712158203, 201.40060424804688, 4.162637710571289, 68.40863037109375, 2.710845947265625, 108.39628601074219, 37.477622985839844, 119.2729263305664, 18.669082641601562, 33.27495193481445, 4.147037506103516, -13.923332214355469, 126.05073547363281, 81.42024230957031, 175.0088653564453, 31.228439331054688, -24.771873474121094, 23.268646240234375, 9.831371307373047, -5.635169982910156, 90.51556396484375, 28.441862106323242, 132.7281494140625, 189.18936157226562], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000244.npy"} +{"epoch": 0.3688586545729403, "step": 245, "batch_size": 64, "mean": 70.44686889648438, "std": 74.80469512939453, "min": -119.3932876586914, "p10": -7.644146728515622, "median": 79.47624969482422, "p90": 146.2614303588867, "max": 244.24874877929688, "pos_frac": 0.828125, "sample": [98.53740692138672, 145.754150390625, 89.40089416503906, 203.51612854003906, -1.9137744903564453, 27.015113830566406, -4.004695892333984, 97.7974624633789, 72.92414093017578, 35.00133514404297, -5.0807342529296875, 134.79046630859375, 133.32498168945312, 71.85575866699219, 36.366302490234375, 96.36376953125, 6.452827453613281, 125.53173065185547, -75.79676818847656, 169.74371337890625, 167.09237670898438, 20.396644592285156, 93.66513061523438, 118.970703125, 137.27943420410156, 114.01864624023438, 130.38189697265625, -8.742752075195312, 125.80375671386719, 8.286394119262695, 185.33444213867188, -119.3932876586914, 12.580188751220703, 122.67784118652344, -19.491172790527344, 26.907730102539062, -3.0749969482421875, 60.17437744140625, 140.49720764160156, 188.13021850585938, 86.02835845947266, 67.79383850097656, 91.98445892333984, 15.938316345214844, -16.897397994995117, 119.80282592773438, 146.4788360595703, 125.40596771240234, 20.087371826171875, -70.94198608398438, 26.839630126953125, 119.67060852050781, 20.692352294921875, 28.084423065185547, 143.35794067382812, 113.04733276367188, -109.99786376953125, 145.203857421875, 42.675968170166016, 51.19294357299805, 1.574432373046875, 244.24874877929688, 33.9832649230957, 103.27049255371094], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000245.npy"} +{"epoch": 0.37037037037037035, "step": 246, "batch_size": 64, "mean": 50.74235916137695, "std": 90.95751190185547, "min": -184.67391967773438, "p10": -44.81165008544921, "median": 37.922101974487305, "p90": 146.22668914794923, "max": 381.5686950683594, "pos_frac": 0.6875, "sample": [118.00444030761719, 136.14112854003906, 142.5157470703125, 133.52005004882812, -0.173614501953125, 25.90296173095703, -85.333251953125, 155.3328399658203, 7.577949523925781, -20.3603458404541, 133.93470764160156, 140.1927490234375, 31.394851684570312, -37.18275451660156, 24.7901611328125, 127.80018615722656, 36.18053436279297, -31.55238914489746, -19.617271423339844, 0.4126129150390625, 146.82278442382812, 95.4464111328125, -29.30353546142578, 143.50830078125, 206.85067749023438, -89.46052551269531, 46.23236083984375, 381.5686950683594, -26.33831787109375, 57.02842712402344, 2.9359378814697266, 144.83580017089844, 33.1875, 5.061941146850586, -35.36822509765625, 91.03305053710938, -119.22588348388672, 57.90814971923828, 110.93183898925781, -7.4116973876953125, 46.42884063720703, -95.11498260498047, -11.749046325683594, 109.37162017822266, 4.226985931396484, -14.3720703125, 148.03118896484375, 104.92222595214844, 122.12590026855469, -184.67391967773438, -48.0811767578125, 28.507553100585938, 118.71078491210938, 169.4063720703125, 39.425106048583984, -58.15809631347656, 106.50484466552734, 154.5604248046875, -12.288543701171875, 68.12466430664062, 87.94864654541016, -7.9372711181640625, 99.4469985961914, 36.419097900390625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000246.npy"} +{"epoch": 0.37188208616780044, "step": 247, "batch_size": 64, "mean": 31.554338455200195, "std": 85.7979736328125, "min": -160.206298828125, "p10": -66.00669174194336, "median": 20.96255588531494, "p90": 130.45728149414063, "max": 298.39605712890625, "pos_frac": 0.59375, "sample": [-95.03330993652344, 121.64493560791016, 100.03135681152344, 47.82122039794922, 7.628986358642578, -24.209762573242188, 15.391632080078125, 119.7113037109375, 132.1813201904297, -11.388931274414062, 215.4120635986328, 52.42707824707031, 126.43452453613281, -10.691762924194336, 79.24803161621094, 262.59033203125, 53.374725341796875, 64.45858764648438, -26.66733741760254, -87.72686767578125, 298.39605712890625, 155.5249481201172, -138.94851684570312, -21.03484535217285, 39.66497802734375, -21.098114013671875, 66.95831298828125, -25.53546905517578, -43.912445068359375, 101.99117279052734, -31.966339111328125, -12.112701416015625, 72.941162109375, 31.3985595703125, 74.85629272460938, -4.1537933349609375, 96.39398193359375, -160.206298828125, -48.57843017578125, 7.125062942504883, -3.086780548095703, 58.778167724609375, -43.0986328125, 136.01373291015625, 18.76934814453125, 14.54025650024414, -18.052738189697266, -68.39730072021484, 15.420124053955078, 56.784820556640625, -10.425247192382812, 76.91673278808594, -57.19580078125, 23.155763626098633, -82.05021667480469, 110.18617248535156, 68.18472290039062, 73.01789093017578, -60.42860412597656, -73.41865539550781, -38.36358642578125, 139.5284881591797, 38.329742431640625, 64.02749633789062], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000247.npy"} +{"epoch": 0.37339380196523053, "step": 248, "batch_size": 64, "mean": 49.87766647338867, "std": 92.13677978515625, "min": -188.01504516601562, "p10": -74.02083587646483, "median": 61.109012603759766, "p90": 154.62276458740237, "max": 248.86968994140625, "pos_frac": 0.734375, "sample": [16.549713134765625, 161.70346069335938, -78.21832275390625, -177.28787231445312, 150.45108032226562, 83.12640380859375, 142.41268920898438, 124.97769165039062, 16.56104850769043, -43.293312072753906, -8.929485321044922, 137.53985595703125, 64.65257263183594, -60.02198028564453, 70.12661743164062, 84.94989776611328, 248.86968994140625, -89.06481170654297, -27.160850524902344, 101.79011535644531, -6.527519226074219, 30.462181091308594, 75.412841796875, 70.21760559082031, 128.34999084472656, 110.17430114746094, 108.74703216552734, 10.061576843261719, 73.85551452636719, 156.41062927246094, -175.500732421875, 129.3932342529297, -10.470802307128906, -85.54368591308594, 141.05392456054688, 133.91986083984375, 32.36537170410156, 39.302879333496094, 61.91224670410156, 39.20021057128906, 51.12251281738281, 138.03887939453125, 79.71965026855469, 167.21701049804688, 192.73558044433594, 94.22956848144531, 60.30577850341797, -3.398670196533203, 26.195886611938477, 73.55362701416016, 195.02999877929688, -5.140666961669922, 171.80455017089844, 32.75422668457031, -64.22669982910156, 1.2775440216064453, -101.36541748046875, 146.61705017089844, 98.76836395263672, 34.216392517089844, 0.3536701202392578, 57.100101470947266, -188.01504516601562, -49.254241943359375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000248.npy"} +{"epoch": 0.3749055177626606, "step": 249, "batch_size": 64, "mean": 15.575267791748047, "std": 88.95543670654297, "min": -200.1453094482422, "p10": -107.95203857421875, "median": 15.431601524353027, "p90": 124.88851623535159, "max": 233.41061401367188, "pos_frac": 0.65625, "sample": [-9.749748229980469, -99.21083068847656, 31.538055419921875, 127.90377807617188, 75.61463928222656, -83.49873352050781, -137.37554931640625, 68.05733489990234, 233.41061401367188, -0.65283203125, 80.88311004638672, 106.34588623046875, 17.057159423828125, 102.95155334472656, -159.543212890625, -39.93601989746094, 94.20169067382812, -103.29613494873047, -56.164825439453125, -109.94742584228516, 13.063661575317383, 1.761505126953125, -112.42316436767578, 15.42673110961914, 89.83448791503906, 13.854850769042969, 5.482950210571289, -28.435546875, 131.51473999023438, -51.246917724609375, -46.148460388183594, 133.16163635253906, 4.003782272338867, 74.88095092773438, 36.268192291259766, -167.54608154296875, 33.500648498535156, -61.34562683105469, 101.42992401123047, -139.6287841796875, 15.436471939086914, 148.22348022460938, 91.57550048828125, 74.89407348632812, 38.85343933105469, 7.007045745849609, -102.69596862792969, 130.45289611816406, 15.977027893066406, 29.713287353515625, 8.989229202270508, 111.41209411621094, 82.61624145507812, 4.513069152832031, 34.488441467285156, 68.33052062988281, -65.37918853759766, 115.39293670654297, 129.31924438476562, -200.1453094482422, 117.8529052734375, -29.58431053161621, -29.05810546875, 12.634063720703125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000249.npy"} +{"epoch": 0.3764172335600907, "step": 250, "batch_size": 64, "mean": 57.26470947265625, "std": 88.70121002197266, "min": -221.0893096923828, "p10": -19.491240882873534, "median": 48.109758377075195, "p90": 148.48563995361332, "max": 333.9278564453125, "pos_frac": 0.75, "sample": [66.38029479980469, -10.500818252563477, -14.06303596496582, 15.941339492797852, 64.89620971679688, 7.3809051513671875, 89.46127319335938, 186.3347625732422, 114.54644012451172, -7.537384033203125, 153.47787475585938, -85.3498764038086, 151.96536254882812, 21.475830078125, 133.02444458007812, 110.21923828125, -18.1754150390625, -221.0893096923828, 51.76849365234375, 76.18383026123047, 13.971799850463867, -20.055166244506836, 92.00064086914062, -100.31043243408203, 88.10724639892578, 91.76412963867188, 140.3662872314453, 93.96116638183594, -5.6632080078125, 120.33181762695312, -13.565479278564453, 102.94505310058594, 103.0247573852539, 11.468189239501953, 52.788482666015625, 23.91714096069336, 11.566539764404297, 128.4656524658203, 197.74441528320312, -23.68511962890625, -10.300346374511719, -3.879810333251953, 33.70502853393555, 2.0716400146484375, 230.29310607910156, 119.80244445800781, 278.5453186035156, 134.8721466064453, 34.24239730834961, 119.94181823730469, -46.83448791503906, -64.87100982666016, 139.3485870361328, 0.7463569641113281, -6.905853271484375, 42.073143005371094, 333.9278564453125, 89.72635650634766, 44.45102310180664, 1.0954704284667969, 4.674610137939453, 120.07869720458984, 53.227935791015625, 19.424880981445312], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000250.npy"} +{"epoch": 0.3779289493575208, "step": 251, "batch_size": 64, "mean": 39.39385986328125, "std": 74.32669067382812, "min": -131.26136779785156, "p10": -62.34320220947265, "median": 32.70829963684082, "p90": 133.82658843994142, "max": 194.44561767578125, "pos_frac": 0.71875, "sample": [2.6018409729003906, 125.41820526123047, 137.05120849609375, 1.4682083129882812, 24.190704345703125, 95.4865493774414, -131.26136779785156, 23.93100357055664, 78.67192840576172, 182.7064208984375, -7.746086120605469, -71.62052154541016, 72.75773620605469, -32.74726104736328, 102.25193786621094, -8.115924835205078, 23.646595001220703, 194.44561767578125, 7.394903182983398, -96.04411315917969, -32.08930206298828, 167.390625, 7.435823440551758, -27.804153442382812, 21.22460174560547, -64.1783676147461, 49.09540557861328, 39.16796875, -44.79883575439453, -4.524566650390625, 124.28297424316406, 130.9325408935547, 28.54494857788086, 85.90886688232422, 135.06689453125, 36.882041931152344, 102.27388763427734, 44.525238037109375, -76.37374877929688, -2.7076339721679688, 32.4043083190918, 9.549957275390625, 122.12976837158203, 48.103458404541016, 42.771392822265625, 69.04452514648438, 101.15423583984375, 3.436920166015625, 160.7272186279297, 88.55294799804688, 180.08700561523438, 3.8924407958984375, 93.54176330566406, -64.92772674560547, 12.096885681152344, -5.318140029907227, 65.11405944824219, -117.842041015625, 33.012290954589844, -9.235435485839844, -58.06114959716797, 60.3685302734375, 118.64700317382812, 87.21379852294922], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000251.npy"} +{"epoch": 0.3794406651549509, "step": 252, "batch_size": 64, "mean": 35.724945068359375, "std": 101.17364501953125, "min": -215.81964111328125, "p10": -78.74767608642578, "median": 24.09097385406494, "p90": 146.96088256835944, "max": 405.42431640625, "pos_frac": 0.703125, "sample": [77.37325286865234, -95.93891906738281, -3.6836185455322266, -215.81964111328125, 84.99850463867188, -44.23230743408203, 47.43098831176758, 299.99212646484375, 1.410726547241211, 183.45120239257812, 107.6221694946289, 119.10113525390625, 54.579002380371094, 0.5837478637695312, 153.94924926757812, -32.16627502441406, 154.96636962890625, -55.03174591064453, 22.56291961669922, 0.24057579040527344, -78.81538391113281, 24.034311294555664, 81.19039916992188, 24.14763641357422, 26.73321533203125, 20.422531127929688, 13.66168212890625, -29.235610961914062, 120.83982849121094, 16.380714416503906, 70.97587585449219, 175.9673309326172, 104.82084655761719, 11.181777954101562, -13.509391784667969, -10.908027648925781, 125.80465698242188, 38.82952880859375, 37.653602600097656, -17.607318878173828, 67.98698425292969, -56.55651092529297, 3.8385009765625, 124.99447631835938, -40.63859558105469, 217.59963989257812, -124.53641510009766, 20.412567138671875, -174.01800537109375, -106.73208618164062, -39.8355712890625, 34.65814208984375, 91.2581558227539, -78.58969116210938, 21.968971252441406, 130.65469360351562, 31.18853759765625, 125.6277084350586, 2.514862060546875, 54.285430908203125, 405.42431640625, 32.0555419921875, -99.5819320678711, 38.45906066894531], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000252.npy"} +{"epoch": 0.38095238095238093, "step": 253, "batch_size": 64, "mean": 41.101593017578125, "std": 73.30992889404297, "min": -111.92584228515625, "p10": -47.033260345458984, "median": 30.05757427215576, "p90": 135.90968017578126, "max": 266.6619567871094, "pos_frac": 0.765625, "sample": [115.50289154052734, 11.378387451171875, 154.86045837402344, 28.718040466308594, 0.9131240844726562, -2.11456298828125, 34.149322509765625, 145.36141967773438, 94.52640533447266, 58.99750518798828, 32.2476806640625, 2.2724990844726562, 123.80207824707031, 9.041709899902344, 2.011026382446289, -101.37521362304688, -8.845748901367188, 117.6502456665039, -44.46746826171875, 90.82623291015625, 95.30522155761719, 69.52821350097656, -92.22758483886719, 87.97808837890625, 31.39710807800293, -31.463409423828125, 125.87052917480469, 13.878677368164062, 47.059486389160156, 100.86103820800781, 54.935699462890625, 42.087059020996094, 130.38449096679688, 23.335987091064453, 49.63676452636719, 266.6619567871094, 13.335617065429688, -111.92584228515625, 77.41081237792969, 16.129724502563477, 34.772552490234375, -58.204612731933594, 74.55877685546875, -8.220041275024414, 160.85989379882812, -68.37798309326172, -45.86199188232422, 25.011627197265625, 149.4895477294922, 70.6334457397461, 19.98822021484375, 98.44793701171875, 170.2579345703125, 18.23279571533203, 112.321533203125, -79.43576049804688, -14.081222534179688, -36.93962097167969, 138.27761840820312, -47.53523254394531, 6.291770935058594, 6.39613151550293, 4.490581512451172, 23.492557525634766], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000253.npy"} +{"epoch": 0.382464096749811, "step": 254, "batch_size": 64, "mean": 45.05519485473633, "std": 73.46517944335938, "min": -106.64405059814453, "p10": -40.48672409057617, "median": 24.654330253601074, "p90": 142.4369369506836, "max": 210.6800537109375, "pos_frac": 0.71875, "sample": [0.858489990234375, -5.0634307861328125, 153.3951416015625, -13.27069091796875, 23.958755493164062, 106.19877624511719, 116.79496765136719, 135.7644500732422, 26.82895851135254, 1.8585681915283203, 13.6478271484375, 14.577709197998047, 42.53563690185547, 133.73724365234375, 135.76266479492188, 137.32110595703125, 172.97671508789062, 45.122703552246094, -39.53654479980469, 89.63546752929688, -13.104423522949219, 183.5701446533203, 6.370094299316406, 143.45648193359375, -64.58777618408203, -106.64405059814453, -63.79150390625, 142.71737670898438, 25.349905014038086, 11.489742279052734, 135.27102661132812, -20.447891235351562, -5.4251861572265625, -1.4514350891113281, -14.70205307006836, 30.926971435546875, -20.09957504272461, 96.74951171875, 11.434123992919922, 117.16252136230469, 33.65489196777344, -20.270915985107422, 11.116077423095703, -10.742416381835938, -40.893943786621094, 122.45255279541016, -44.17669677734375, 156.19537353515625, 124.152587890625, 19.10383415222168, 5.1621551513671875, -42.89771270751953, 8.706275939941406, 141.78257751464844, 131.94964599609375, 30.75548553466797, 63.26366424560547, 2.868724822998047, 64.88570404052734, 82.64373779296875, 2.8426361083984375, 210.6800537109375, 31.375946044921875, -88.42642974853516], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000254.npy"} +{"epoch": 0.3839758125472411, "step": 255, "batch_size": 64, "mean": 50.30660629272461, "std": 80.94517517089844, "min": -140.37315368652344, "p10": -28.804345703124998, "median": 35.281867027282715, "p90": 164.09116973876954, "max": 244.48861694335938, "pos_frac": 0.734375, "sample": [142.4705810546875, 5.92523193359375, 219.1217041015625, 244.48861694335938, 119.24676513671875, 136.59805297851562, -21.860679626464844, -22.8570556640625, -1.1689109802246094, 102.7177734375, -71.16348266601562, -11.997282028198242, 39.589481353759766, 99.50927734375, -55.27062225341797, 10.488861083984375, 70.22383880615234, -5.024452209472656, 38.59178161621094, -124.8826904296875, 130.13218688964844, -29.86687469482422, 42.12696838378906, 10.712165832519531, 201.8341064453125, 114.4395523071289, -54.8284912109375, 6.6317596435546875, 186.16671752929688, 7.7825927734375, 175.52438354492188, 94.49999237060547, -11.517982482910156, 26.01401138305664, 87.92807006835938, 31.971952438354492, 12.712175369262695, 125.67385864257812, 159.72463989257812, -1.3294925689697266, 8.847455978393555, 87.6959228515625, 88.72064208984375, -6.1903228759765625, 21.089082717895508, 4.573049545288086, 60.111961364746094, 192.35110473632812, -101.0009994506836, 94.95110321044922, 28.831817626953125, 61.74394226074219, -4.277320861816406, 165.96253967285156, 68.15766906738281, 120.25408172607422, 17.426586151123047, 48.75551986694336, 64.26895141601562, -140.37315368652344, 10.767349243164062, 24.35651397705078, 97.84535217285156, -26.325111389160156], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000255.npy"} +{"epoch": 0.3854875283446712, "step": 256, "batch_size": 64, "mean": 49.77001953125, "std": 98.31653594970703, "min": -199.84439086914062, "p10": -48.72127990722656, "median": 47.49924087524414, "p90": 158.49270477294925, "max": 424.234375, "pos_frac": 0.625, "sample": [-72.28917694091797, -30.132186889648438, 96.10653686523438, 103.71754455566406, 117.78895568847656, -18.815383911132812, -8.972959518432617, 112.80586242675781, 53.233856201171875, -9.869283676147461, -100.0147476196289, 2.525453567504883, -5.242095947265625, 161.3705596923828, 53.267791748046875, 242.02590942382812, -23.898773193359375, -15.473876953125, -199.84439086914062, 90.88967895507812, 11.492259979248047, 203.47512817382812, -1.3294811248779297, 46.916778564453125, -63.93534851074219, -119.034912109375, 74.59970092773438, -17.02948760986328, 48.081703186035156, 113.66838836669922, -13.610931396484375, 46.01792907714844, 97.84491729736328, 129.19268798828125, 122.69900512695312, 164.62155151367188, 117.51509094238281, 121.3192367553711, 56.67059326171875, 151.7777099609375, 62.844261169433594, 73.54986572265625, 238.02081298828125, -8.995101928710938, 138.56283569335938, 13.404792785644531, 137.4700469970703, 33.935302734375, 51.691070556640625, 424.234375, -51.123046875, 91.92858123779297, -36.385009765625, -20.229782104492188, 133.65322875976562, -13.077686309814453, 18.370452880859375, -16.58692169189453, -13.979013442993164, 162.5078125, -112.45220184326172, 8.88568115234375, -43.117156982421875, 72.03638458251953], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000256.npy"} +{"epoch": 0.3869992441421013, "step": 257, "batch_size": 64, "mean": 64.13785552978516, "std": 86.61418914794922, "min": -129.94232177734375, "p10": -17.64556198120117, "median": 52.18703842163086, "p90": 149.5365737915039, "max": 357.8067321777344, "pos_frac": 0.8125, "sample": [5.278163909912109, 65.02662658691406, -89.88584899902344, 357.8067321777344, 190.27670288085938, 48.2543830871582, 16.975862503051758, 147.20263671875, 54.20257568359375, 128.504150390625, 119.17572021484375, 26.51238250732422, 42.15184783935547, -13.967117309570312, 128.70156860351562, 117.07300567626953, 129.90838623046875, 16.257568359375, -82.29397583007812, 2.2699966430664062, 3.177888870239258, 97.99992370605469, 79.274169921875, 22.610118865966797, 123.17125701904297, -5.0526275634765625, 21.315288543701172, 129.7117919921875, 150.44522094726562, -2.0947418212890625, 121.46894836425781, 144.02679443359375, 98.37094116210938, 17.6278076171875, 62.220054626464844, 68.0761947631836, 59.926025390625, 147.41639709472656, 133.73898315429688, -102.85299682617188, -129.94232177734375, 323.6890869140625, 27.88465118408203, 1.1560592651367188, -16.06329345703125, -33.40826416015625, 50.17150115966797, 17.855880737304688, 117.4323501586914, 84.88690948486328, 133.5332489013672, 37.28163146972656, 166.27139282226562, 9.499500274658203, 39.55169677734375, 178.35557556152344, -9.19003677368164, -25.62786102294922, 157.0201873779297, 14.640853881835938, 76.84696960449219, 7.9863128662109375, 113.30558776855469, -18.32367706298828], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000257.npy"} +{"epoch": 0.3885109599395314, "step": 258, "batch_size": 64, "mean": 48.21418380737305, "std": 73.20758056640625, "min": -160.86138916015625, "p10": -7.668728828430176, "median": 44.38903617858887, "p90": 141.43824157714846, "max": 220.508056640625, "pos_frac": 0.84375, "sample": [77.61972045898438, 37.84965896606445, 57.80131530761719, 2.261880874633789, 32.15171813964844, 128.0550537109375, 187.2850341796875, 220.508056640625, 61.251731872558594, -7.545221328735352, 4.517072677612305, 76.22992706298828, -89.02861022949219, 20.107515335083008, 107.63282775878906, 172.87841796875, 53.270591735839844, -45.93576431274414, 61.926788330078125, -121.75932312011719, 47.4048957824707, 54.948272705078125, -115.79060363769531, 56.09545135498047, 16.402542114257812, 26.12688446044922, 29.7220458984375, 59.13249206542969, 18.322589874267578, 7.221534729003906, 61.766357421875, -7.721660614013672, 19.70092010498047, 80.12601470947266, 72.52998352050781, 59.786643981933594, -2.2554092407226562, 24.603866577148438, 109.4158935546875, 205.197021484375, 18.834190368652344, -75.86929321289062, 164.1419677734375, 90.34565734863281, 79.96163940429688, 143.89312744140625, 96.46847534179688, 102.61663818359375, 25.598167419433594, 129.99940490722656, 7.880668640136719, 34.48161315917969, 104.05725860595703, 5.00701904296875, 20.76397705078125, 3.734701156616211, 147.08172607421875, -160.86138916015625, -0.324859619140625, 4.48747444152832, 94.81937408447266, 41.37317657470703, 135.71017456054688, 11.692930221557617], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000258.npy"} +{"epoch": 0.3900226757369615, "step": 259, "batch_size": 64, "mean": 71.63987731933594, "std": 94.18498992919922, "min": -197.71731567382812, "p10": -33.80566177368164, "median": 85.9200439453125, "p90": 178.98960876464844, "max": 237.28367614746094, "pos_frac": 0.78125, "sample": [-29.878768920898438, -197.71731567382812, -85.06407165527344, 123.04510498046875, 118.772705078125, 119.5201416015625, 119.10163116455078, 237.28367614746094, 144.58261108398438, -120.2894287109375, 121.28141784667969, -33.165855407714844, 28.55961036682129, 232.658935546875, 103.77534484863281, -17.195327758789062, -34.079864501953125, 1.8890838623046875, 217.47366333007812, 113.3900146484375, 51.06942367553711, 139.34719848632812, 149.0022735595703, 128.60317993164062, 92.95360565185547, 0.5062980651855469, -104.38368225097656, 28.778961181640625, 12.171775817871094, 28.86119270324707, 135.34324645996094, 157.72207641601562, -14.506832122802734, 140.56991577148438, 109.06742095947266, 79.4949951171875, 164.85366821289062, -12.124847412109375, 176.76707458496094, 215.5186309814453, 161.48260498046875, 73.38058471679688, 8.406951904296875, 203.35354614257812, 8.895820617675781, -7.596160888671875, -49.2982177734375, -15.610601425170898, -112.37583923339844, 10.26540756225586, 179.94212341308594, 157.74014282226562, 185.60629272460938, 35.393157958984375, 52.39811706542969, 109.2407455444336, 92.3450927734375, 77.84877014160156, 16.759902954101562, 54.95325469970703, 176.40834045410156, 170.20193481445312, 110.36880493164062, 41.28247833251953], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000259.npy"} +{"epoch": 0.3915343915343915, "step": 260, "batch_size": 64, "mean": 59.23049545288086, "std": 84.48526000976562, "min": -142.31871032714844, "p10": -36.32335586547851, "median": 55.41683578491211, "p90": 157.7618865966797, "max": 223.605224609375, "pos_frac": 0.71875, "sample": [143.81365966796875, -35.289756774902344, 133.20700073242188, -19.935405731201172, 19.734302520751953, 200.54733276367188, 121.74842834472656, 56.375267028808594, 154.0963897705078, -36.766326904296875, 156.59518432617188, 112.0172119140625, 123.438720703125, 88.29788208007812, 130.77777099609375, -39.098506927490234, 142.17828369140625, 102.20121765136719, 44.05622100830078, 153.8322296142578, 167.2286376953125, -19.03216552734375, 181.87185668945312, 146.50360107421875, -24.982177734375, 33.41313552856445, 125.70350646972656, 147.879638671875, -13.558998107910156, 18.133529663085938, 42.36576843261719, 54.458404541015625, 158.26190185546875, 70.3885726928711, -13.196508407592773, 73.07023620605469, 1.0533218383789062, -31.00653076171875, -142.31871032714844, 16.55065155029297, 7.183406829833984, 102.26127624511719, -16.54656410217285, 21.881195068359375, 15.296958923339844, 24.431503295898438, 162.33331298828125, -21.273887634277344, 143.91712951660156, 43.524566650390625, -49.90149688720703, 82.51905059814453, -8.744178771972656, 59.5869140625, 136.41424560546875, 131.76992797851562, 178.9517822265625, 109.01483154296875, 223.605224609375, -49.68024444580078, 2.146442413330078, -142.00735473632812, -105.03178405761719, -5.515403747558594], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000260.npy"} +{"epoch": 0.3930461073318216, "step": 261, "batch_size": 64, "mean": 40.98463821411133, "std": 80.22522735595703, "min": -177.41140747070312, "p10": -52.265621948242185, "median": 34.662601470947266, "p90": 147.5194046020508, "max": 187.950439453125, "pos_frac": 0.65625, "sample": [1.7592658996582031, -32.441558837890625, 56.15155792236328, -24.193201065063477, -43.60746765136719, 83.85282897949219, -10.140848159790039, 132.5863494873047, 23.666778564453125, -3.350900650024414, -39.764915466308594, -54.52198791503906, -42.098480224609375, 166.95724487304688, 125.00830078125, -61.79048156738281, -2.6832351684570312, 186.88011169433594, 5.528696060180664, 168.33346557617188, 38.351043701171875, 37.41737365722656, -36.185916900634766, -177.41140747070312, 187.950439453125, 119.41817474365234, 160.96484375, 107.85978698730469, -40.889503479003906, 16.13987159729004, 61.30069351196289, 117.97496795654297, 129.91702270507812, 166.5753936767578, 66.56036376953125, 35.795921325683594, 149.6250762939453, -75.4078369140625, -41.84588623046875, 89.164794921875, 77.23363494873047, 18.7120418548584, 122.65039825439453, 101.67206573486328, 13.514583587646484, -53.92919921875, -6.4966888427734375, -64.723388671875, -114.04176330566406, 10.912578582763672, 45.44438171386719, -4.9577789306640625, 117.00466918945312, 142.60617065429688, 33.52928161621094, -48.383941650390625, 51.65873336791992, 18.684066772460938, -3.6210174560546875, 115.28855895996094, 45.394775390625, 134.92681884765625, 107.20626831054688, 13.324800491333008], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000261.npy"} +{"epoch": 0.3945578231292517, "step": 262, "batch_size": 64, "mean": 46.487091064453125, "std": 73.33285522460938, "min": -99.57328033447266, "p10": -39.849420166015626, "median": 32.27462959289551, "p90": 145.55225219726563, "max": 240.83631896972656, "pos_frac": 0.734375, "sample": [-39.34529113769531, 48.95130920410156, 11.950393676757812, -3.282480239868164, 63.75459289550781, 111.91688537597656, -24.541336059570312, 0.486053466796875, 83.68824005126953, 128.87413024902344, -24.676422119140625, 105.85371398925781, -56.2213134765625, 61.644187927246094, 152.680908203125, -68.00173950195312, 25.508607864379883, 101.83505249023438, 197.49795532226562, 96.42694091796875, -99.57328033447266, -14.116752624511719, 46.527626037597656, -54.68983459472656, 17.38982391357422, 131.7530517578125, -7.027587890625, 162.89279174804688, -22.912765502929688, 4.768617630004883, 120.89424896240234, 49.54741668701172, 21.638107299804688, 144.52310180664062, -62.87683868408203, 145.99331665039062, 1.7422733306884766, 25.919391632080078, 230.65054321289062, 44.95640563964844, -5.818517684936523, 20.10931396484375, -22.517723083496094, 97.84095764160156, 117.54833984375, 45.19227600097656, 80.43083190917969, 19.396425247192383, -43.05345153808594, 31.638675689697266, 52.99784851074219, 6.515907287597656, 88.18537902832031, -30.590044021606445, 22.51508140563965, 32.91058349609375, 40.932830810546875, 164.98757934570312, 17.615222930908203, 240.83631896972656, 74.68862915039062, 9.442985534667969, 90.4336929321289, -40.06547546386719], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000262.npy"} +{"epoch": 0.3960695389266818, "step": 263, "batch_size": 64, "mean": 50.09217071533203, "std": 74.78292846679688, "min": -123.7565689086914, "p10": -32.83917026519775, "median": 51.28580856323242, "p90": 138.2490203857422, "max": 221.37295532226562, "pos_frac": 0.671875, "sample": [33.93940353393555, 132.45729064941406, -5.24859619140625, -35.74596405029297, -7.518426895141602, -30.231185913085938, 135.99087524414062, -3.1890411376953125, 133.4921875, 110.67488098144531, 135.83599853515625, 10.594770431518555, -17.16815948486328, 53.040348052978516, -21.145401000976562, -64.8430404663086, 132.0119171142578, -37.432952880859375, 67.27801513671875, -8.241844177246094, 104.14007568359375, 134.31256103515625, 110.949951171875, 52.85850143432617, 25.9931640625, 3.1759605407714844, 130.81735229492188, 80.57572937011719, 179.22906494140625, 90.70430755615234, 141.2822723388672, 80.85081481933594, -13.5185546875, 48.151458740234375, -8.777481079101562, 56.83319854736328, -115.21778869628906, 143.6659393310547, 107.18179321289062, 129.7371368408203, 52.234710693359375, -7.503349304199219, -33.63108825683594, 139.35714721679688, -105.88880920410156, 17.630142211914062, 50.47071838378906, 139.216796875, 65.51549530029297, 15.134788513183594, 33.88421630859375, 157.9843292236328, -18.64849853515625, 23.593158721923828, 221.37295532226562, -0.7859630584716797, 26.65180206298828, 126.12521362304688, 82.4056396484375, -30.991361618041992, -1.8069610595703125, -123.7565689086914, 127.73709106445312, 52.10089874267578], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000263.npy"} +{"epoch": 0.3975812547241119, "step": 264, "batch_size": 64, "mean": 48.76706314086914, "std": 79.2889404296875, "min": -202.26039123535156, "p10": -39.85978317260742, "median": 48.49640655517578, "p90": 145.67537231445314, "max": 223.00787353515625, "pos_frac": 0.71875, "sample": [87.25149536132812, -69.45799255371094, 34.517250061035156, 23.496021270751953, 50.76140594482422, -26.211755752563477, -6.5553436279296875, 6.128822326660156, 122.71755981445312, 17.267684936523438, -94.35661315917969, -14.662311553955078, -202.26039123535156, 125.67266082763672, -2.408935546875, 86.75291442871094, 31.09930419921875, 99.88847351074219, 16.04438018798828, 153.1522674560547, 119.50111389160156, 118.96328735351562, 69.68276977539062, 106.05288696289062, 55.81549072265625, 149.96900939941406, -10.074281692504883, 47.34925842285156, 3.14520263671875, 137.92247009277344, 43.42478942871094, -40.685096740722656, 155.29080200195312, 19.354583740234375, 4.342174530029297, 73.35181427001953, 147.2960662841797, -15.011772155761719, 110.4395980834961, 119.30328369140625, -37.934051513671875, -10.025192260742188, 72.51691436767578, 208.12379455566406, 25.41912078857422, 118.61943054199219, -148.3218231201172, 70.47274780273438, -13.028228759765625, -12.956695556640625, -40.861846923828125, 27.509056091308594, 162.35086059570312, 42.14535140991211, 101.5645751953125, -56.441715240478516, 52.89661407470703, 49.6435546875, 97.75035095214844, -3.3232040405273438, 85.54702758789062, 223.00787353515625, 110.25340270996094, 141.8937530517578], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000264.npy"} +{"epoch": 0.39909297052154197, "step": 265, "batch_size": 64, "mean": 54.456111907958984, "std": 82.28515625, "min": -107.93063354492188, "p10": -49.97450637817382, "median": 39.842018127441406, "p90": 163.78381958007813, "max": 225.1114501953125, "pos_frac": 0.765625, "sample": [6.5547943115234375, 0.9928646087646484, 14.323410034179688, -53.3250732421875, 10.323793411254883, -30.272789001464844, 92.64189910888672, 68.99994659423828, -104.24116516113281, 178.5768280029297, 19.442787170410156, 21.1475830078125, 99.49146270751953, 101.76295471191406, -107.93063354492188, -42.156517028808594, 105.0138168334961, 3.679931640625, 27.144262313842773, -107.11408996582031, 35.832855224609375, 225.1114501953125, 134.2783203125, 59.620819091796875, 3.816183090209961, -0.8359794616699219, 150.93035888671875, 11.697074890136719, 43.65760803222656, -4.742013931274414, 169.90533447265625, 182.34341430664062, -31.948944091796875, 144.21791076660156, 92.58180236816406, 174.93496704101562, 14.638227462768555, 20.411563873291016, -13.076385498046875, 2.2929534912109375, 93.1533432006836, 146.6924591064453, 128.54339599609375, 164.8607177734375, 145.14605712890625, 93.48086547851562, 148.56338500976562, 64.33841705322266, 79.13277435302734, -61.09246826171875, 34.80558776855469, 73.35406494140625, -15.72237777709961, 177.44027709960938, 161.27105712890625, 153.45494079589844, 151.94871520996094, -89.2073974609375, 36.02642822265625, -11.057353973388672, 97.04865264892578, 19.232933044433594, 65.3321762084961, -92.27899169921875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000265.npy"} +{"epoch": 0.40060468631897206, "step": 266, "batch_size": 64, "mean": 48.454750061035156, "std": 85.87370300292969, "min": -237.03933715820312, "p10": -37.74685249328613, "median": 41.838993072509766, "p90": 150.34238891601564, "max": 248.86859130859375, "pos_frac": 0.75, "sample": [-38.8780517578125, 151.4490966796875, 189.89053344726562, 18.299531936645508, 62.93357849121094, -5.236377716064453, 27.63092803955078, -33.221466064453125, 7.875131607055664, 55.09529113769531, 61.07314682006836, 48.918426513671875, 13.114425659179688, 100.20237731933594, 106.99723052978516, 107.60549926757812, 19.524444580078125, 248.86859130859375, -35.10738754272461, 1.33868408203125, 138.35011291503906, 32.20521545410156, -4.66009521484375, 147.76007080078125, 85.15155792236328, 179.9928741455078, -21.694183349609375, 158.41885375976562, 87.82308197021484, -55.46110534667969, 137.33856201171875, 2.1260528564453125, 147.179931640625, -17.661697387695312, 40.962989807128906, -24.013832092285156, 87.0802001953125, 126.19740295410156, 156.4804229736328, -33.12446212768555, 7.49700927734375, -92.57032775878906, -41.21601867675781, -6.759273529052734, -237.03933715820312, 42.714996337890625, 8.142623901367188, 48.0294189453125, 67.35114288330078, 75.95203399658203, -132.83395385742188, 21.521085739135742, 28.731613159179688, 58.18011474609375, 141.4285888671875, 81.48377990722656, 238.73870849609375, -82.99986267089844, 87.9156265258789, 15.257789611816406, 134.45046997070312, 8.336294174194336, 138.3358917236328, 11.630016326904297], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000266.npy"} +{"epoch": 0.4021164021164021, "step": 267, "batch_size": 64, "mean": 37.34331512451172, "std": 82.76095581054688, "min": -158.36399841308594, "p10": -81.84037933349609, "median": 29.376426696777344, "p90": 148.93453979492188, "max": 200.39524841308594, "pos_frac": 0.6875, "sample": [120.74069213867188, 4.2215118408203125, 3.4615135192871094, -125.19349670410156, 34.084564208984375, -48.747432708740234, 77.10800170898438, 162.67652893066406, 91.26213073730469, -5.861019134521484, -8.116506576538086, 58.06632995605469, 44.460018157958984, 179.78338623046875, 4.277812957763672, -97.04881286621094, 8.880966186523438, -77.98936462402344, 12.774917602539062, -124.15966796875, -66.66179656982422, -34.310585021972656, -34.008872985839844, 92.48085021972656, 104.27613830566406, 152.1763916015625, -10.810043334960938, 9.160884857177734, 200.39524841308594, -8.194450378417969, 56.0074462890625, -0.7068691253662109, 127.89769744873047, 128.02992248535156, -90.3555679321289, 15.710880279541016, 75.36320495605469, -83.49081420898438, -5.3568267822265625, 168.690673828125, 71.75779724121094, -2.488069534301758, 175.15866088867188, 11.72869873046875, 135.09152221679688, 127.87078857421875, 147.74978637695312, 39.339378356933594, -158.36399841308594, 149.44229125976562, 95.92425537109375, -94.93353271484375, 9.711742401123047, 24.668289184570312, 46.589866638183594, -3.1419715881347656, 88.07420349121094, 105.63463592529297, 134.82940673828125, 49.77989196777344, 2.2505149841308594, 70.09516906738281, 42.95580291748047, 9.27137565612793], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000267.npy"} +{"epoch": 0.4036281179138322, "step": 268, "batch_size": 64, "mean": 45.79230880737305, "std": 74.77279663085938, "min": -146.50169372558594, "p10": -18.320091247558594, "median": 22.92098045349121, "p90": 156.84528350830078, "max": 251.10836791992188, "pos_frac": 0.734375, "sample": [34.69517517089844, 157.55905151367188, -5.207221984863281, 2.893798828125, 85.25599670410156, 147.3216552734375, -3.132843017578125, -1.71417236328125, -33.923160552978516, 97.079833984375, 184.60501098632812, 9.013259887695312, 4.007268905639648, 65.72856140136719, 13.771759033203125, 20.766620635986328, 26.740922927856445, -18.104598999023438, 1.2779788970947266, 156.83230590820312, 119.46044921875, 149.06253051757812, -0.43802833557128906, 12.8841552734375, 29.47344207763672, 50.10443115234375, 19.0565185546875, 109.47695922851562, 48.376129150390625, -52.96095275878906, 154.4624786376953, -12.759525299072266, 6.274188995361328, 3.9118499755859375, -55.70530700683594, 178.85977172851562, -30.592567443847656, -15.629650115966797, -84.06216430664062, 26.804759979248047, 71.76597595214844, 14.962865829467773, 57.967933654785156, 171.3002471923828, 90.85404205322266, 86.78118896484375, 156.85084533691406, -13.41191291809082, 21.746734619140625, -146.50169372558594, -16.238433837890625, -9.252876281738281, 44.6240234375, 3.565990447998047, 251.10836791992188, 130.23233032226562, -18.412445068359375, 26.894927978515625, 126.4855728149414, 24.095226287841797, 6.343605041503906, 173.58103942871094, 72.2041015625, 1.6334953308105469], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000268.npy"} +{"epoch": 0.4051398337112623, "step": 269, "batch_size": 64, "mean": 53.730224609375, "std": 80.40789794921875, "min": -146.7100372314453, "p10": -21.393760681152337, "median": 33.320390701293945, "p90": 146.87196197509766, "max": 291.0481262207031, "pos_frac": 0.796875, "sample": [-10.54132080078125, -23.74407958984375, 12.368576049804688, 34.873538970947266, 113.86819458007812, -15.909683227539062, -37.05738830566406, 146.90797424316406, 4.3437347412109375, -14.720108032226562, -53.71904754638672, 22.486839294433594, 72.13040161132812, 104.24162292480469, 130.8306427001953, 112.91671752929688, 2.2829322814941406, 70.10074615478516, 214.44281005859375, -129.17300415039062, 25.124221801757812, 89.5750732421875, 122.49334716796875, 31.767242431640625, 21.615234375, 146.32261657714844, -1.4608020782470703, 206.2835693359375, 85.26852416992188, 150.93374633789062, 11.075551986694336, 21.53072738647461, 2.5434741973876953, 56.05320739746094, 37.42622375488281, 2.820669174194336, 1.9581413269042969, 30.764328002929688, 18.44451904296875, 146.78793334960938, 136.93551635742188, 97.86470031738281, 44.671409606933594, -0.4791069030761719, 73.45398712158203, -93.41786193847656, 113.9776840209961, 26.724166870117188, 11.948516845703125, 9.402572631835938, 291.0481262207031, 0.8776359558105469, 234.5404815673828, 44.95777893066406, -9.612602233886719, -146.7100372314453, -27.803466796875, 116.57037353515625, 84.51274108886719, 118.06672668457031, 100.8692398071289, 80.38833618164062, 148.03321838378906, 17.656524658203125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000269.npy"} +{"epoch": 0.40665154950869237, "step": 270, "batch_size": 64, "mean": 45.27813720703125, "std": 71.67630767822266, "min": -140.67002868652344, "p10": -23.01998786926269, "median": 34.6138801574707, "p90": 135.40808563232423, "max": 226.65921020507812, "pos_frac": 0.765625, "sample": [111.4062728881836, 139.3276824951172, -4.613395690917969, 205.04592895507812, 28.54183578491211, 137.2877960205078, 16.605913162231445, 7.857166290283203, 131.0220947265625, 33.56785583496094, 167.9768524169922, 121.76168823242188, -3.9552154541015625, 113.53949737548828, 9.798049926757812, -83.53282165527344, 101.99034118652344, 115.37557220458984, 16.557682037353516, 76.95610046386719, 50.498291015625, 88.68531799316406, 2.2387161254882812, -25.283916473388672, 1.8467254638671875, 28.70294952392578, 125.0104751586914, -70.84185791015625, 60.15275192260742, -4.7041168212890625, 60.632049560546875, 80.81271362304688, 35.65990447998047, 71.36321258544922, 62.90925598144531, 9.025238037109375, 57.83794403076172, -4.958351135253906, 8.096370697021484, -140.67002868652344, 226.65921020507812, 147.2548828125, 98.57798767089844, 1.5061321258544922, 23.039932250976562, 47.348175048828125, 29.03460693359375, -42.28605651855469, 145.6853485107422, 21.320192337036133, 118.80899810791016, 7.72601318359375, -80.62290954589844, 120.95875549316406, -17.73748779296875, -15.501808166503906, -3.1774826049804688, -112.04570770263672, 93.203857421875, 8.872589111328125, 44.90440368652344, -11.50973892211914, 48.71796417236328, 57.531951904296875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000270.npy"} +{"epoch": 0.40816326530612246, "step": 271, "batch_size": 64, "mean": 39.6859130859375, "std": 93.87278747558594, "min": -133.32003784179688, "p10": -54.00547561645507, "median": 16.940227508544922, "p90": 149.09632720947266, "max": 392.64630126953125, "pos_frac": 0.671875, "sample": [92.70425415039062, 28.608909606933594, 13.292610168457031, -4.413505554199219, 163.3681640625, 148.8195037841797, -3.196939468383789, 22.352754592895508, 161.07861328125, -56.35394287109375, 94.521728515625, 6.004266738891602, 243.53460693359375, -47.77790832519531, 104.46060943603516, 249.5558319091797, 3.5109081268310547, 0.2662811279296875, 7.204107284545898, 183.0253143310547, 19.00518035888672, 2.346912384033203, 131.84432983398438, -9.22364616394043, -99.34490966796875, -5.496669769287109, -33.048980712890625, 87.84394073486328, 21.708938598632812, -26.817834854125977, 35.44518280029297, 94.87799072265625, 17.213729858398438, -133.32003784179688, 19.44549560546875, 137.06570434570312, 7.024299621582031, -0.6905746459960938, 135.4416046142578, 138.4482421875, 18.40093994140625, -63.25066375732422, 392.64630126953125, -48.525718688964844, -30.7398681640625, 36.393646240234375, -18.11003875732422, -119.80474090576172, -84.83334350585938, 16.666725158691406, 38.60593032836914, -22.661338806152344, 149.2149658203125, 135.80125427246094, 55.01106262207031, -4.35369873046875, -103.94708251953125, 99.86901092529297, 1.7962112426757812, 21.377498626708984, -47.427764892578125, 4.7671661376953125, 147.67784118652344, 14.9891357421875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000271.npy"} +{"epoch": 0.40967498110355255, "step": 272, "batch_size": 64, "mean": 63.5083122253418, "std": 71.85926818847656, "min": -141.87388610839844, "p10": -12.422914886474608, "median": 63.52162551879883, "p90": 155.58891906738285, "max": 230.0770263671875, "pos_frac": 0.828125, "sample": [37.274925231933594, -37.290626525878906, -24.050888061523438, 147.70155334472656, 95.26322174072266, 158.36874389648438, 37.144378662109375, 165.010498046875, 230.0770263671875, 12.795486450195312, 87.95487213134766, 103.84974670410156, 7.778388977050781, -1.0624542236328125, 90.44022369384766, 56.68626403808594, 87.1197509765625, 23.477943420410156, 137.24227905273438, 93.01812744140625, 140.8999481201172, 8.827564239501953, 107.7740478515625, 70.35698699951172, 189.2495574951172, -11.333549499511719, 52.326507568359375, -54.451637268066406, 24.737022399902344, 149.1026611328125, 80.23436737060547, 167.63511657714844, 143.90354919433594, -3.293783187866211, 121.02738189697266, 11.497295379638672, 14.737117767333984, 3.241802215576172, 53.452964782714844, -141.87388610839844, 102.40825653076172, 158.92974853515625, 139.74801635742188, 102.91210174560547, 10.36187744140625, 7.254783630371094, -12.889785766601562, 103.20536041259766, 80.77178955078125, 86.20551300048828, 53.23051452636719, 125.66165161132812, -50.579559326171875, -48.85456848144531, 13.313972473144531, 164.64700317382812, 116.8714370727539, 109.36293029785156, -5.845119476318359, 5.311939239501953, 0.1476268768310547, 3.4412689208984375, 141.24932861328125, 20.815126419067383], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000272.npy"} +{"epoch": 0.41118669690098264, "step": 273, "batch_size": 64, "mean": 50.846221923828125, "std": 97.86784362792969, "min": -232.51792907714844, "p10": -70.22895278930663, "median": 47.30039405822754, "p90": 153.4188461303711, "max": 377.052001953125, "pos_frac": 0.75, "sample": [1.6511726379394531, 95.67792510986328, 143.59246826171875, -74.90856170654297, 150.632080078125, 5.657798767089844, 60.012847900390625, 125.20036315917969, -132.82260131835938, 65.11984252929688, 117.81622314453125, -62.86857604980469, 64.72161865234375, -2.359670639038086, 21.918357849121094, 40.57905960083008, 115.45552825927734, 43.54909133911133, 129.4628143310547, 90.58551788330078, 115.78992462158203, 15.051700592041016, -1.765594482421875, -5.889446258544922, -232.51792907714844, 144.0391845703125, 79.56571960449219, 5.589393615722656, -84.85071563720703, 17.005207061767578, 66.62652587890625, 303.30645751953125, 161.02932739257812, -123.85342407226562, 9.834060668945312, 122.1005630493164, 48.984657287597656, 117.19345092773438, -2.0893173217773438, 97.32865905761719, -73.3833999633789, 36.89087677001953, 141.80484008789062, -43.597999572753906, 46.420169830322266, 377.052001953125, 160.39999389648438, 130.97210693359375, 48.18061828613281, 87.20474243164062, -4.163400650024414, 36.981651306152344, 1.489419937133789, -33.188812255859375, 21.007465362548828, 156.94747924804688, 0.3005523681640625, 52.36449432373047, 7.140918731689453, -38.505897521972656, 177.68341064453125, 154.61317443847656, -121.11286926269531, 79.50491333007812], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000273.npy"} +{"epoch": 0.4126984126984127, "step": 274, "batch_size": 64, "mean": 51.99079895019531, "std": 86.860107421875, "min": -194.97177124023438, "p10": -45.248044586181635, "median": 49.91904067993164, "p90": 152.6188507080078, "max": 208.14064025878906, "pos_frac": 0.734375, "sample": [101.63819885253906, -27.92943572998047, -5.857006072998047, -18.451095581054688, -68.40216064453125, -149.87318420410156, 148.7147979736328, 152.63528442382812, 43.15984344482422, 1.5957012176513672, 125.56615447998047, 128.39553833007812, -21.728256225585938, -47.985565185546875, 55.707359313964844, 191.3938446044922, 150.1757354736328, -123.87251281738281, 14.97909927368164, 11.866401672363281, 208.14064025878906, 147.47805786132812, 43.16459274291992, 105.23430633544922, 113.32621765136719, 9.818092346191406, 148.96847534179688, 165.79856872558594, 54.451210021972656, 31.573150634765625, 9.973411560058594, 7.577629089355469, 124.07193756103516, -24.4833984375, 15.878133773803711, 51.65440368652344, -13.884757995605469, -31.247825622558594, 115.59530639648438, 38.50830078125, -66.70600891113281, 138.330078125, -5.10552978515625, 58.45158386230469, -38.860496520996094, 107.1725845336914, 17.697006225585938, -194.97177124023438, 65.30972290039062, 84.68183898925781, 152.58050537109375, 138.55844116210938, 15.272274017333984, 63.64643859863281, 57.256378173828125, -20.973512649536133, -82.51304626464844, 22.91293716430664, 173.86129760742188, 162.22671508789062, 103.45426940917969, 48.183677673339844, 194.60504150390625, 149.01560974121094], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000274.npy"} +{"epoch": 0.41421012849584277, "step": 275, "batch_size": 64, "mean": 55.013675689697266, "std": 73.2053451538086, "min": -141.51071166992188, "p10": -35.12450275421142, "median": 49.09889221191406, "p90": 145.78184509277344, "max": 198.051513671875, "pos_frac": 0.796875, "sample": [49.03971862792969, 27.697885513305664, -56.22868347167969, 87.70287322998047, 129.15408325195312, 0.2894439697265625, 124.4969482421875, -84.7652359008789, 146.10159301757812, 86.014404296875, 28.797134399414062, 40.79307556152344, 3.0970993041992188, 145.0357666015625, -8.468303680419922, 30.438617706298828, 128.06631469726562, 12.593528747558594, 128.51649475097656, -41.02794647216797, -141.51071166992188, 177.5727081298828, -0.2945556640625, 29.163612365722656, 120.30968475341797, 7.3494415283203125, -36.65769577026367, 8.546501159667969, 59.09672164916992, 69.05598449707031, 198.051513671875, 187.5166015625, -2.0144882202148438, 53.681304931640625, 169.03668212890625, 104.25286865234375, 104.66905975341797, 21.113401412963867, 3.6964111328125, -20.428909301757812, 100.73214721679688, -2.0776290893554688, 68.78470611572266, 7.298881530761719, 14.99302864074707, 186.67794799804688, 49.15806579589844, 126.69921112060547, -75.96968078613281, 111.83174133300781, 20.19442367553711, 11.333030700683594, 12.210409164428711, 33.05915832519531, 148.8354034423828, 139.87686157226562, -52.00031661987305, 95.96967315673828, 80.46748352050781, 96.51431274414062, 96.47960662841797, 54.50559997558594, -31.54705238342285, 137.29739379882812], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000275.npy"} +{"epoch": 0.41572184429327286, "step": 276, "batch_size": 64, "mean": 43.20240783691406, "std": 73.01844787597656, "min": -123.50225830078125, "p10": -35.37391738891601, "median": 34.49895668029785, "p90": 144.15117797851562, "max": 165.40777587890625, "pos_frac": 0.71875, "sample": [108.0811767578125, 8.497108459472656, 25.80388832092285, -7.712181091308594, -2.346811294555664, 21.909202575683594, 164.666748046875, -118.40206146240234, 79.17349243164062, 16.292844772338867, 12.994823455810547, 51.29138946533203, 33.241180419921875, 72.68701171875, -29.599136352539062, 139.0425262451172, 6.321300506591797, 2.3817901611328125, 36.76789855957031, -1.7626380920410156, 35.75673294067383, 140.66116333007812, 111.9911880493164, 165.40777587890625, -17.500350952148438, -91.6478271484375, 142.54806518554688, -24.175643920898438, 110.65946960449219, 146.8798828125, 11.776630401611328, -2.325164794921875, -27.69530487060547, -112.04154205322266, 132.16651916503906, -77.53746032714844, 144.83822631835938, 7.583213806152344, 90.34158325195312, 112.71908569335938, -123.50225830078125, 38.23817443847656, 156.1166229248047, -2.1461944580078125, -39.527435302734375, 78.333984375, 126.66404724121094, 82.74064636230469, 125.73918151855469, 37.487213134765625, -29.565162658691406, 63.00584411621094, 31.356727600097656, 69.3829345703125, 29.088699340820312, 155.13433837890625, 150.4239501953125, 94.56954956054688, -37.84882354736328, 23.413490295410156, 5.018754959106445, 77.31166076660156, 43.66939926147461, -9.886810302734375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000276.npy"} +{"epoch": 0.41723356009070295, "step": 277, "batch_size": 64, "mean": 23.73241424560547, "std": 77.73023986816406, "min": -137.93450927734375, "p10": -60.33757629394531, "median": 8.832293510437012, "p90": 130.4497581481934, "max": 198.26284790039062, "pos_frac": 0.609375, "sample": [134.42884826660156, -60.06785583496094, 106.52708435058594, 6.238288879394531, 121.16521453857422, -106.78184509277344, 32.73141098022461, -2.8139495849609375, 135.62400817871094, 57.873138427734375, -65.64759063720703, -39.43671417236328, 101.55096435546875, 4.6912689208984375, -37.430564880371094, 61.30201721191406, 10.40949821472168, -22.95526123046875, -7.47369384765625, -42.978885650634766, 54.58311462402344, 69.65656280517578, 2.5287322998046875, 40.23097229003906, -20.699352264404297, -47.861968994140625, -45.79579162597656, -81.30647277832031, -60.45317077636719, 108.84199523925781, 0.4918098449707031, 77.52603912353516, -13.622880935668945, 198.26284790039062, 11.273860931396484, 90.09353637695312, -39.35132598876953, -137.93450927734375, 7.255088806152344, 34.445316314697266, -53.77619934082031, -55.07394027709961, 140.65521240234375, 116.9014892578125, 66.37379455566406, 5.130130767822266, -45.87999725341797, 97.9756851196289, -110.76935577392578, 180.0927276611328, 95.97078704833984, -26.102258682250977, 120.72500610351562, 62.689453125, 13.547378540039062, -1.6721954345703125, 116.57047271728516, -137.01986694335938, 153.86639404296875, 137.3494415283203, -27.41905975341797, 11.297798156738281, 4.429416656494141, 17.892501831054688], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000277.npy"} +{"epoch": 0.41874527588813304, "step": 278, "batch_size": 64, "mean": 51.824073791503906, "std": 77.42402648925781, "min": -170.75692749023438, "p10": -22.812322425842282, "median": 58.45055389404297, "p90": 147.58020477294923, "max": 199.3203582763672, "pos_frac": 0.734375, "sample": [-20.403913497924805, -170.75692749023438, 102.64128112792969, 101.94429016113281, 10.352706909179688, -43.055702209472656, -147.59063720703125, 96.14520263671875, 185.39712524414062, 199.3203582763672, 44.26155090332031, -7.70361328125, 150.43753051757812, 150.27207946777344, 66.04723358154297, -20.182950973510742, 36.27333450317383, 102.1871337890625, 79.10386657714844, 128.92234802246094, 76.67634582519531, 37.611488342285156, 58.039093017578125, 17.946128845214844, -67.01451110839844, 156.736572265625, -23.844497680664062, 109.07939147949219, 136.69781494140625, 17.038467407226562, 9.379318237304688, 79.08690643310547, -7.86297607421875, -6.639606475830078, 84.43305969238281, 45.05376434326172, 61.467445373535156, 58.86201477050781, 113.10321044921875, 95.8564682006836, 141.29916381835938, 186.76290893554688, -2.6054420471191406, 124.22972106933594, 98.33199310302734, 45.142677307128906, -0.718505859375, 5.015386581420898, -63.71520233154297, 26.353134155273438, 77.96305847167969, 137.94049072265625, -142.5336151123047, -14.768453598022461, -10.720375061035156, 152.77920532226562, 68.991455078125, 128.89537048339844, 71.88887023925781, 29.412391662597656, 42.42084503173828, -0.2627906799316406, 97.04479217529297, 22.275466918945312], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000278.npy"} +{"epoch": 0.42025699168556313, "step": 279, "batch_size": 64, "mean": 66.63082122802734, "std": 72.85562133789062, "min": -124.13832092285156, "p10": -10.401191329956044, "median": 71.50853729248047, "p90": 140.06930084228517, "max": 300.52166748046875, "pos_frac": 0.890625, "sample": [1.4248714447021484, 13.560161590576172, 148.2487335205078, 20.631149291992188, -105.34107971191406, 135.41102600097656, 34.407386779785156, -15.005226135253906, -76.44113159179688, 119.62113952636719, 55.17662811279297, 123.254150390625, 87.7484359741211, 125.46644592285156, 103.29734802246094, 300.52166748046875, 88.35696411132812, 133.80967712402344, -31.9327449798584, 0.3415565490722656, -124.13832092285156, 101.08123779296875, 100.40579986572266, 58.330421447753906, 20.47724151611328, 29.81207275390625, 33.939186096191406, 3.3789901733398438, 26.464744567871094, 115.41970825195312, 35.232147216796875, 117.26072692871094, 192.49545288085938, 78.62303924560547, -33.20404052734375, 131.395263671875, 75.47149658203125, 104.42080688476562, 18.30854034423828, 166.092041015625, 129.55511474609375, 21.274799346923828, 125.94818115234375, 23.983230590820312, 43.900787353515625, 62.86993408203125, 7.567020416259766, 19.32843017578125, 125.00558471679688, 149.24436950683594, 75.28842163085938, -35.00730895996094, 67.72865295410156, 142.06570434570312, 3.0678749084472656, 18.255096435546875, 110.66189575195312, 17.221939086914062, 124.66375732421875, 90.3441390991211, 120.33700561523438, 107.64694213867188, 177.33038330078125, 22.26679229736328], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000279.npy"} +{"epoch": 0.4217687074829932, "step": 280, "batch_size": 64, "mean": 74.03085327148438, "std": 74.29737091064453, "min": -36.64679718017578, "p10": -2.9567310333251937, "median": 63.25364685058594, "p90": 165.5797897338867, "max": 272.5832214355469, "pos_frac": 0.84375, "sample": [112.37501525878906, 191.87344360351562, 15.026937484741211, 139.61109924316406, -0.070465087890625, 163.6031036376953, 1.1998043060302734, -12.051513671875, 110.07271575927734, 144.10833740234375, 43.029396057128906, 56.38191223144531, 104.46443176269531, 105.0655517578125, 111.65835571289062, 128.1632080078125, 103.91254425048828, 37.76285171508789, 121.34794616699219, 29.650169372558594, 24.758346557617188, 10.188587188720703, 4.182720184326172, -3.5458717346191406, 215.87692260742188, 114.42637634277344, -0.8837699890136719, 93.21329498291016, 272.5832214355469, 262.07135009765625, 10.678363800048828, -7.5460205078125, 19.048477172851562, 105.59324645996094, 14.647014617919922, 141.75115966796875, 197.05445861816406, 18.946670532226562, 156.4896240234375, 166.42694091796875, -36.64679718017578, -29.810199737548828, 13.069679260253906, -33.98310089111328, 103.46484375, 70.12538146972656, 77.64633178710938, 2.3749237060546875, 42.3651123046875, 1.2218360900878906, 161.04490661621094, 78.17106628417969, 5.726810455322266, 130.97640991210938, 0.19455528259277344, -5.1573028564453125, 70.40180206298828, 41.78681945800781, 27.720834732055664, 39.94415283203125, -1.5820693969726562, 134.6446075439453, 139.73440551757812, 181.39378356933594], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000280.npy"} +{"epoch": 0.42328042328042326, "step": 281, "batch_size": 64, "mean": 52.31721878051758, "std": 81.36494445800781, "min": -132.03431701660156, "p10": -39.28792266845703, "median": 39.505577087402344, "p90": 146.42080535888672, "max": 187.16651916503906, "pos_frac": 0.765625, "sample": [-34.34273147583008, 128.31317138671875, 8.956314086914062, 30.64520263671875, 124.90010070800781, 84.09780883789062, 68.69718170166016, 88.82244873046875, 186.78436279296875, 103.90066528320312, 26.17681121826172, 147.84805297851562, 113.70332336425781, 79.92571258544922, -107.66511535644531, 3.1544055938720703, 48.36595153808594, 132.48095703125, 6.901643753051758, 129.46401977539062, 123.67884826660156, -4.138660430908203, -37.90162658691406, 86.78411102294922, 142.67022705078125, 0.4843559265136719, 11.650627136230469, 6.099651336669922, 26.998817443847656, -132.03431701660156, 5.629142761230469, 9.255973815917969, -8.260498046875, 19.713876724243164, 164.1364288330078, -94.46479797363281, 109.7696533203125, 187.16651916503906, 10.737823486328125, -113.16358947753906, 150.941650390625, 150.63197326660156, 166.29071044921875, 89.54513549804688, 143.09056091308594, 25.06163787841797, 138.11837768554688, 18.424434661865234, 120.97439575195312, 92.10122680664062, -39.882049560546875, 24.7823486328125, 134.4547119140625, -110.40493774414062, -6.249835968017578, 137.81520080566406, 103.43692016601562, -59.464508056640625, 139.1661834716797, 129.142333984375, -37.188106536865234, -17.420839309692383, 6.128366470336914, -37.13677978515625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000281.npy"} +{"epoch": 0.42479213907785335, "step": 282, "batch_size": 64, "mean": 50.54930877685547, "std": 89.01667785644531, "min": -112.96331787109375, "p10": -68.97111053466796, "median": 47.82140922546387, "p90": 150.67366943359377, "max": 246.94845581054688, "pos_frac": 0.6875, "sample": [17.173194885253906, -41.81475067138672, -81.5206069946289, 4.9460296630859375, 27.170303344726562, 130.9830780029297, 79.25306701660156, 19.075170516967773, 2.8090171813964844, 152.67138671875, 139.3955078125, 146.0123291015625, 9.182504653930664, 131.41439819335938, 203.80206298828125, 57.741912841796875, 153.18881225585938, 72.39051818847656, -9.73114013671875, 100.3509521484375, 36.9481201171875, 79.3046875, -2.572366714477539, 237.95111083984375, 3.890970230102539, -33.09577178955078, -70.74600219726562, 120.58494567871094, 143.0646209716797, 46.570491790771484, -18.26376724243164, -63.80833435058594, -112.96331787109375, 138.35791015625, -5.808832168579102, -75.48719787597656, 144.397705078125, 8.979730606079102, -108.94326782226562, 65.12632751464844, 140.35494995117188, 122.50882720947266, -25.153182983398438, 84.8612060546875, 144.1721649169922, 246.94845581054688, 94.01240539550781, 106.79657745361328, -34.29960632324219, 96.63357543945312, -64.82969665527344, -13.818450927734375, -53.18434143066406, 8.194286346435547, -87.1645736694336, 197.7130889892578, 157.4185333251953, 112.27182006835938, -88.18795776367188, -49.36151123046875, 49.07232666015625, 93.40233612060547, 132.48110961914062, 16.332069396972656], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000282.npy"} +{"epoch": 0.42630385487528344, "step": 283, "batch_size": 64, "mean": 64.23751831054688, "std": 115.67066192626953, "min": -152.89453125, "p10": -51.35517272949218, "median": 49.42268753051758, "p90": 154.94285125732424, "max": 527.3154907226562, "pos_frac": 0.78125, "sample": [193.8938446044922, -71.90841674804688, -104.01698303222656, -62.18841552734375, 107.82301330566406, 64.4039306640625, 16.52727508544922, 35.633506774902344, 122.6041030883789, -0.6893119812011719, -10.55645751953125, 77.21585083007812, 31.160842895507812, 77.70799255371094, 117.14984130859375, 22.57040023803711, 66.4951171875, 151.03884887695312, -54.03010559082031, 244.1866455078125, 126.82411193847656, 64.87771606445312, 121.38465118408203, 17.797950744628906, 94.58721923828125, 152.14822387695312, 100.1394271850586, 2.4800796508789062, 202.09957885742188, 36.186546325683594, 31.369102478027344, 125.68077087402344, -106.60762023925781, 47.327857971191406, -45.11366271972656, 156.88140869140625, -135.14993286132812, -36.822479248046875, 94.18452453613281, 127.97917175292969, 153.31407165527344, -36.72888946533203, 110.62838745117188, 140.36924743652344, 23.625194549560547, 3.7994956970214844, 73.08769989013672, -152.89453125, 3.526304244995117, 75.56179809570312, 122.9134750366211, 10.80276107788086, 518.9971923828125, -4.470134735107422, 527.3154907226562, 71.43620300292969, 8.30078125, 32.57529067993164, 6.752069473266602, 49.75654602050781, -44.88114929199219, 155.64089965820312, 9.408126831054688, 49.088829040527344], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000283.npy"} +{"epoch": 0.42781557067271353, "step": 284, "batch_size": 64, "mean": 50.47001266479492, "std": 76.65125274658203, "min": -132.99368286132812, "p10": -29.330748748779296, "median": 33.68660926818848, "p90": 152.21505584716797, "max": 214.8868408203125, "pos_frac": 0.734375, "sample": [-32.841064453125, 10.529930114746094, 24.744352340698242, 24.6400146484375, 214.8868408203125, 96.269287109375, 12.091909408569336, -4.1688079833984375, 211.8555450439453, 19.43138885498047, -4.505680084228516, 19.682842254638672, -6.4628143310546875, -41.28239440917969, 132.58798217773438, -29.129371643066406, 7.462751388549805, 89.57672882080078, 3.6058197021484375, 86.97491455078125, 33.258331298828125, 63.31013488769531, 27.864181518554688, -34.14057922363281, -46.71227264404297, 36.86955261230469, 51.42522430419922, -29.41705322265625, 65.55938720703125, -15.541500091552734, 130.15835571289062, 109.5824203491211, 46.15168762207031, 62.91044616699219, 23.492996215820312, -0.01519012451171875, 147.7028045654297, 151.24118041992188, 181.42636108398438, 34.91138458251953, 209.45025634765625, 197.35670471191406, -1.3503570556640625, 37.08272171020508, -4.649751663208008, 0.32767295837402344, 77.186767578125, 34.11488723754883, 125.87354278564453, 116.92933654785156, 6.311016082763672, 106.99177551269531, 142.05276489257812, 6.177314758300781, 29.172744750976562, 152.63243103027344, 202.99105834960938, 66.23846435546875, 36.39091873168945, -132.99368286132812, 86.26432037353516, -132.97018432617188, -5.861778259277344, -1.6261730194091797], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000284.npy"} +{"epoch": 0.4293272864701436, "step": 285, "batch_size": 64, "mean": 44.26246643066406, "std": 78.71546936035156, "min": -140.04067993164062, "p10": -43.477692413330075, "median": 33.903560638427734, "p90": 166.6105484008789, "max": 204.70516967773438, "pos_frac": 0.671875, "sample": [42.264930725097656, 18.441810607910156, -41.1407470703125, 115.42557525634766, 26.2904052734375, -91.93386840820312, -105.46332550048828, -4.637519836425781, 80.21049499511719, -10.252866744995117, 88.73175048828125, 135.61737060546875, 124.3718032836914, 36.441734313964844, 188.5118865966797, -32.96382141113281, 95.0131607055664, 141.1302490234375, 2.5026893615722656, -0.20986557006835938, -11.057624816894531, 16.285892486572266, -61.951236724853516, -57.208187103271484, 7.8703765869140625, -140.04067993164062, -43.756500244140625, 66.12055969238281, 132.22332763671875, -32.117950439453125, -17.013580322265625, 172.40541076660156, -21.40142059326172, 171.95260620117188, 122.60784912109375, 59.78262710571289, 34.13066101074219, 75.83805847167969, 33.67646026611328, 103.14813232421875, 41.5286865234375, 37.707855224609375, -12.339683532714844, 162.3206024169922, 187.57794189453125, 19.82538604736328, 123.51472473144531, 204.70516967773438, -45.070159912109375, 1.2070503234863281, 39.400489807128906, 121.98954772949219, 23.75967025756836, 94.80059814453125, -2.8066253662109375, 35.3739013671875, 172.98870849609375, 168.4490966796875, -42.82714080810547, 23.557334899902344, 7.72613525390625, -7.2752227783203125, -26.10626220703125, 82.94366455078125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000285.npy"} +{"epoch": 0.4308390022675737, "step": 286, "batch_size": 64, "mean": 72.19083404541016, "std": 86.62715911865234, "min": -192.19810485839844, "p10": -22.747727584838866, "median": 58.01033973693848, "p90": 190.69710235595707, "max": 314.2630310058594, "pos_frac": 0.828125, "sample": [107.19166564941406, -2.6406917572021484, 0.9466724395751953, 36.908714294433594, 57.59339904785156, 98.057373046875, 20.488357543945312, 5.103708267211914, 28.838144302368164, -44.44639587402344, 101.88946533203125, 9.662139892578125, 57.37896728515625, 129.0819549560547, 151.7289581298828, 201.69619750976562, 144.19992065429688, 63.10279083251953, -23.648193359375, 42.09491729736328, 66.90180206298828, -19.544204711914062, -2.808277130126953, 179.44595336914062, -32.01359939575195, -39.78791809082031, 105.90068054199219, 89.23432922363281, 18.757980346679688, 112.91960144042969, 159.9197998046875, 77.02809143066406, 129.6721954345703, 182.61351013183594, 54.228355407714844, 314.2630310058594, 61.598567962646484, 128.01443481445312, -58.07040786743164, 200.2986297607422, 194.1614990234375, 238.82843017578125, 27.9151611328125, -44.06690979003906, 97.88874053955078, 29.412673950195312, 19.285409927368164, 118.32839965820312, 134.9925537109375, 148.61463928222656, 230.5965576171875, 49.642601013183594, 58.42728042602539, 9.970464706420898, -20.64664077758789, 49.32942199707031, 168.4625244140625, 15.59027099609375, -192.19810485839844, 210.3870086669922, 56.62816619873047, 17.46072769165039, 80.89543151855469, 6.506378173828125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000286.npy"} +{"epoch": 0.4323507180650038, "step": 287, "batch_size": 64, "mean": 61.681575775146484, "std": 88.73485565185547, "min": -329.9035949707031, "p10": -16.295737457275386, "median": 59.9429931640625, "p90": 171.5631820678711, "max": 215.4415283203125, "pos_frac": 0.796875, "sample": [68.83061981201172, -33.60572814941406, 24.655975341796875, 117.93438720703125, 207.782470703125, 179.87741088867188, 46.34588623046875, 76.06140899658203, -6.039543151855469, -24.898635864257812, 72.6366958618164, 4.736690521240234, 74.88033294677734, 118.49156188964844, 194.27525329589844, 180.80319213867188, 177.08795166015625, 65.35813903808594, 143.4635467529297, 110.07000732421875, 69.8501205444336, -329.9035949707031, 118.49609375, 10.277402877807617, 28.414775848388672, 107.8208999633789, -12.523971557617188, -46.13013458251953, 117.85979461669922, 13.128711700439453, 92.92765045166016, 54.52784729003906, -126.53079986572266, 10.526817321777344, -1.8798351287841797, 129.0025634765625, 3.9107131958007812, 10.75186538696289, -2.7161636352539062, 133.2125701904297, 99.28545379638672, -17.912208557128906, 158.5379638671875, 174.0633544921875, 7.704399108886719, 18.1649169921875, 165.7294464111328, 12.669368743896484, 153.02197265625, 138.56167602539062, -4.508434295654297, 51.909454345703125, 140.0923309326172, 14.190862655639648, 16.317249298095703, 28.07532501220703, 143.67112731933594, 13.076148986816406, 92.14893341064453, -19.605979919433594, -2.1019973754882812, 215.4415283203125, 155.47848510742188, 13.838302612304688], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000287.npy"} +{"epoch": 0.43386243386243384, "step": 288, "batch_size": 64, "mean": 49.44476318359375, "std": 85.84101104736328, "min": -140.23577880859375, "p10": -55.50985260009765, "median": 41.13745880126953, "p90": 156.500065612793, "max": 241.21673583984375, "pos_frac": 0.71875, "sample": [74.33070373535156, 0.8685531616210938, -124.68048095703125, -49.60498046875, 106.13626861572266, 12.721324920654297, 26.87098503112793, -30.092315673828125, 87.10389709472656, 78.24677276611328, 33.002506256103516, 82.40209197998047, 87.64688873291016, 132.57928466796875, -31.00531005859375, 96.28573608398438, -67.87857055664062, 73.70613098144531, -22.933074951171875, 108.89630126953125, -85.83394622802734, 115.84358215332031, 239.21261596679688, 141.745361328125, 3.289398193359375, 217.02105712890625, 66.52008819580078, 90.74370574951172, 7.796257019042969, 15.743860244750977, 161.3035125732422, -58.04051208496094, 101.09234619140625, 23.377376556396484, 146.4506072998047, 68.24868774414062, 37.91435241699219, -2.7580718994140625, 7.102634429931641, -36.054908752441406, -104.86173248291016, -140.23577880859375, 189.22943115234375, -19.11316680908203, 19.279747009277344, -6.8650665283203125, 90.2568588256836, -89.1390380859375, 112.88490295410156, 126.38861083984375, 91.57162475585938, -14.817916870117188, 241.21673583984375, 44.360565185546875, -26.77483367919922, 4.8986968994140625, 121.091064453125, 34.81659698486328, -4.32111930847168, 160.80697631835938, 20.92283058166504, 90.30561828613281, 219.6975860595703, 67.54464721679688], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000288.npy"} +{"epoch": 0.43537414965986393, "step": 289, "batch_size": 64, "mean": 49.082454681396484, "std": 93.54439544677734, "min": -166.89068603515625, "p10": -40.30742073059082, "median": 31.683273315429688, "p90": 160.1910385131836, "max": 391.924072265625, "pos_frac": 0.65625, "sample": [43.288848876953125, -6.6043548583984375, -144.79505920410156, -32.523094177246094, -29.935462951660156, -17.94830322265625, -9.441097259521484, 31.263702392578125, 98.75621032714844, 59.24474334716797, 0.7861785888671875, 41.86744689941406, 73.75148010253906, 145.39169311523438, 44.60954284667969, 57.871826171875, 280.5439758300781, -79.69696044921875, 12.807979583740234, 143.0995330810547, -9.516731262207031, 187.725341796875, 32.10284423828125, -21.982177734375, 170.41073608398438, 14.803321838378906, 137.09445190429688, 12.430889129638672, 76.77581787109375, 149.1600799560547, -29.738296508789062, 45.60516357421875, -40.34053421020508, -166.89068603515625, 181.63929748535156, -13.83913803100586, 15.322992324829102, 391.924072265625, 11.256423950195312, 17.848472595214844, 5.742643356323242, 76.68001556396484, -42.02960968017578, 67.70706176757812, 39.994529724121094, -6.214502334594727, 22.766841888427734, -4.696910858154297, -41.10615539550781, -54.65761947631836, 134.77133178710938, 76.72909545898438, -5.569183349609375, 100.41398620605469, 161.39068603515625, 143.7537078857422, 177.9451904296875, 138.0660858154297, 157.39186096191406, -1.53680419921875, 40.5019416809082, -40.23015594482422, 150.14768981933594, -30.815876007080078], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000289.npy"} +{"epoch": 0.436885865457294, "step": 290, "batch_size": 64, "mean": 56.224632263183594, "std": 85.86273193359375, "min": -143.0043182373047, "p10": -29.943990325927732, "median": 38.693843841552734, "p90": 168.37927093505863, "max": 280.6770324707031, "pos_frac": 0.71875, "sample": [92.51823425292969, 42.25757598876953, 147.01446533203125, 280.6770324707031, 41.99375915527344, 13.598546981811523, -35.2725830078125, -4.917139053344727, 11.374481201171875, 190.64495849609375, 71.99201965332031, 77.25581359863281, -30.631393432617188, -121.45926666259766, 52.362945556640625, -15.361351013183594, 27.74681854248047, -28.340049743652344, -2.6142539978027344, 58.54553985595703, 30.582731246948242, -6.005287170410156, 142.030517578125, 175.34555053710938, -11.817306518554688, 10.748771667480469, 47.47762680053711, -10.646453857421875, 5.932744979858398, -71.15087890625, 172.67266845703125, 140.7431182861328, 132.2501983642578, 29.882259368896484, -3.175140380859375, 216.78945922851562, 120.3834457397461, 110.20684814453125, 69.18769073486328, -76.31304931640625, 149.3135528564453, 119.73454284667969, 15.070369720458984, -143.0043182373047, 4.987937927246094, -15.120559692382812, -2.6338367462158203, 187.1055908203125, 153.89822387695312, 158.36134338378906, 49.01155471801758, 142.12548828125, -12.676605224609375, 11.826858520507812, -88.40557861328125, 210.36935424804688, 21.631568908691406, 147.60552978515625, 109.2906494140625, 35.39392852783203, 22.736568450927734, 135.08868408203125, 79.56227111816406, 12.591590881347656], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000290.npy"} +{"epoch": 0.4383975812547241, "step": 291, "batch_size": 64, "mean": 65.0680923461914, "std": 94.27042388916016, "min": -149.51351928710938, "p10": -41.19237213134765, "median": 42.23838424682617, "p90": 176.71834259033204, "max": 395.5391845703125, "pos_frac": 0.734375, "sample": [246.96786499023438, 152.5561981201172, -12.867767333984375, -45.46327209472656, 216.5675506591797, 35.78227233886719, -39.399391174316406, 10.11398696899414, 118.60829162597656, 2.6578903198242188, 139.96475219726562, 90.25334930419922, -19.635013580322266, -13.563592910766602, 99.41117858886719, 157.55630493164062, -79.17217254638672, -28.92005157470703, 193.680908203125, 98.5992660522461, -21.329666137695312, 33.120201110839844, 71.61729431152344, 46.77104949951172, 94.84135437011719, 144.2428741455078, 205.710205078125, 14.096321105957031, 194.73858642578125, 4.358184814453125, 79.56644439697266, 177.3212127685547, -6.193248748779297, 28.098724365234375, -61.03857421875, -16.36659049987793, 111.11149597167969, -13.025924682617188, 151.77137756347656, 15.067710876464844, 43.78827667236328, 175.3116455078125, -149.51351928710938, 26.09003448486328, 172.87774658203125, 141.31727600097656, 67.9215087890625, 137.03460693359375, 114.48912811279297, 395.5391845703125, -58.145816802978516, 24.120437622070312, -37.375545501708984, 39.47583770751953, -53.99396514892578, 4.965145111083984, 35.339378356933594, 147.15426635742188, 40.68849182128906, 129.79830932617188, 100.837890625, 111.45052337646484, 18.970306396484375, -41.960792541503906], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000291.npy"} +{"epoch": 0.4399092970521542, "step": 292, "batch_size": 64, "mean": 51.952178955078125, "std": 91.63832092285156, "min": -139.25125122070312, "p10": -65.98717193603515, "median": 38.75035095214844, "p90": 165.1946563720704, "max": 358.4529724121094, "pos_frac": 0.734375, "sample": [-101.45213317871094, 15.893119812011719, 126.99214935302734, -2.3217105865478516, 176.30874633789062, 84.54341888427734, 34.0884895324707, 358.4529724121094, 111.0911636352539, -95.96485900878906, -21.10845947265625, 6.47480583190918, 36.240814208984375, 55.519126892089844, -76.77940368652344, -5.407928466796875, 70.16192626953125, 92.32484436035156, 133.22848510742188, 79.33795166015625, 41.2598876953125, 202.1771697998047, 144.74163818359375, -139.25125122070312, -49.7222900390625, 13.071861267089844, 0.7518348693847656, 135.7188720703125, -66.82817077636719, 24.617109298706055, 48.25121307373047, 117.51304626464844, 11.439180374145508, -17.142501831054688, 33.590660095214844, -63.98204803466797, 138.97622680664062, -121.76535034179688, 0.6535720825195312, -1.1020584106445312, -64.02484130859375, 136.44149780273438, 129.275634765625, -13.381006240844727, 180.04698181152344, 116.341552734375, 29.213348388671875, 133.21421813964844, 34.73879623413086, 22.064682006835938, 217.52825927734375, 77.69632720947266, 14.020593643188477, 143.2379150390625, 44.25833511352539, 26.996238708496094, 89.12004089355469, 41.98866653442383, 132.93704223632812, 54.46051788330078, 173.96023559570312, -70.18569946289062, -33.22007751464844, 177.61822509765625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000292.npy"} +{"epoch": 0.4414210128495843, "step": 293, "batch_size": 64, "mean": 71.62772369384766, "std": 97.91943359375, "min": -165.1810760498047, "p10": -42.742181777954094, "median": 70.17009735107422, "p90": 169.98149719238285, "max": 348.12115478515625, "pos_frac": 0.8125, "sample": [31.728404998779297, 7.401885986328125, 31.393421173095703, 152.84854125976562, 97.78964233398438, 35.512508392333984, 1.0151252746582031, 11.74405288696289, 90.8433609008789, 31.942291259765625, 158.41294860839844, 6.227418899536133, 163.36300659179688, 126.3333969116211, 88.98323059082031, 16.52552032470703, 142.00425720214844, 104.71238708496094, 79.03584289550781, 129.225341796875, 110.09284973144531, -85.73968505859375, -20.26604461669922, -105.36991882324219, 135.30316162109375, 15.775604248046875, 128.58197021484375, -72.48390197753906, 42.07964324951172, 343.6218566894531, 7.373546600341797, 224.66424560546875, 244.37014770507812, -82.52326965332031, 63.769073486328125, 9.124378204345703, 149.23223876953125, -165.1810760498047, 14.425411224365234, -6.037208557128906, 65.17829895019531, 190.2803192138672, -37.174564361572266, -48.49031066894531, -24.90020751953125, 21.966285705566406, 75.16189575195312, 136.2857666015625, 348.12115478515625, 160.94471740722656, 162.27769470214844, 83.02883911132812, 94.66875457763672, 3.190074920654297, 147.61676025390625, -45.12830352783203, 130.76905822753906, 92.4610595703125, 61.13780212402344, 156.90618896484375, 14.3447265625, -26.720890045166016, 172.8179931640625, 191.57553100585938], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000293.npy"} +{"epoch": 0.4429327286470144, "step": 294, "batch_size": 64, "mean": 50.03569793701172, "std": 82.72636413574219, "min": -140.69720458984375, "p10": -23.82930068969726, "median": 30.75413990020752, "p90": 149.69373474121093, "max": 428.3789367675781, "pos_frac": 0.71875, "sample": [159.20872497558594, 43.86656188964844, 57.50614929199219, 104.35586547851562, 147.9402618408203, 77.67252349853516, 155.31658935546875, -29.95916748046875, -5.105781555175781, 16.421607971191406, 64.63897705078125, -140.69720458984375, -2.1925792694091797, 43.579071044921875, 31.63816261291504, 133.83084106445312, 145.423095703125, 17.609657287597656, 147.11131286621094, 51.35002899169922, -17.197784423828125, 56.245033264160156, -0.04029655456542969, 90.73784637451172, -15.792678833007812, 167.44110107421875, -5.947063446044922, 89.5216064453125, -1.12896728515625, 428.3789367675781, 29.8701171875, 2.325580596923828, 4.370325088500977, 205.79666137695312, 49.58741760253906, -4.5714569091796875, 7.1314239501953125, -39.222740173339844, -4.001617431640625, 19.68121337890625, -17.401100158691406, 41.9896240234375, 150.07705688476562, 4.389232635498047, 11.915426254272461, 76.09783172607422, 114.58386993408203, 46.48219680786133, 6.419429779052734, 148.79931640625, -12.776752471923828, 84.81329345703125, 192.21517944335938, 13.453056335449219, 4.10505485534668, 76.91827392578125, -26.584243774414062, -39.99259948730469, 6.6455535888671875, 23.138885498046875, 76.73320770263672, -52.0124397277832, 38.07658004760742, -48.50056457519531], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000294.npy"} +{"epoch": 0.4444444444444444, "step": 295, "batch_size": 64, "mean": 48.536399841308594, "std": 78.06909942626953, "min": -133.546875, "p10": -31.624574279785154, "median": 30.575824737548828, "p90": 150.9400390625, "max": 217.20196533203125, "pos_frac": 0.734375, "sample": [187.6041259765625, 31.711868286132812, -17.552993774414062, -33.397979736328125, 97.54796600341797, 120.16189575195312, 126.6614761352539, 26.675296783447266, 8.923431396484375, 61.054595947265625, 197.22427368164062, 38.33036422729492, 217.20196533203125, 29.439781188964844, 8.97244644165039, 36.836708068847656, 3.13330078125, 59.85747528076172, -9.201522827148438, 118.95822143554688, 152.4076385498047, -103.28195190429688, 0.5821952819824219, 25.384916305541992, 211.3697509765625, 91.95343017578125, 7.391984939575195, -27.02503204345703, 107.914306640625, -14.081748962402344, -33.17485046386719, -20.767803192138672, 164.72059631347656, 151.4733428955078, 11.349044799804688, -102.72846221923828, 110.6024169921875, 8.89914321899414, 73.85737609863281, 149.69566345214844, 93.382568359375, -133.546875, -13.2686767578125, 95.8466796875, 116.85676574707031, 70.86072540283203, -44.36357116699219, -19.660362243652344, -92.69723510742188, 101.76519775390625, -4.285627365112305, 11.105215072631836, 13.720001220703125, 13.999481201171875, 81.41006469726562, 6.444023132324219, 99.6053466796875, -28.00726318359375, 63.89606475830078, 29.160018920898438, 124.67134857177734, 119.88079833984375, 139.0775146484375, -16.207149505615234], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000295.npy"} +{"epoch": 0.4459561602418745, "step": 296, "batch_size": 64, "mean": 60.04237365722656, "std": 96.9103775024414, "min": -175.51766967773438, "p10": -69.96699218749998, "median": 54.349557876586914, "p90": 187.56978912353517, "max": 267.1705322265625, "pos_frac": 0.703125, "sample": [47.34922790527344, 258.42095947265625, 121.79827880859375, -2.359376907348633, 8.211189270019531, 89.688232421875, -2.6199283599853516, 267.1705322265625, -6.668903350830078, 110.59965515136719, 54.07851791381836, 84.77742004394531, -76.87315368652344, 139.8656768798828, -2.5265731811523438, 91.54130554199219, 180.76417541503906, 99.37149810791016, 14.08349609375, 76.41131591796875, -1.24847412109375, 30.60501480102539, 44.16357421875, -112.22061157226562, -128.77316284179688, -53.85261535644531, 18.490325927734375, 67.37828826904297, 40.59680938720703, 44.11073303222656, 54.62059783935547, -88.76261901855469, 95.932373046875, 22.750457763671875, 257.2815856933594, 9.770858764648438, 6.897254943847656, -49.45505142211914, 136.6648406982422, -99.34684753417969, 67.08690643310547, 75.48574829101562, 190.48648071289062, -175.51766967773438, 210.82054138183594, 237.4942626953125, 130.11978149414062, 127.77640533447266, 121.72697448730469, 107.71825408935547, 140.40696716308594, 21.011001586914062, 143.76829528808594, -6.257463455200195, 214.96981811523438, -2.78106689453125, -12.9488525390625, 177.223388671875, -15.19879150390625, -80.93913269042969, -12.30780029296875, 111.57518005371094, 140.05319213867188, 82.25240325927734], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000296.npy"} +{"epoch": 0.4474678760393046, "step": 297, "batch_size": 64, "mean": 61.6826171875, "std": 66.61918640136719, "min": -98.70687866210938, "p10": -19.516017150878906, "median": 51.02478790283203, "p90": 150.5565643310547, "max": 176.0115509033203, "pos_frac": 0.796875, "sample": [137.36264038085938, -45.100563049316406, 153.37893676757812, 88.26558685302734, 37.614288330078125, 63.15687561035156, 35.22899627685547, 144.0661163330078, -20.022811889648438, -98.70687866210938, 113.78578186035156, 51.28428649902344, 19.335407257080078, 24.477291107177734, -1.168710708618164, 155.0975341796875, 35.81745910644531, 108.34478759765625, 50.765289306640625, 34.78282165527344, 89.69917297363281, 62.24855041503906, -41.92405700683594, -67.37810516357422, -18.33349609375, 146.47622680664062, 72.5447769165039, -2.9439849853515625, 121.11332702636719, 20.883338928222656, 151.9346466064453, 165.53221130371094, -25.334091186523438, 13.64552116394043, 45.30949783325195, 33.51500701904297, -6.2923736572265625, -31.588775634765625, 97.73970031738281, 150.28195190429688, 145.58221435546875, -11.690010070800781, 116.91017150878906, 20.531343460083008, 30.35071563720703, 107.23871612548828, 103.15709686279297, 1.0844087600708008, 5.6380157470703125, 123.88187408447266, 27.65579605102539, 86.39888000488281, 109.48759460449219, 129.6755828857422, 11.417682647705078, 102.7108154296875, 176.0115509033203, 150.67425537109375, -1.15887451171875, 175.9693603515625, 18.662757873535156, 47.87607955932617, 137.67398071289062, 67.05345153808594], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000297.npy"} +{"epoch": 0.4489795918367347, "step": 298, "batch_size": 64, "mean": 45.52552032470703, "std": 81.97398376464844, "min": -152.5054931640625, "p10": -56.13061752319336, "median": 36.2421875, "p90": 148.75076751708986, "max": 265.751708984375, "pos_frac": 0.75, "sample": [50.074073791503906, -107.64610290527344, 164.34396362304688, 28.464874267578125, 160.63140869140625, 59.56712341308594, 141.53761291503906, 18.241180419921875, -60.36151123046875, 114.59676361083984, 150.00929260253906, 121.40843963623047, -51.66754150390625, -57.874969482421875, -19.34613800048828, 29.931873321533203, 125.22189331054688, -42.61731719970703, 134.90687561035156, 88.77877807617188, -7.862857818603516, 14.05343246459961, 144.67330932617188, 79.20571899414062, 36.65765380859375, 26.401641845703125, -106.03681945800781, 50.26844024658203, 130.01023864746094, 152.99359130859375, 8.948675155639648, 157.44000244140625, 20.050704956054688, 51.072044372558594, 29.446918487548828, 130.96847534179688, 11.62592887878418, 141.67857360839844, 6.591363906860352, 6.975547790527344, 58.362754821777344, -52.060462951660156, -47.766639709472656, 33.69816589355469, 23.33972930908203, 265.751708984375, 122.27340698242188, -38.65138244628906, -13.41354751586914, 35.82672119140625, 93.72100830078125, 66.43826293945312, -57.931488037109375, -33.27326965332031, 2.7717132568359375, 131.27618408203125, -91.22776794433594, 41.14521789550781, 41.512786865234375, -152.5054931640625, 156.29464721679688, 46.96614456176758, 1.9074325561523438, 145.814208984375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000298.npy"} +{"epoch": 0.4504913076341648, "step": 299, "batch_size": 64, "mean": 39.53830337524414, "std": 95.00577545166016, "min": -147.179931640625, "p10": -66.18877143859862, "median": 25.295761108398438, "p90": 138.6361831665039, "max": 398.5435791015625, "pos_frac": 0.65625, "sample": [69.431396484375, 88.653564453125, -30.951383590698242, -15.940528869628906, 3.556253433227539, 114.47265625, 45.86988830566406, 94.31520080566406, 200.85009765625, -10.635244369506836, -80.22821807861328, 88.34822082519531, -147.179931640625, 5.257667541503906, -57.17246627807617, -19.768775939941406, 24.768768310546875, 136.64268493652344, 80.15008544921875, -98.08551788330078, 110.64535522460938, -41.421443939208984, 42.7205696105957, 76.69573974609375, -72.23966979980469, -102.74983978271484, -5.068824768066406, 17.969852447509766, 350.94189453125, 4.952232360839844, 188.92251586914062, -19.97046661376953, 48.273826599121094, -49.04646682739258, 9.07952880859375, 65.0283203125, 25.82275390625, 149.8646697998047, 30.9140625, 0.00267791748046875, 37.195281982421875, -46.303916931152344, 123.3448715209961, 56.31549835205078, 398.5435791015625, -51.788490295410156, 19.763113021850586, 118.80123138427734, 72.76388549804688, 77.77261352539062, 6.448793411254883, 84.87876892089844, 77.2787857055664, -92.47270202636719, -17.09674072265625, 139.49053955078125, -10.7635498046875, 86.417236328125, 1.0668315887451172, 42.46940612792969, -5.9235687255859375, -11.343467712402344, -70.05290222167969, 169.9546661376953], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000299.npy"} +{"epoch": 0.4520030234315949, "step": 300, "batch_size": 64, "mean": 50.885990142822266, "std": 85.1492691040039, "min": -138.8787078857422, "p10": -67.51495056152343, "median": 46.955081939697266, "p90": 173.5278549194336, "max": 227.70323181152344, "pos_frac": 0.75, "sample": [28.186071395874023, 125.37980651855469, 12.767501831054688, 24.303115844726562, -3.553699493408203, 12.269546508789062, 108.77176666259766, 39.25146484375, -52.96107482910156, -121.59977722167969, 187.65065002441406, 112.03832244873047, 123.64459991455078, 212.0109100341797, 170.8567657470703, 82.40062713623047, 178.80352783203125, -81.359619140625, 14.568885803222656, 72.19176483154297, 7.05925178527832, 46.052284240722656, 47.857879638671875, -3.8436660766601562, -69.47158813476562, 56.6627197265625, 106.05616760253906, 20.29381561279297, 67.80065155029297, 53.317325592041016, 7.497249603271484, -138.8787078857422, 83.23495483398438, -5.073772430419922, 106.09326171875, 57.27677536010742, 23.34712791442871, 227.70323181152344, 131.49314880371094, 174.672607421875, 190.002685546875, 52.94113540649414, -19.31982421875, 215.72557067871094, 72.85291290283203, -62.949462890625, 9.976016998291016, 107.55111694335938, -84.17874145507812, 71.74981689453125, 113.16571044921875, -0.3894386291503906, 45.89166259765625, 63.644737243652344, -16.221473693847656, -92.5633773803711, 16.374282836914062, 158.8060760498047, 94.61849975585938, -111.17142486572266, 1.9024734497070312, 36.363487243652344, 160.1234130859375, -12.964126586914062], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000300.npy"} +{"epoch": 0.45351473922902497, "step": 301, "batch_size": 64, "mean": 41.7857551574707, "std": 89.09607696533203, "min": -131.447265625, "p10": -64.34572067260741, "median": 29.2493896484375, "p90": 144.31641235351563, "max": 395.5388488769531, "pos_frac": 0.703125, "sample": [119.75685119628906, 64.7499771118164, 80.2496109008789, 19.402515411376953, 1.3470458984375, -49.725311279296875, 134.98455810546875, 140.17730712890625, 44.85661315917969, 59.16687774658203, 11.022851943969727, 21.957666397094727, 59.55564880371094, -33.12508010864258, -105.7045669555664, 85.78164672851562, 35.959014892578125, 123.0941162109375, 123.35318756103516, 15.024219512939453, -40.382476806640625, 61.301334381103516, 146.088134765625, -25.26526641845703, 74.62286376953125, 15.486221313476562, -12.168882369995117, 144.72003173828125, 71.72563171386719, -70.61161041259766, -72.35798645019531, 4.967201232910156, 57.19990158081055, 217.61996459960938, -3.995330810546875, 8.471412658691406, -98.91168212890625, 395.5388488769531, -32.322509765625, 13.976070404052734, -39.92157745361328, 108.40359497070312, 25.98609161376953, -107.15934753417969, 32.51268768310547, 121.47261047363281, 132.4567108154297, 52.308258056640625, -5.435455322265625, 162.52032470703125, 56.65843963623047, 2.86102294921875, 115.49234008789062, -30.02133560180664, -102.91853332519531, 5.324893951416016, 164.5140380859375, 148.50076293945312, 37.21630859375, -18.865760803222656, 6.96856689453125, -131.447265625, 143.3746337890625, -14.100227355957031], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000301.npy"} +{"epoch": 0.455026455026455, "step": 302, "batch_size": 64, "mean": 31.421663284301758, "std": 82.98118591308594, "min": -142.64796447753906, "p10": -79.08861007690427, "median": 23.872801780700684, "p90": 137.56866302490235, "max": 268.7679443359375, "pos_frac": 0.640625, "sample": [117.83453369140625, 207.10574340820312, -14.627288818359375, 268.7679443359375, 37.92265319824219, -3.014852523803711, 136.30319213867188, -60.03376007080078, 179.02142333984375, 26.6845703125, 57.94563293457031, 31.48369598388672, -2.6875362396240234, 4.337680816650391, 31.149234771728516, 132.52801513671875, 4.079132080078125, 5.8301239013671875, -20.60387420654297, 109.83735656738281, 33.282161712646484, -91.9511489868164, -142.64796447753906, 59.74632263183594, -19.685562133789062, -38.115562438964844, 71.97135925292969, 51.00897979736328, 20.280345916748047, -33.04768371582031, -29.350608825683594, 138.1110076904297, 119.35586547851562, 58.66376495361328, -19.015233993530273, 153.64349365234375, 175.8092041015625, -41.68894958496094, -21.8653564453125, 39.63893508911133, 78.09538269042969, 119.45159149169922, 8.891361236572266, 18.714279174804688, 99.29281616210938, 70.50624084472656, -126.0953369140625, -36.99205780029297, -17.617935180664062, 17.03174591064453, 94.34326934814453, 89.66209411621094, 1.6168594360351562, 23.52923583984375, 32.61484146118164, -87.25497436523438, -41.42622375488281, 145.46884155273438, -99.86944580078125, -100.0445556640625, -112.91217041015625, -2.055419921875, 77.81272888183594, 24.216367721557617], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000302.npy"} +{"epoch": 0.4565381708238851, "step": 303, "batch_size": 64, "mean": 60.011985778808594, "std": 96.16905975341797, "min": -124.29476928710938, "p10": -79.85811462402343, "median": 82.21207427978516, "p90": 162.3596969604492, "max": 333.20965576171875, "pos_frac": 0.71875, "sample": [17.523605346679688, 89.90477752685547, 148.84085083007812, 5.290155410766602, 167.77325439453125, 90.18702697753906, 90.61109924316406, -93.62071990966797, 137.90423583984375, 89.17097473144531, 160.68870544433594, -48.39745330810547, 102.82077026367188, -9.22899055480957, -96.64266967773438, -106.1185531616211, -86.25450134277344, 191.53903198242188, -124.29476928710938, -64.93321228027344, 0.8945636749267578, 42.644775390625, 333.20965576171875, 130.69403076171875, -121.25813293457031, 75.253173828125, 141.67149353027344, -29.434791564941406, -9.738235473632812, -39.36517333984375, 93.07792663574219, 32.69169616699219, 11.562095642089844, 19.037567138671875, -25.369407653808594, -13.62295150756836, 163.07583618164062, 5.383644104003906, 120.58290100097656, 93.48951721191406, 25.928543090820312, 131.11102294921875, 116.44641876220703, 137.04989624023438, 7.822822570800781, 150.00282287597656, 195.788330078125, 56.368988037109375, 104.88496398925781, 159.6239013671875, -95.99620056152344, -3.2615833282470703, -59.07295608520508, 224.91506958007812, 150.8115997314453, 95.47749328613281, 41.34007263183594, 134.48635864257812, 96.89801025390625, 168.4165496826172, 145.834716796875, -44.63145446777344, 56.27447509765625, 157.0031280517578], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000303.npy"} +{"epoch": 0.4580498866213152, "step": 304, "batch_size": 64, "mean": 62.22607421875, "std": 76.59178161621094, "min": -111.92120361328125, "p10": -38.80787734985351, "median": 56.62826347351074, "p90": 167.2241195678711, "max": 212.08407592773438, "pos_frac": 0.75, "sample": [147.9782257080078, 19.73467254638672, 173.95294189453125, 166.94056701660156, 31.673690795898438, 56.9244270324707, 61.06095886230469, 45.60900115966797, -54.519317626953125, 44.523441314697266, -12.489471435546875, 151.60125732421875, 139.74087524414062, 40.05494689941406, 142.73362731933594, 40.08695983886719, -5.627246856689453, 108.23126220703125, 157.25082397460938, -111.92120361328125, 42.395904541015625, 115.54847717285156, 13.70892333984375, 30.902130126953125, -41.10356903076172, 0.0380706787109375, -56.70287322998047, 43.33124923706055, 65.60652160644531, 167.34564208984375, 118.8572006225586, -3.965456008911133, 202.64633178710938, 107.72708892822266, 56.33209991455078, -33.451263427734375, 59.68462371826172, 107.56100463867188, 94.26660919189453, -7.22900390625, -6.732461929321289, -43.79786682128906, 190.681640625, 139.0599365234375, 0.7607002258300781, 0.990203857421875, 57.643890380859375, 65.64493560791016, 106.54204559326172, 159.33486938476562, -42.61505126953125, -5.026390075683594, 212.08407592773438, -18.019428253173828, 110.93582916259766, -54.69467544555664, 28.799049377441406, 173.80606079101562, 183.92039489746094, 37.34455871582031, 62.52619934082031, -32.757625579833984, 121.04873657226562, 107.94908142089844], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000304.npy"} +{"epoch": 0.4595616024187453, "step": 305, "batch_size": 64, "mean": 47.7900276184082, "std": 92.92848205566406, "min": -206.5340576171875, "p10": -69.72310409545898, "median": 52.73374938964844, "p90": 145.2412811279297, "max": 362.70648193359375, "pos_frac": 0.75, "sample": [140.0016326904297, 5.516838073730469, 60.34129333496094, -80.6049575805664, 5.3531646728515625, 3.049732208251953, 179.27178955078125, -66.84864044189453, 76.99510192871094, 172.01931762695312, 1.1324043273925781, 220.54425048828125, -92.48318481445312, 126.12796020507812, -95.12907409667969, -48.928504943847656, 31.79668426513672, 3.031177520751953, -70.95501708984375, 75.79962158203125, 30.047115325927734, 75.18474578857422, 36.95146942138672, 62.48997497558594, 76.63922882080078, 93.19853210449219, 50.709197998046875, 35.42021179199219, 120.91751098632812, -206.5340576171875, -20.4068603515625, 100.52953338623047, 130.10792541503906, 138.639892578125, -27.746246337890625, 23.907865524291992, 152.02249145507812, -64.8062744140625, 71.18179321289062, 54.75830078125, 0.5993881225585938, 71.92816162109375, 362.70648193359375, 155.24957275390625, 106.13932037353516, -22.959644317626953, 23.837661743164062, -80.07560729980469, 134.6616668701172, 98.32997131347656, 113.7063980102539, 39.546287536621094, 36.62871170043945, 117.63299560546875, 70.76399230957031, 142.90875244140625, -65.20559692382812, 35.182830810546875, -131.55027770996094, 146.24093627929688, -2.2017269134521484, 57.871543884277344, 126.90873718261719, -59.532623291015625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000305.npy"} +{"epoch": 0.46107331821617537, "step": 306, "batch_size": 64, "mean": 53.01700210571289, "std": 90.66442108154297, "min": -141.2439727783203, "p10": -22.903686714172363, "median": 35.06906700134277, "p90": 145.36186981201172, "max": 467.36248779296875, "pos_frac": 0.8125, "sample": [136.8734893798828, 106.16863250732422, 0.3167400360107422, 163.8807373046875, 5.997772216796875, 177.2242431640625, 59.24235534667969, 51.97622299194336, 41.89636993408203, 102.50530242919922, 177.68429565429688, 115.11935424804688, -82.56812286376953, 34.76372146606445, 0.8161354064941406, -127.05018615722656, 117.87571716308594, 145.51080322265625, 16.298099517822266, -0.051143646240234375, -8.461631774902344, 72.35346221923828, 124.1533432006836, 8.270788192749023, 11.616209030151367, 35.374412536621094, -23.058401107788086, 106.47215270996094, 21.209815979003906, 15.750732421875, -74.05662536621094, 145.0143585205078, -0.39478302001953125, 6.502595901489258, 57.323341369628906, 64.1216049194336, 173.03602600097656, 467.36248779296875, 29.370834350585938, 24.385101318359375, 51.62322235107422, 22.4586124420166, 2.509632110595703, 46.48900604248047, -39.385337829589844, 19.371337890625, 3.4376392364501953, 30.0224609375, 28.540130615234375, 52.80564880371094, 74.4966049194336, 98.4834976196289, -141.2439727783203, 3.32794189453125, 104.0235595703125, -16.538585662841797, 48.855934143066406, -22.542686462402344, 90.6838607788086, 274.8438415527344, -68.67485809326172, 92.46591186523438, 135.41273498535156, 0.7957305908203125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000306.npy"} +{"epoch": 0.46258503401360546, "step": 307, "batch_size": 64, "mean": 29.929576873779297, "std": 90.49095153808594, "min": -176.2705535888672, "p10": -76.31387252807617, "median": 17.845256805419922, "p90": 154.4036361694336, "max": 264.37762451171875, "pos_frac": 0.53125, "sample": [96.59027862548828, -30.45789337158203, 45.8238525390625, -77.99986267089844, -82.82920837402344, 152.42515563964844, 264.37762451171875, -58.599281311035156, 177.59039306640625, 101.83741760253906, -21.504776000976562, 50.90141296386719, 26.52090835571289, 173.501953125, -176.2705535888672, -23.198257446289062, 96.81327819824219, -7.072479248046875, 142.9479522705078, -72.49580383300781, 74.0115966796875, -11.786073684692383, -32.62660217285156, -56.05889892578125, 60.90782165527344, -66.9184341430664, -25.413589477539062, 80.62250518798828, 116.56370544433594, -0.16656112670898438, -42.311866760253906, 61.01374816894531, 9.169605255126953, -29.236492156982422, -92.27391815185547, 155.25155639648438, -66.46998596191406, -18.59009552001953, 42.85197448730469, -64.62663269042969, 9.031803131103516, -34.624656677246094, -14.667915344238281, 178.35719299316406, 149.72193908691406, -70.22520446777344, 54.34951400756836, 149.98965454101562, 172.53610229492188, 40.62506866455078, -101.46356201171875, 70.39472198486328, 81.27735900878906, 92.20846557617188, 109.98651123046875, 85.5439224243164, -38.604469299316406, 169.8727569580078, -77.95018768310547, -23.02483367919922, -108.77456665039062, 49.45403289794922, 99.6212387084961, -0.9575614929199219], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000307.npy"} +{"epoch": 0.46409674981103555, "step": 308, "batch_size": 64, "mean": 54.86457824707031, "std": 100.05693817138672, "min": -195.01385498046875, "p10": -71.05446548461913, "median": 37.09834671020508, "p90": 164.5246002197266, "max": 417.73486328125, "pos_frac": 0.71875, "sample": [156.43463134765625, 110.20137786865234, 87.97853088378906, -13.070377349853516, 149.7068634033203, 94.53750610351562, -25.13355255126953, -131.7650146484375, 20.58721923828125, -64.9111099243164, 167.99172973632812, 210.75564575195312, 192.78749084472656, 36.05154037475586, -3.632232666015625, -73.68733215332031, 19.35462188720703, 132.6083984375, 153.96986389160156, 56.76552963256836, 149.99224853515625, -23.735729217529297, 175.49642944335938, 94.53224182128906, 96.3392562866211, 117.29055786132812, 125.637451171875, 26.47785186767578, 106.89154052734375, -83.48239135742188, 230.25234985351562, 25.31999969482422, 36.966590881347656, -38.388858795166016, 79.29345703125, -3.929595947265625, 131.49960327148438, -19.853591918945312, 37.2301025390625, -195.01385498046875, 24.02447509765625, 16.85704803466797, 85.56407165527344, 153.6380157470703, 34.389183044433594, 75.25857543945312, 75.18688201904297, -77.59523010253906, 114.76005554199219, 29.38235855102539, 149.05494689941406, 20.424896240234375, -38.82939147949219, 70.97333526611328, 417.73486328125, 14.633628845214844, 188.26541137695312, -114.07073974609375, -50.157867431640625, 24.757429122924805, 18.67983055114746, -104.35069274902344, 41.55104064941406, -5.146429061889648], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000308.npy"} +{"epoch": 0.4656084656084656, "step": 309, "batch_size": 64, "mean": 49.8757209777832, "std": 78.40351867675781, "min": -198.09930419921875, "p10": -39.3169937133789, "median": 45.73637771606445, "p90": 152.50174865722659, "max": 218.82325744628906, "pos_frac": 0.734375, "sample": [106.20091247558594, 182.54269409179688, 23.176513671875, 147.4957733154297, 36.609336853027344, 1.4322891235351562, 116.49395751953125, -42.42633056640625, 85.82638549804688, 9.096565246582031, 80.50625610351562, -2.991575241088867, -61.72224807739258, -15.290487289428711, 38.79547119140625, -96.71197509765625, 26.907941818237305, 149.98910522460938, -20.31800079345703, 12.404874801635742, 10.92156982421875, 139.58413696289062, 153.5623321533203, 2.7893505096435547, 55.304351806640625, 73.35855102539062, -18.959640502929688, 31.047117233276367, 77.86283111572266, -17.702316284179688, -6.55815315246582, -32.06187438964844, -17.904373168945312, 150.0270538330078, 72.41847229003906, 39.80697250366211, 58.87351989746094, 23.596134185791016, 6.191549301147461, 119.03087615966797, -24.485000610351562, 175.5968017578125, 142.83714294433594, 71.32262420654297, 218.82325744628906, -198.09930419921875, 98.4834213256836, 55.89374923706055, 65.3073501586914, 105.68987274169922, -26.397010803222656, 7.169136047363281, 75.56970977783203, 49.26513671875, 165.43875122070312, 126.75363159179688, 46.0673828125, 89.45918273925781, -45.65974426269531, -46.94073486328125, 183.0458984375, -60.42938232421875, 45.405372619628906, 172.72291564941406], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000309.npy"} +{"epoch": 0.4671201814058957, "step": 310, "batch_size": 64, "mean": 28.416515350341797, "std": 85.15790557861328, "min": -135.06814575195312, "p10": -71.25369110107421, "median": 12.954124450683594, "p90": 145.83884124755863, "max": 298.598388671875, "pos_frac": 0.5625, "sample": [111.33222961425781, -26.946945190429688, 21.693279266357422, 77.24236297607422, 57.16340637207031, -43.59770965576172, -80.38722229003906, 2.9818572998046875, 159.43368530273438, -0.7908229827880859, -55.14125061035156, -104.53919982910156, 36.017051696777344, 150.04490661621094, -15.477752685546875, 4.077663421630859, 7.47076416015625, 9.079010009765625, 212.02928161621094, -45.772911071777344, 33.86394119262695, -49.763179779052734, -78.37459564208984, 298.598388671875, -33.98546600341797, -45.761661529541016, 99.6594467163086, -1.9954681396484375, 30.802947998046875, 68.44441986083984, -135.06814575195312, 59.8602294921875, -27.898422241210938, 134.8592529296875, -51.62040710449219, 72.97488403320312, -2.4918289184570312, 106.29051208496094, -4.099418640136719, 136.02468872070312, 47.557029724121094, 68.10038757324219, -72.4349365234375, -68.49745178222656, 85.70636749267578, 16.829238891601562, -33.135040283203125, 120.23530578613281, 22.02143096923828, -2.25299072265625, 37.091209411621094, 151.28123474121094, 90.35232543945312, -108.63032531738281, 50.985504150390625, -14.149148941040039, 204.48590087890625, 157.1594696044922, 26.536657333374023, -13.671493530273438, -116.01620483398438, 95.33563995361328, -10.593019485473633, -1.8718490600585938], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000310.npy"} +{"epoch": 0.46863189720332576, "step": 311, "batch_size": 64, "mean": 53.201560974121094, "std": 96.9644775390625, "min": -189.71595764160156, "p10": -50.28153839111328, "median": 43.81389236450195, "p90": 163.94167785644532, "max": 495.5222473144531, "pos_frac": 0.75, "sample": [109.55903625488281, 16.89891815185547, 33.092506408691406, -9.575904846191406, -12.376174926757812, -57.742156982421875, 41.00627517700195, 113.06529998779297, 10.798616409301758, 89.83535766601562, 52.687660217285156, -189.71595764160156, 10.705429077148438, 79.27794647216797, 68.8604736328125, -60.846710205078125, 123.62338256835938, 56.333396911621094, 209.78274536132812, 72.10159301757812, -17.41692352294922, 72.96426391601562, 0.6993618011474609, 42.933143615722656, 137.20578002929688, 11.446369171142578, -71.5386962890625, -39.10017013549805, -45.70843505859375, 59.332122802734375, -60.504241943359375, 247.97171020507812, 256.85882568359375, 44.996971130371094, 33.63932800292969, 27.525419235229492, 32.42433166503906, 106.2393798828125, 161.01901245117188, 78.64053344726562, 495.5222473144531, 47.71449279785156, -8.361412048339844, 161.70916748046875, -52.24143981933594, 14.414718627929688, 41.005859375, 4.9597625732421875, 59.237091064453125, 74.12362670898438, 164.89846801757812, 175.9071044921875, 44.69464111328125, -57.150238037109375, 48.012969970703125, 89.38069152832031, 6.401140213012695, -41.53600311279297, -20.121673583984375, 60.15606689453125, -11.951072692871094, 40.01270294189453, 179.58055114746094, 51.530487060546875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000311.npy"} +{"epoch": 0.47014361300075586, "step": 312, "batch_size": 64, "mean": 66.05711364746094, "std": 88.56623077392578, "min": -90.38433074951172, "p10": -27.401418304443354, "median": 50.422420501708984, "p90": 166.5578826904297, "max": 405.8057861328125, "pos_frac": 0.828125, "sample": [34.3189811706543, 18.160980224609375, -43.632171630859375, 32.52247619628906, 54.62016296386719, 50.992225646972656, 56.61015701293945, -29.654739379882812, -5.001739501953125, 32.74726104736328, 177.75396728515625, 108.45930480957031, 124.6921157836914, 74.56243133544922, 6.787994384765625, 91.65227508544922, 17.211769104003906, -7.949487686157227, 17.684358596801758, 231.47189331054688, 159.92404174804688, 22.173484802246094, 36.6132926940918, 34.214759826660156, 54.597991943359375, 168.2308349609375, 20.188087463378906, -11.508392333984375, 65.58839416503906, 125.28093719482422, 170.67909240722656, -63.95454406738281, 48.051414489746094, 162.65432739257812, -90.38433074951172, 82.6212158203125, 117.7933120727539, 55.396270751953125, 405.8057861328125, 344.10736083984375, 221.03591918945312, 11.560497283935547, 24.845504760742188, 69.2479248046875, 143.0106201171875, 49.85261535644531, 4.937507629394531, 128.03189086914062, 58.881591796875, -50.975887298583984, -52.00189208984375, 96.4356460571289, 123.08552551269531, 31.876434326171875, 22.68939208984375, 113.1910400390625, 69.35399627685547, 25.029680252075195, -75.781005859375, 142.62948608398438, 74.10765838623047, -22.14366912841797, 47.86046600341797, 18.810409545898438], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000312.npy"} +{"epoch": 0.47165532879818595, "step": 313, "batch_size": 64, "mean": 34.13251495361328, "std": 88.44979095458984, "min": -179.20399475097656, "p10": -76.66719970703123, "median": 30.807894706726074, "p90": 138.57220458984375, "max": 279.2695007324219, "pos_frac": 0.625, "sample": [-60.07041931152344, 33.210166931152344, 87.5873031616211, -9.076251983642578, 81.18424987792969, 86.18885803222656, 27.21164321899414, 28.405622482299805, 135.54891967773438, -84.58888244628906, -5.273433685302734, 67.81060791015625, 134.44482421875, 53.446624755859375, 25.863304138183594, -37.474510192871094, 16.272315979003906, -39.533451080322266, -167.2003936767578, -28.79639434814453, 70.64459991455078, 80.03046417236328, -0.2693767547607422, 197.2165069580078, -135.25112915039062, -119.77800750732422, -39.270782470703125, 174.77935791015625, 139.20584106445312, 63.13097381591797, 172.19834899902344, 79.87419128417969, -110.904541015625, -2.192964553833008, 193.53358459472656, 57.69999694824219, 43.75251770019531, -9.32352066040039, -28.740982055664062, 175.41378784179688, 72.94970703125, 10.222797393798828, 85.26692199707031, -22.006698608398438, 279.2695007324219, 40.30731964111328, 5.20391845703125, -45.707298278808594, 56.45744323730469, 34.233741760253906, 137.09371948242188, 55.72468566894531, -2.9065093994140625, -0.9801750183105469, 8.583118438720703, -179.20399475097656, 108.2972640991211, -0.005863189697265625, 127.33917236328125, -25.479598999023438, -83.78010559082031, 15.051361083984375, 74.56848907470703, 87.07234954833984], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000313.npy"} +{"epoch": 0.47316704459561604, "step": 314, "batch_size": 64, "mean": 55.66606903076172, "std": 76.95528411865234, "min": -78.25279235839844, "p10": -36.16242523193359, "median": 39.26466751098633, "p90": 158.44515686035157, "max": 238.1497802734375, "pos_frac": 0.6875, "sample": [39.278350830078125, 55.41659164428711, 36.80266571044922, 130.153564453125, 66.90544128417969, 77.64019012451172, -17.989181518554688, 114.87271881103516, -42.19054412841797, 88.81739807128906, 14.501258850097656, -26.096935272216797, 231.08767700195312, 88.78343200683594, -16.061206817626953, 238.1497802734375, 118.714599609375, 140.39212036132812, 160.29574584960938, -4.682853698730469, 18.91412353515625, -35.402557373046875, 119.75053405761719, 159.34446716308594, 170.3964080810547, -77.44329833984375, 31.364639282226562, 194.6086883544922, -3.3184356689453125, -36.48808288574219, 74.78374481201172, 93.80032348632812, 11.326923370361328, -6.624359130859375, -4.465120315551758, 31.673931121826172, -57.5579833984375, -23.870712280273438, 71.67851257324219, 116.46470642089844, -21.83083152770996, -13.720428466796875, 141.13308715820312, 46.3883171081543, 114.94617462158203, -5.9896697998046875, 136.99752807617188, 149.6986846923828, 12.861305236816406, 151.80984497070312, 73.85594177246094, 157.07568359375, 84.72018432617188, 22.152740478515625, 90.36151123046875, -22.194747924804688, -38.35015869140625, 39.25098419189453, 34.43418884277344, 9.895896911621094, 34.54346466064453, -59.917816162109375, -78.25279235839844, 159.03207397460938], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000314.npy"} +{"epoch": 0.47467876039304613, "step": 315, "batch_size": 64, "mean": 47.285831451416016, "std": 73.2562255859375, "min": -114.96312713623047, "p10": -48.854835510253906, "median": 47.73725509643555, "p90": 136.3002151489258, "max": 217.63668823242188, "pos_frac": 0.703125, "sample": [-36.551513671875, 80.99859619140625, 141.04261779785156, -8.26701545715332, -16.15036964416504, 48.100990295410156, -113.87065124511719, -41.84406280517578, -80.01943969726562, 107.00104522705078, -114.96312713623047, 57.56378936767578, -20.93826675415039, 134.91799926757812, 14.264484405517578, 68.65962982177734, 58.65110778808594, -56.123451232910156, 29.44735336303711, 4.56920051574707, 40.42870330810547, 136.89259338378906, 28.448854446411133, 41.84852600097656, 45.41499328613281, 62.866580963134766, -41.880279541015625, -0.3655242919921875, 14.298030853271484, 119.03153228759766, 99.21229553222656, 102.70803833007812, 137.33090209960938, -1.5554962158203125, 59.31651306152344, 99.29570007324219, 100.39683532714844, -49.14622497558594, -65.77969360351562, 47.37351989746094, 125.28410339355469, 19.08135986328125, 106.64334106445312, 145.03842163085938, -27.978973388671875, 131.0550079345703, 80.67729949951172, 166.22915649414062, -73.01539611816406, 21.205703735351562, 110.31455993652344, 16.170791625976562, 141.94277954101562, -2.700725555419922, 109.98554992675781, 40.568260192871094, 124.74366760253906, 77.83628845214844, 124.54542541503906, -48.1749267578125, 101.05799865722656, -11.303983688354492, 217.63668823242188, 96.8255615234375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000315.npy"} +{"epoch": 0.47619047619047616, "step": 316, "batch_size": 64, "mean": 65.56693267822266, "std": 90.29814910888672, "min": -116.26366424560547, "p10": -39.19088554382324, "median": 51.91573143005371, "p90": 184.68939208984378, "max": 294.4326171875, "pos_frac": 0.734375, "sample": [-23.232315063476562, -3.19952392578125, 110.18165588378906, -73.09479522705078, -116.26366424560547, -42.1856689453125, 114.02877807617188, 40.683738708496094, 44.25000762939453, 134.05795288085938, 145.03179931640625, 283.2088623046875, 93.70492553710938, 195.41946411132812, 32.796356201171875, -36.36378479003906, -70.9697036743164, -20.508888244628906, -56.15049743652344, 152.27850341796875, 21.960588455200195, 187.53732299804688, 3.316162109375, -25.80883026123047, 20.136749267578125, 49.293521881103516, 102.14935302734375, 6.5004730224609375, -20.421131134033203, 31.13117218017578, 157.00709533691406, 176.43089294433594, 62.4475212097168, 294.4326171875, -10.638406753540039, 27.630279541015625, 3.898590087890625, 154.55697631835938, 103.96944427490234, 113.47885131835938, 135.4495849609375, 70.48151397705078, 72.17164611816406, 2.852947235107422, 230.1729278564453, 39.32521057128906, -40.40250015258789, 209.1407470703125, 24.063278198242188, 54.537940979003906, -9.487930297851562, -43.746498107910156, 0.3745574951171875, -24.379547119140625, 88.74058532714844, 60.93086242675781, -0.13650131225585938, 161.69740295410156, 245.1876678466797, 100.35411834716797, 80.40332794189453, 82.26136779785156, 178.04421997070312, 115.56424713134766], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000316.npy"} +{"epoch": 0.47770219198790626, "step": 317, "batch_size": 64, "mean": 62.915794372558594, "std": 78.48897552490234, "min": -175.43487548828125, "p10": -32.12705612182617, "median": 64.71905517578125, "p90": 170.98050689697266, "max": 209.798583984375, "pos_frac": 0.78125, "sample": [99.00934600830078, 96.24299621582031, 35.92631149291992, 209.798583984375, 87.15869140625, 197.9918212890625, -44.63594055175781, 21.551109313964844, -55.89686584472656, 56.39911651611328, 15.602882385253906, 42.25637435913086, -64.59362030029297, 3.4127197265625, 75.16743469238281, 36.79813766479492, 103.17776489257812, -4.022125244140625, 146.87509155273438, 179.12623596191406, -32.25242614746094, -9.439750671386719, 71.13482666015625, -47.89851379394531, 77.4581069946289, 89.87913513183594, 143.0243377685547, 82.45952606201172, 119.6788558959961, -175.43487548828125, 207.96392822265625, 16.053726196289062, 95.9102554321289, 161.91526794433594, 165.69049072265625, -48.18475341796875, 55.25300598144531, -4.592723846435547, 89.80160522460938, 169.47024536132812, 28.900470733642578, 69.87513732910156, 204.3343505859375, -22.27442169189453, 12.78546142578125, 32.52315139770508, 38.02216339111328, 171.6277618408203, 59.56297302246094, -5.3729095458984375, 109.6885986328125, 20.210254669189453, 53.538536071777344, 154.29617309570312, -31.83452606201172, 93.22665405273438, -26.85570526123047, 72.737060546875, 178.42198181152344, 124.78118896484375, 3.8162765502929688, 120.90776062011719, 23.858078002929688, 74.59800720214844], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000317.npy"} +{"epoch": 0.47921390778533635, "step": 318, "batch_size": 64, "mean": 45.56207275390625, "std": 99.90242004394531, "min": -157.14163208007812, "p10": -81.48614807128905, "median": 45.94303512573242, "p90": 170.20726928710943, "max": 347.734130859375, "pos_frac": 0.671875, "sample": [50.772762298583984, 110.11443328857422, 27.85980224609375, -157.14163208007812, 145.37881469726562, 148.70094299316406, 70.94427490234375, -89.780029296875, 53.112274169921875, 160.77145385742188, 6.207553863525391, -58.31127166748047, 120.98609924316406, 101.40361022949219, 157.41285705566406, -2.8810081481933594, 41.77958679199219, -42.527259826660156, 75.86040496826172, 11.89643669128418, -75.77294921875, 103.01477813720703, 174.25119018554688, -31.832252502441406, 46.15473175048828, 113.36801147460938, 188.46653747558594, -119.53498840332031, 118.51516723632812, 100.71272277832031, -44.173919677734375, 16.271621704101562, -63.516845703125, 47.8006477355957, 22.393081665039062, 347.734130859375, 269.33349609375, -35.93737030029297, 177.4918670654297, 64.37184143066406, -83.93466186523438, 16.6544189453125, 89.4703598022461, -69.74999237060547, -12.086944580078125, 114.99348449707031, 4.956626892089844, 198.87400817871094, -26.78866958618164, 110.50450134277344, 3.027761459350586, 183.42001342773438, 51.858062744140625, 86.94488525390625, -110.12016296386719, -25.140533447265625, 103.56849670410156, -101.25184631347656, 45.73133850097656, -29.483505249023438, 146.69725036621094, -120.03478240966797, -56.89359664916992, 43.08434295654297], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000318.npy"} +{"epoch": 0.48072562358276644, "step": 319, "batch_size": 64, "mean": 65.69465637207031, "std": 95.99524688720703, "min": -175.11146545410156, "p10": -35.40869216918945, "median": 58.77956008911133, "p90": 174.7969299316407, "max": 348.3725891113281, "pos_frac": 0.75, "sample": [108.50756072998047, 156.3595733642578, 3.6673812866210938, 348.3725891113281, 32.99763488769531, 197.28292846679688, -8.630897521972656, 57.786590576171875, -1.1039886474609375, 71.68270874023438, 141.6097412109375, 127.30860137939453, 2.431415557861328, 28.049293518066406, 40.65058898925781, 145.126953125, 17.72052764892578, 255.86351013183594, 26.699913024902344, 134.16452026367188, 16.505054473876953, -22.970870971679688, 56.52813720703125, 74.6927490234375, 289.3685302734375, -21.27851104736328, 137.43310546875, 103.7895736694336, 153.44532775878906, 182.6986541748047, -62.65348815917969, -62.81414794921875, 93.4298095703125, 318.311767578125, -11.95876693725586, 84.83647155761719, -16.45843505859375, 95.44242858886719, 102.34943389892578, -68.58070373535156, 127.46066284179688, 24.958358764648438, -29.16815948486328, 15.824501037597656, 25.90198516845703, 28.46259117126465, -175.11146545410156, -38.08320617675781, -70.0269775390625, 112.652587890625, -16.053775787353516, 58.363861083984375, 30.459671020507812, -18.0567626953125, 72.32775115966797, 105.1634521484375, 85.8387451171875, 103.93266296386719, 142.21087646484375, 59.19525909423828, 63.46356201171875, -92.28112030029297, 189.18167114257812, 69.17826843261719], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000319.npy"} +{"epoch": 0.48223733938019653, "step": 320, "batch_size": 64, "mean": 56.679443359375, "std": 107.01319885253906, "min": -171.9517059326172, "p10": -40.87000503540038, "median": 31.42711067199707, "p90": 174.0344970703125, "max": 551.3162841796875, "pos_frac": 0.6875, "sample": [68.55113220214844, -73.4581298828125, 2.308124542236328, 105.95097351074219, 19.747169494628906, 48.62255096435547, 153.80299377441406, 1.68560791015625, 103.58161163330078, -32.87992858886719, -71.49946594238281, 124.07403564453125, -6.891111373901367, -9.847530364990234, -43.98870086669922, 266.41326904296875, 109.15718841552734, 8.668418884277344, -24.88501739501953, 115.83441162109375, -56.434417724609375, 30.626705169677734, -171.9517059326172, 9.643442153930664, 28.031570434570312, 551.3162841796875, 220.40304565429688, 132.34994506835938, 162.58338928222656, 4.83306884765625, 37.000335693359375, -26.647424697875977, 32.227516174316406, 176.02157592773438, 3.097749710083008, 169.39797973632812, 20.070030212402344, -27.601173400878906, -7.897403717041016, -4.350772857666016, 42.93783950805664, 162.7767791748047, 0.6694259643554688, 70.85920715332031, 39.865264892578125, 57.06193542480469, 226.24014282226562, 58.364784240722656, 15.005935668945312, -20.699392318725586, -0.296051025390625, -76.28954315185547, 233.12326049804688, -33.593048095703125, -23.782238006591797, 77.9424057006836, -56.06512451171875, 107.14738464355469, 199.35272216796875, 123.69701385498047, 78.21269226074219, -31.218833923339844, 112.25398254394531, 116.25044250488281], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000320.npy"} +{"epoch": 0.4837490551776266, "step": 321, "batch_size": 64, "mean": 51.9813232421875, "std": 77.42710876464844, "min": -128.46124267578125, "p10": -59.88950729370114, "median": 54.563758850097656, "p90": 157.38953552246093, "max": 206.09033203125, "pos_frac": 0.796875, "sample": [75.62631225585938, 136.143798828125, 2.6624374389648438, 128.82107543945312, 112.25497436523438, -21.462501525878906, 155.57034301757812, 92.04850006103516, 158.84263610839844, 38.418548583984375, 155.13125610351562, 61.7050895690918, 43.605960845947266, 110.61952209472656, -122.59777069091797, 6.766414642333984, 10.613319396972656, 80.8577880859375, 52.99684143066406, 188.99050903320312, 24.238067626953125, 52.501792907714844, -103.7643051147461, -28.461471557617188, 87.32425689697266, -109.97883605957031, -71.7382583618164, -74.84945678710938, 95.37920379638672, 3.4676971435546875, 14.819320678710938, 120.00543212890625, 97.37289428710938, 77.07060241699219, 56.13067626953125, 92.83746337890625, 78.62165832519531, -7.005830764770508, 33.07405471801758, 123.38956451416016, 7.313865661621094, -11.223697662353516, 64.87460327148438, 82.43111419677734, 73.26710510253906, -128.46124267578125, 135.02056884765625, 160.9884033203125, 158.169189453125, 56.28623580932617, 22.91179656982422, 0.09650611877441406, 48.60101318359375, 165.9681854248047, 206.09033203125, 36.398399353027344, 47.06001281738281, 44.73248291015625, 166.15695190429688, -71.7322769165039, 20.63225555419922, -16.647796630859375, 62.077430725097656, -32.256378173828125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000321.npy"} +{"epoch": 0.4852607709750567, "step": 322, "batch_size": 64, "mean": 68.19315338134766, "std": 71.71847534179688, "min": -145.9407501220703, "p10": -21.567730712890622, "median": 61.97246170043945, "p90": 160.15111236572267, "max": 195.03207397460938, "pos_frac": 0.84375, "sample": [-32.888832092285156, 91.85332489013672, 153.73553466796875, 35.043853759765625, 106.6611328125, 32.14601135253906, 195.03207397460938, 140.29873657226562, 133.5382080078125, 36.31224060058594, 71.7251968383789, 78.40103149414062, 167.18707275390625, 162.87689208984375, 157.77395629882812, 28.079483032226562, 63.79998779296875, 16.859115600585938, 1.9914360046386719, 46.12152099609375, 109.1357192993164, -1.1500930786132812, -71.50975799560547, 147.2653045654297, 154.3779754638672, 45.38335418701172, 53.63153076171875, 11.573160171508789, 6.1676483154296875, -3.6336822509765625, -19.740890502929688, 120.35411071777344, 14.926803588867188, 127.69532012939453, 161.03138732910156, 79.75971221923828, 38.561561584472656, 183.57907104492188, 29.739669799804688, -39.24334716796875, 83.30921936035156, 145.48226928710938, 8.134384155273438, 147.2440948486328, 15.641273498535156, 135.35092163085938, 165.75390625, 158.09713745117188, -22.350662231445312, -145.9407501220703, 75.74713134765625, 54.30793762207031, 35.86514663696289, 54.42064666748047, 131.17466735839844, 41.635162353515625, 60.144935607910156, 68.57453918457031, 102.988525390625, -42.73055648803711, 176.583251953125, -33.03771209716797, 89.92814636230469, 23.585786819458008], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000322.npy"} +{"epoch": 0.48677248677248675, "step": 323, "batch_size": 64, "mean": 55.214439392089844, "std": 111.17933654785156, "min": -183.20947265625, "p10": -61.09738426208495, "median": 19.696334838867188, "p90": 193.69477081298828, "max": 426.5660095214844, "pos_frac": 0.6875, "sample": [44.41790008544922, -69.48431396484375, 65.81625366210938, 17.100570678710938, -15.742256164550781, 213.87725830078125, 54.96080017089844, 9.489391326904297, 17.632896423339844, 186.34007263183594, 38.3385009765625, -21.581876754760742, 166.8323974609375, 18.360740661621094, 142.12557983398438, -50.54704666137695, -65.61895751953125, 149.24862670898438, 88.62958526611328, 17.271240234375, 194.71531677246094, 79.39120483398438, 38.535064697265625, 47.824928283691406, 191.62887573242188, -33.79261016845703, -79.11805725097656, 9.667436599731445, -7.117891311645508, -72.24295043945312, 307.76220703125, 9.552032470703125, -6.196990966796875, -44.70364761352539, 194.5801544189453, 21.03192901611328, -129.73773193359375, 163.16375732421875, 135.33872985839844, -3.029043197631836, 202.183349609375, 13.623374938964844, -6.536865234375, 24.91199493408203, 184.4243927001953, 426.5660095214844, 145.52590942382812, -125.57237243652344, 351.30999755859375, 23.27837371826172, -183.20947265625, -24.58942413330078, 90.38352966308594, 16.757667541503906, 14.3480224609375, 87.17214965820312, 170.92385864257812, 94.30024719238281, -11.354904174804688, -17.34659194946289, 27.597278594970703, -11.1075439453125, 13.81744384765625, 1.597412109375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000323.npy"} +{"epoch": 0.48828420256991684, "step": 324, "batch_size": 64, "mean": 55.36581802368164, "std": 73.31997680664062, "min": -111.17984771728516, "p10": -29.13925037384033, "median": 50.00861930847168, "p90": 148.1215911865235, "max": 242.81561279296875, "pos_frac": 0.8125, "sample": [78.5421142578125, 102.764404296875, 16.515884399414062, 4.942634582519531, -25.48526382446289, 179.63111877441406, 95.50299835205078, 97.99026489257812, -46.00123596191406, -20.51825714111328, -0.34131622314453125, 110.02082061767578, 68.92880249023438, 63.2936897277832, 110.6583251953125, 17.681591033935547, 153.36825561523438, -45.629852294921875, 83.27313995361328, 1.993194580078125, 95.28211975097656, 136.3768310546875, 14.993335723876953, 76.424072265625, 35.89872741699219, 49.62957763671875, -97.87201690673828, 11.569328308105469, 242.81561279296875, 14.952770233154297, 46.13392639160156, -111.17984771728516, 63.77648162841797, 111.16761016845703, 36.43544006347656, 65.94937133789062, 33.621009826660156, -30.048871994018555, 82.1336669921875, 73.12403106689453, 51.733070373535156, -27.016799926757812, 219.44158935546875, 26.990867614746094, 233.28579711914062, -8.40549087524414, 21.285736083984375, 128.1324462890625, 53.4616584777832, 122.47281646728516, 180.8909454345703, 14.69759750366211, 9.123184204101562, 93.03846740722656, 99.12261962890625, 15.165481567382812, 47.218040466308594, -35.03105163574219, -71.3989028930664, 50.38766098022461, 153.15505981445312, 132.3545379638672, 21.917259216308594, 13.075407028198242], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000324.npy"} +{"epoch": 0.4897959183673469, "step": 325, "batch_size": 64, "mean": 61.9903564453125, "std": 96.8448486328125, "min": -166.0819549560547, "p10": -34.34350357055664, "median": 49.036346435546875, "p90": 163.6188446044922, "max": 402.3328857421875, "pos_frac": 0.734375, "sample": [26.50253677368164, 118.35609436035156, 5.2336883544921875, 116.44957733154297, -21.95172882080078, 70.51844787597656, 106.57279968261719, 79.8754653930664, 50.11265563964844, 112.27368927001953, 86.70790100097656, 136.32199096679688, 11.806501388549805, -32.958953857421875, 264.80963134765625, -27.336483001708984, 134.7477569580078, 140.75135803222656, 101.52549743652344, -128.74917602539062, 205.10531616210938, -22.844100952148438, 100.31073760986328, -0.06438255310058594, -5.800724029541016, 275.4031066894531, 17.764110565185547, 136.9763641357422, -62.034732818603516, 47.96003723144531, -76.37368774414062, 33.83048629760742, 88.71927642822266, 91.6092529296875, -68.17705535888672, 167.51815795898438, -74.81985473632812, 96.1143798828125, 73.93790435791016, 402.3328857421875, 33.493438720703125, 15.704771041870117, 42.700347900390625, 242.53109741210938, -166.0819549560547, 61.106040954589844, 37.4896240234375, 35.298187255859375, 150.20413208007812, 91.0615005493164, 154.52044677734375, -34.93688201904297, 14.976682662963867, -9.55572509765625, -2.8096084594726562, -13.589698791503906, 46.16149139404297, 72.29927062988281, 228.3336944580078, 59.19190979003906, 44.45808410644531, 76.97301483154297, 29.58983612060547, -20.773635864257812], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000325.npy"} +{"epoch": 0.491307634164777, "step": 326, "batch_size": 64, "mean": 26.049846649169922, "std": 81.71894073486328, "min": -173.92929077148438, "p10": -64.4814437866211, "median": 21.715097427368164, "p90": 130.3647933959961, "max": 215.53407287597656, "pos_frac": 0.578125, "sample": [-39.821998596191406, -22.11483383178711, 23.03488540649414, 32.6021842956543, 24.18731689453125, -173.92929077148438, 215.53407287597656, 129.45953369140625, 49.768280029296875, 69.8269271850586, -51.313201904296875, 84.37696838378906, 46.23090744018555, -20.47283935546875, 36.432682037353516, 133.9279327392578, -13.039384841918945, 87.25930786132812, -11.512138366699219, 93.16127014160156, 121.03623962402344, -31.525253295898438, 17.799652099609375, 73.51806640625, -34.444053649902344, 16.65045166015625, -103.93438720703125, -15.309814453125, -79.73800659179688, -16.947769165039062, 167.4309844970703, -124.45519256591797, 146.65975952148438, -16.433582305908203, 63.734642028808594, 60.28816223144531, 12.493446350097656, -15.11700439453125, -42.483116149902344, -37.62884521484375, 127.48907470703125, 112.09941864013672, -64.62461853027344, 6.1968994140625, 113.32765197753906, 37.302085876464844, 83.79655456542969, -60.11322021484375, 116.5607681274414, 154.38795471191406, -20.732433319091797, -13.309976577758789, -115.94230651855469, 77.06912994384766, -64.14736938476562, -6.2024383544921875, -130.32681274414062, 43.99554443359375, 62.52907180786133, 168.86697387695312, 92.4884262084961, -59.861331939697266, 20.395309448242188, 130.7527618408203], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000326.npy"} +{"epoch": 0.4928193499622071, "step": 327, "batch_size": 64, "mean": 51.717769622802734, "std": 111.94368743896484, "min": -136.8144073486328, "p10": -74.48968124389648, "median": 30.56515121459961, "p90": 168.0191421508789, "max": 570.1990356445312, "pos_frac": 0.703125, "sample": [44.358985900878906, -101.6614761352539, -74.04964447021484, 570.1990356445312, 40.80126953125, 264.67144775390625, -114.72314453125, 74.06730651855469, -51.085540771484375, 73.65248107910156, 165.64520263671875, 33.22165298461914, -106.46601104736328, 125.87071228027344, 45.516517639160156, 151.04263305664062, 122.85421752929688, 23.88393783569336, 2.44830322265625, -38.0521125793457, -2.382139205932617, 27.908649444580078, 64.1950454711914, 140.96316528320312, 169.0365447998047, 52.95903015136719, 77.532958984375, 68.99208068847656, 89.4029769897461, 22.988174438476562, 26.224380493164062, 22.827320098876953, -74.67826843261719, 128.89817810058594, 107.45075988769531, 21.74677276611328, 62.19871520996094, 9.469772338867188, -28.296241760253906, -1.9703407287597656, 194.4910125732422, 25.746482849121094, -136.8144073486328, 16.049697875976562, 103.6520767211914, 260.8646240234375, -44.55987548828125, 108.16873168945312, 240.2236328125, 106.99238586425781, 125.35157012939453, 6.335123062133789, 19.586151123046875, 13.859241485595703, -85.33735656738281, -57.07154083251953, 98.4856948852539, 67.2479476928711, -13.008514404296875, -14.89383316040039, -21.857681274414062, -109.52294921875, 209.2705078125, -40.984893798828125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000327.npy"} +{"epoch": 0.4943310657596372, "step": 328, "batch_size": 64, "mean": 56.990631103515625, "std": 77.14360046386719, "min": -130.31597900390625, "p10": -20.529483413696287, "median": 48.74122619628906, "p90": 152.92345275878907, "max": 249.859375, "pos_frac": 0.765625, "sample": [150.40951538085938, 7.446039199829102, 14.857076644897461, 39.73992919921875, 138.13381958007812, 21.218114852905273, -61.46501541137695, 128.15487670898438, 74.23003387451172, 37.13585662841797, 75.9731674194336, 154.0008544921875, 120.68003845214844, 23.15163803100586, 72.73968505859375, -41.938446044921875, 147.87948608398438, 11.909921646118164, 214.4541778564453, -18.840961456298828, 64.11602783203125, 10.24444580078125, 7.077703475952148, 59.847713470458984, -15.980819702148438, -21.448387145996094, 249.859375, 64.03475189208984, -130.31597900390625, 59.52739715576172, -93.6250991821289, 104.58712768554688, 87.13116455078125, 10.561279296875, 162.17237854003906, -21.253135681152344, -2.8052520751953125, 1.161102294921875, 230.40341186523438, 12.428054809570312, 98.69923400878906, 157.58522033691406, -10.564094543457031, 30.526830673217773, -10.925064086914062, 148.642333984375, -8.25794792175293, -0.3719329833984375, 26.230361938476562, 62.0165901184082, 97.21135711669922, -60.52503967285156, 57.742523193359375, 88.07943725585938, 19.633529663085938, 82.17100524902344, 97.21021270751953, -13.922414779663086, 143.39492797851562, 36.318382263183594, 142.9586639404297, 3.5881500244140625, 162.5282440185547, 149.83685302734375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000328.npy"} +{"epoch": 0.4958427815570673, "step": 329, "batch_size": 64, "mean": 57.512611389160156, "std": 87.89127349853516, "min": -196.37774658203125, "p10": -22.733452415466306, "median": 29.18239974975586, "p90": 186.12886352539064, "max": 245.2296142578125, "pos_frac": 0.71875, "sample": [7.322233200073242, 11.000614166259766, 42.67170715332031, 38.51637268066406, -63.17991638183594, 18.98334503173828, 172.361572265625, -16.487945556640625, 210.46978759765625, -3.9397735595703125, 108.04953002929688, 49.902503967285156, -21.529394149780273, 87.09716796875, 0.7427005767822266, 7.964942932128906, 29.31391143798828, 168.10153198242188, -29.489410400390625, 35.00103759765625, 15.666946411132812, -8.534101486206055, 187.74017333984375, 166.1986083984375, -62.686622619628906, 150.88818359375, 202.4696044921875, 95.68677520751953, 23.49786376953125, 14.303321838378906, 134.69046020507812, 89.332763671875, -34.25598907470703, 201.10943603515625, 2.799398422241211, -6.7080535888671875, 240.75772094726562, 16.03437614440918, 173.52322387695312, 78.00721740722656, 245.2296142578125, 137.23602294921875, -3.6458778381347656, -15.409805297851562, -11.104522705078125, 145.68014526367188, -10.544822692871094, 33.557579040527344, -3.9784698486328125, 67.32160949707031, 5.110691070556641, 129.08401489257812, 94.00578308105469, -23.24947738647461, 182.369140625, 196.66604614257812, -8.275850296020508, 144.7021484375, 39.80934143066406, -196.37774658203125, 29.050888061523438, 1.364227294921875, 2.0912914276123047, -33.2786865234375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000329.npy"} +{"epoch": 0.4973544973544973, "step": 330, "batch_size": 64, "mean": 53.860130310058594, "std": 99.13968658447266, "min": -186.86270141601562, "p10": -73.83675765991211, "median": 40.88243103027344, "p90": 187.16492156982423, "max": 278.93597412109375, "pos_frac": 0.703125, "sample": [4.3488311767578125, -49.75444030761719, -32.765586853027344, -123.33452606201172, 31.7567138671875, -186.86270141601562, 95.11561584472656, 8.780128479003906, -29.694969177246094, 123.2524185180664, 103.16168212890625, 78.18269348144531, -94.4757080078125, -14.632011413574219, 28.335636138916016, 189.75091552734375, 206.58090209960938, -74.99585723876953, 221.99429321289062, 107.95733642578125, 115.20466613769531, -128.42239379882812, 115.05938720703125, -9.115739822387695, 131.84254455566406, 231.8883514404297, 6.63092041015625, 15.021392822265625, -73.96160125732422, 17.58344268798828, 83.17874908447266, 124.49518585205078, -36.24317932128906, 88.86357116699219, -59.00761032104492, 174.68740844726562, 103.41173553466797, 195.01580810546875, 278.93597412109375, 28.594562530517578, 0.8113784790039062, 104.88087463378906, 8.124588012695312, 90.83780670166016, 150.6002655029297, 151.25384521484375, 3.094602584838867, 50.11812210083008, -20.305023193359375, -23.294509887695312, -15.994941711425781, 91.88653564453125, 152.1019287109375, 157.8416748046875, 25.804672241210938, 37.881195068359375, 43.8836669921875, -73.54545593261719, 210.7735595703125, -103.84126281738281, 88.052490234375, 138.85899353027344, -0.27213287353515625, 181.1309356689453], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000330.npy"} +{"epoch": 0.4988662131519274, "step": 331, "batch_size": 64, "mean": 70.66366577148438, "std": 85.74348449707031, "min": -178.04237365722656, "p10": -32.24680404663084, "median": 59.44280433654785, "p90": 168.62393188476562, "max": 274.7261962890625, "pos_frac": 0.8125, "sample": [177.2685546875, 12.023384094238281, 56.032718658447266, 112.69082641601562, 132.54669189453125, 170.18984985351562, 96.00212097167969, 159.92852783203125, 69.18545532226562, -38.58638000488281, 22.17672348022461, 274.7261962890625, 165.66065979003906, 178.47080993652344, 52.49510192871094, 25.970191955566406, 194.23910522460938, 76.48545837402344, 35.616119384765625, 165.34523010253906, 137.0017547607422, 14.464767456054688, 16.765838623046875, 71.48258972167969, -7.477867126464844, 142.33673095703125, 101.4317855834961, 257.73828125, 62.85289001464844, -16.548675537109375, 90.81698608398438, 134.05691528320312, 161.82057189941406, 133.2467498779297, -17.45446014404297, -3.7801437377929688, 54.47954559326172, -83.99380493164062, -72.91438293457031, 122.3004379272461, -44.148841857910156, 169.89390563964844, 41.480010986328125, 10.91802978515625, 32.65656280517578, -178.04237365722656, 37.792213439941406, 6.54034423828125, -54.39080047607422, 37.34974670410156, 37.460548400878906, 148.30380249023438, 151.57052612304688, 40.43650817871094, 47.958396911621094, 6.250801086425781, -6.339468955993652, 154.69732666015625, 51.95977783203125, -89.33149719238281, 154.2512664794922, 104.15658569335938, 81.57449340820312, 142.3831024169922], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000331.npy"} +{"epoch": 0.5003779289493575, "step": 332, "batch_size": 64, "mean": 56.47351837158203, "std": 81.63681030273438, "min": -128.85977172851562, "p10": -16.346254730224608, "median": 38.56285858154297, "p90": 165.43194122314452, "max": 276.6304626464844, "pos_frac": 0.78125, "sample": [-17.565425872802734, 7.490242004394531, 7.570320129394531, -3.53167724609375, 3.770578384399414, 117.8375015258789, 41.301300048828125, 13.346712112426758, 29.746826171875, 46.28787612915039, -21.702552795410156, 259.9184875488281, 229.8135223388672, 54.19563293457031, 39.06343078613281, 51.358131408691406, 47.75306701660156, 0.19384002685546875, 136.32815551757812, 10.402267456054688, 77.98352813720703, 25.2642822265625, -59.43813705444336, 148.36325073242188, 38.062286376953125, 126.69860076904297, -128.85977172851562, 34.668556213378906, 20.576553344726562, 4.765007019042969, 29.56916046142578, 165.6419219970703, 48.03204345703125, 3.8254547119140625, 7.200836181640625, 23.465194702148438, 2.608461380004883, 214.46978759765625, -55.87529754638672, -6.505666732788086, 48.805908203125, 92.64706420898438, 121.71672821044922, -9.752721786499023, 39.42817687988281, -10.692054748535156, 107.51539611816406, 140.1961212158203, 102.618408203125, 65.35910034179688, 199.2917938232422, 36.060462951660156, 102.02635192871094, -10.22848892211914, -6.880210876464844, -17.922592163085938, 259.9408264160156, -13.501522064208984, 70.14517211914062, 164.94198608398438, 276.6304626464844, 63.95002746582031, -54.07175064086914, 71.9864501953125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000332.npy"} +{"epoch": 0.5018896447467877, "step": 333, "batch_size": 64, "mean": 68.1856689453125, "std": 81.92330169677734, "min": -134.03829956054688, "p10": -18.08258800506591, "median": 56.943931579589844, "p90": 168.13714752197268, "max": 290.5568542480469, "pos_frac": 0.828125, "sample": [1.2427444458007812, 1.054941177368164, 149.7607421875, 241.00648498535156, 57.149810791015625, 21.560638427734375, 53.74857711791992, 131.83551025390625, 2.8050498962402344, 53.7242431640625, 107.05611419677734, 37.824100494384766, 116.0437240600586, 129.99331665039062, 145.42733764648438, 38.811553955078125, 41.866302490234375, 187.40516662597656, 195.15365600585938, 159.99813842773438, -2.9463653564453125, 56.73805236816406, 31.969017028808594, 21.413726806640625, 97.3204345703125, -75.4610366821289, -29.922916412353516, 58.0078125, 22.519668579101562, 67.42183685302734, 165.45465087890625, 162.01141357421875, -32.063018798828125, 76.82389068603516, 118.39110565185547, 70.2843017578125, 237.0428466796875, 11.542747497558594, 47.60458755493164, 83.10873413085938, -12.791322708129883, 23.923446655273438, -134.03829956054688, 16.876188278198242, 55.65361785888672, -105.73243713378906, 290.5568542480469, 5.312904357910156, -20.35027313232422, 68.64990997314453, -2.2181549072265625, -37.65779113769531, 79.46527862548828, 169.2867889404297, 7.681352615356445, 10.333747863769531, 133.7646484375, -0.01238250732421875, 156.55535888671875, 84.88188171386719, 174.90313720703125, 90.6238784790039, 140.69720458984375, 106.78751373291016], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000333.npy"} +{"epoch": 0.5034013605442177, "step": 334, "batch_size": 64, "mean": 51.71104431152344, "std": 92.18326568603516, "min": -146.7069091796875, "p10": -59.90785522460937, "median": 37.58066940307617, "p90": 151.13228454589844, "max": 331.4082946777344, "pos_frac": 0.671875, "sample": [-54.030860900878906, -106.0315170288086, 78.48326110839844, 88.46347045898438, 72.23802185058594, -30.248321533203125, -102.5322265625, 331.4082946777344, 30.18199920654297, -146.7069091796875, -0.8632659912109375, 328.0887145996094, -19.94720458984375, 111.79512023925781, -0.784637451171875, 112.938232421875, -27.952392578125, -17.101320266723633, 126.14382934570312, 134.14053344726562, 102.79965209960938, 204.60182189941406, 19.05182647705078, 56.480224609375, 98.89822387695312, -82.0032958984375, 27.1947021484375, 0.48888206481933594, 105.06068420410156, 5.182804107666016, -6.494081497192383, 5.1593170166015625, 169.95541381835938, -21.04931640625, 100.97318267822266, 152.04791259765625, 101.99496459960938, -1.9543914794921875, 84.41574096679688, 13.589195251464844, -8.344627380371094, -62.42656707763672, -19.957733154296875, -32.01045227050781, 144.10110473632812, -4.817169189453125, 3.628326416015625, 13.08932113647461, -66.30221557617188, 107.31459045410156, 120.6653060913086, 48.34711456298828, 0.214874267578125, 84.69790649414062, -64.15885925292969, 127.31121826171875, 66.28657531738281, 154.73812866210938, 44.979339599609375, 21.05974006652832, 148.99581909179688, 120.09210968017578, 171.29977416992188, 146.62710571289062], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000334.npy"} +{"epoch": 0.5049130763416477, "step": 335, "batch_size": 64, "mean": 56.584686279296875, "std": 96.53642272949219, "min": -110.30801391601562, "p10": -59.15250778198242, "median": 44.864402770996094, "p90": 190.22933044433594, "max": 319.2213439941406, "pos_frac": 0.734375, "sample": [49.504058837890625, 125.53046417236328, -55.80180358886719, 267.55364990234375, 78.71820068359375, 319.2213439941406, 43.91355895996094, 3.275655746459961, 191.1107177734375, 126.5703353881836, 203.89903259277344, 144.89096069335938, 54.20689392089844, 45.81524658203125, 188.17276000976562, 158.22547912597656, 23.822105407714844, 103.43666076660156, 9.030281066894531, 171.37814331054688, -72.73951721191406, 12.643218994140625, 47.40434265136719, 192.7836456298828, -92.82666015625, 74.19522094726562, 135.05276489257812, -17.038368225097656, -41.06144714355469, 80.71881866455078, 153.42059326171875, 18.43944549560547, 17.25183868408203, 110.5812759399414, 150.403564453125, -75.35020446777344, 172.56884765625, -1.0967826843261719, 11.625007629394531, 201.59490966796875, 4.744659423828125, -24.846773147583008, 0.6593170166015625, 49.70490264892578, 59.40159606933594, 135.95384216308594, -32.971214294433594, -109.16969299316406, 12.998905181884766, 2.869424819946289, 16.499282836914062, -55.12007522583008, -40.13787078857422, -110.30801391601562, -81.96420288085938, -60.588523864746094, 228.19569396972656, 146.34243774414062, 0.240203857421875, 74.25483703613281, -34.06730651855469, -3.7096633911132812, 11.243522644042969, 100.150390625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000335.npy"} +{"epoch": 0.5064247921390779, "step": 336, "batch_size": 64, "mean": 57.479217529296875, "std": 96.23854064941406, "min": -109.70796203613281, "p10": -48.25253143310545, "median": 42.92720985412598, "p90": 185.11186370849612, "max": 334.87164306640625, "pos_frac": 0.703125, "sample": [-71.60598754882812, 38.067138671875, 169.86471557617188, 41.45286560058594, 334.87164306640625, 191.22410583496094, 112.79583740234375, -10.715721130371094, 179.7628173828125, 6.756021499633789, 12.102693557739258, -18.39029884338379, 187.40431213378906, -26.156463623046875, 136.56385803222656, 20.641338348388672, 20.486351013183594, 84.27554321289062, 238.14096069335938, -109.70796203613281, -32.079254150390625, 63.565277099609375, 244.32763671875, 68.16101837158203, -33.264129638671875, 128.27699279785156, 44.401554107666016, -100.82115173339844, -19.148391723632812, -79.00658416748047, 208.9890594482422, -77.80269622802734, 50.08202362060547, 63.00331115722656, -13.43178939819336, 5.061750411987305, 21.271728515625, -4.175697326660156, 49.35179901123047, 130.56393432617188, 12.376983642578125, -3.7302169799804688, 78.96760559082031, 26.585708618164062, 131.9600830078125, 48.50140380859375, 76.30357360839844, 31.511871337890625, 48.219970703125, 131.77207946777344, 26.1392822265625, -3.4119720458984375, 90.01776885986328, 172.1301727294922, 323.67718505859375, 47.63221740722656, -21.945709228515625, -27.16119384765625, 148.31253051757812, -54.67613220214844, 40.912445068359375, 133.8791961669922, 67.01701354980469, -101.48234558105469], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000336.npy"} +{"epoch": 0.5079365079365079, "step": 337, "batch_size": 64, "mean": 46.7825813293457, "std": 94.92849731445312, "min": -149.56448364257812, "p10": -68.01993331909178, "median": 43.40521240234375, "p90": 152.64597473144534, "max": 338.7495422363281, "pos_frac": 0.71875, "sample": [69.88704681396484, -105.28876495361328, -149.56448364257812, -25.51728057861328, 33.923248291015625, 143.5113983154297, -36.327064514160156, 133.00743103027344, 63.585113525390625, 21.13831329345703, 184.37277221679688, 18.483970642089844, 81.51905059814453, -142.28030395507812, -76.8076171875, -35.06908416748047, 22.433090209960938, 338.7495422363281, 20.194236755371094, -1.8656158447265625, 23.380271911621094, 7.088373184204102, -32.38324737548828, -139.12957763671875, 142.08094787597656, 87.66670227050781, 1.8898468017578125, 58.81432342529297, 58.49845886230469, 197.12709045410156, 3.5785140991210938, -3.9035720825195312, 156.3614959716797, -14.455848693847656, 125.63430786132812, 45.1937255859375, 170.8235626220703, -52.35420227050781, -19.852981567382812, 105.79730224609375, 124.40216827392578, 6.594581604003906, 143.97642517089844, 41.61669921875, 67.88916015625, -50.51127624511719, 63.80323791503906, 88.47947692871094, 104.17440795898438, 52.11083984375, 90.71009826660156, 299.583984375, -17.05530548095703, 18.53497314453125, 30.30560302734375, 128.42445373535156, 5.872964859008789, -74.73381805419922, 106.27174377441406, 109.89378356933594, -119.30166625976562, 49.44720458984375, 176.2368621826172, 67.41813659667969], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000337.npy"} +{"epoch": 0.509448223733938, "step": 338, "batch_size": 64, "mean": 46.322906494140625, "std": 82.64006805419922, "min": -205.40626525878906, "p10": -49.558915710449206, "median": 35.616682052612305, "p90": 156.00286254882815, "max": 192.9547119140625, "pos_frac": 0.71875, "sample": [-87.36404418945312, 58.45671463012695, 144.81011962890625, 144.83456420898438, 146.2161865234375, 126.21369171142578, -94.46240234375, 104.01143646240234, 39.113861083984375, 186.34190368652344, 192.9547119140625, 86.95487213134766, -5.0988311767578125, 44.039947509765625, 13.924407958984375, -87.35771942138672, 29.573936462402344, -112.32254028320312, 2.40240478515625, -3.2150802612304688, -15.784843444824219, 156.90872192382812, 32.03700637817383, -13.283235549926758, 117.93875885009766, 109.98159790039062, -205.40626525878906, 137.81866455078125, 83.5032958984375, 62.558387756347656, -24.93041229248047, 118.70440673828125, -11.43478012084961, 12.028133392333984, 53.54334259033203, 91.63920593261719, 18.860496520996094, 27.274139404296875, 178.29281616210938, 31.502944946289062, 7.959495544433594, 158.55905151367188, -9.766067504882812, 124.840576171875, 32.119503021240234, 14.152969360351562, -55.16234588623047, -23.341964721679688, 65.6400375366211, -23.25021743774414, 25.415687561035156, 162.06341552734375, 79.23220825195312, 5.1475067138671875, 77.57769775390625, 98.67732238769531, 24.171859741210938, 153.88919067382812, 84.19864654541016, -104.02786254882812, -36.48424530029297, 73.74158477783203, 163.1345977783203, -25.603206634521484], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000338.npy"} +{"epoch": 0.5109599395313681, "step": 339, "batch_size": 64, "mean": 54.697265625, "std": 89.97712707519531, "min": -223.79425048828125, "p10": -56.215214538574195, "median": 39.88225173950195, "p90": 160.2938385009766, "max": 294.6825866699219, "pos_frac": 0.796875, "sample": [51.093116760253906, 76.83287811279297, 13.330459594726562, -34.48677062988281, -18.63970184326172, 163.26895141601562, 32.598846435546875, 84.20496368408203, 33.96110153198242, -70.89189910888672, 70.9615478515625, -8.198745727539062, 24.424163818359375, 27.193103790283203, 7.269321441650391, 54.05842590332031, -72.86526489257812, 127.40670776367188, 148.74073791503906, 11.238985061645508, 154.4193115234375, 37.95598602294922, 122.29093933105469, 99.3452377319336, 53.66356658935547, 162.81149291992188, -1.7259674072265625, 21.57730484008789, -223.79425048828125, 92.0977783203125, 21.921001434326172, 107.72196960449219, 196.25909423828125, 182.27606201171875, 96.54730987548828, 11.68832778930664, 141.15158081054688, -100.21607971191406, 128.99697875976562, 85.80674743652344, 294.6825866699219, 30.065475463867188, -20.43436622619629, -2.4493026733398438, 26.763397216796875, 6.30694580078125, 23.613525390625, 133.04937744140625, -85.62129974365234, -65.52740478515625, 18.710817337036133, 16.371646881103516, 109.80506134033203, 142.91378784179688, -136.95596313476562, 250.00033569335938, 5.861907958984375, 55.855377197265625, 41.80851745605469, 132.2335205078125, 103.79647827148438, 91.28202819824219, 4.721441268920898, 211.47584533691406], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000339.npy"} +{"epoch": 0.5124716553287982, "step": 340, "batch_size": 64, "mean": 48.29922103881836, "std": 103.85588073730469, "min": -255.00048828125, "p10": -65.71936492919922, "median": 28.611690521240234, "p90": 187.7139862060547, "max": 274.5369873046875, "pos_frac": 0.625, "sample": [82.71839904785156, 170.67446899414062, -18.306224822998047, 119.86408233642578, 24.823837280273438, 196.73976135253906, -8.123388290405273, 0.7809352874755859, -23.130393981933594, 234.48007202148438, -15.549522399902344, 80.60902404785156, 31.590744018554688, 111.80867004394531, 208.4749755859375, 122.388671875, 24.151689529418945, -116.94439697265625, 173.94467163085938, 83.89155578613281, -6.180849075317383, -17.353796005249023, -66.67073059082031, 101.64838409423828, -1.506500244140625, 192.2928466796875, -27.804962158203125, 132.5411834716797, -61.842445373535156, 172.4415283203125, -94.23899841308594, 134.62106323242188, -17.31170654296875, 67.88522338867188, 274.5369873046875, 73.77630615234375, 135.1591796875, 1.8814754486083984, -16.64581298828125, 184.93780517578125, 118.59846496582031, -63.49951171875, 8.547203063964844, 167.05953979492188, 51.58449935913086, -11.441061019897461, -255.00048828125, -72.2298583984375, 5.153709411621094, 194.04901123046875, 2.2453155517578125, 61.714351654052734, 75.0951919555664, -30.769113540649414, -173.5658721923828, -51.99485778808594, 155.15744018554688, -8.140769958496094, 146.43417358398438, 188.90377807617188, 44.768009185791016, 25.63263702392578, -93.20735931396484, -40.998146057128906], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000340.npy"} +{"epoch": 0.5139833711262283, "step": 341, "batch_size": 64, "mean": 65.3171157836914, "std": 92.36721801757812, "min": -138.982421875, "p10": -43.85487899780273, "median": 58.8580207824707, "p90": 190.04645080566408, "max": 235.217041015625, "pos_frac": 0.703125, "sample": [-51.430484771728516, 121.94170379638672, 145.457275390625, 52.329681396484375, 195.5895233154297, 61.2188720703125, 148.606201171875, 235.217041015625, 162.63348388671875, -2.9964599609375, 155.27223205566406, 56.497169494628906, -115.98906707763672, -28.500431060791016, 123.5508041381836, -4.073387145996094, 158.96078491210938, -3.1000137329101562, 160.86221313476562, 171.5067596435547, 108.06990051269531, 192.92385864257812, 192.45559692382812, 93.83340454101562, -79.95709228515625, 165.8636932373047, 202.9872589111328, 2.1469078063964844, 13.710639953613281, -28.686614990234375, 19.631961822509766, 8.675796508789062, -36.97618103027344, 17.043113708496094, 11.622451782226562, 171.50802612304688, 93.17066955566406, -11.207094192504883, 75.16533660888672, -115.72493743896484, -41.75458526611328, -44.7550048828125, 69.74718475341797, 184.42510986328125, 19.389328002929688, 199.98497009277344, 94.96893310546875, 111.04408264160156, 150.100830078125, 212.02352905273438, 133.140625, 35.922943115234375, 123.67024993896484, -5.821807861328125, -22.9285888671875, 116.31454467773438, 115.57048797607422, 38.38572692871094, -6.413576126098633, -58.30052185058594, -40.27429962158203, 43.12120056152344, 51.90587615966797, -138.982421875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000341.npy"} +{"epoch": 0.5154950869236583, "step": 342, "batch_size": 64, "mean": 63.56008529663086, "std": 88.78968048095703, "min": -118.62511444091797, "p10": -45.66180496215818, "median": 53.31571006774902, "p90": 167.9491409301758, "max": 326.53924560546875, "pos_frac": 0.734375, "sample": [48.437538146972656, 49.072723388671875, -0.493682861328125, -17.675559997558594, 7.931669235229492, 146.90713500976562, 35.922794342041016, 163.95997619628906, -71.17198181152344, -21.283798217773438, 6.094793319702148, 113.05508422851562, -11.818265914916992, 156.4315185546875, 18.313392639160156, -55.83332824707031, 19.99135398864746, -118.11323547363281, -1.2868633270263672, -22.315513610839844, 54.555423736572266, 135.36827087402344, 129.7928924560547, 164.33050537109375, -61.23217010498047, 97.2682113647461, 97.79545593261719, 80.39665985107422, 131.89439392089844, 65.65985107421875, 58.82065200805664, 46.73884201049805, 147.976806640625, 91.16070556640625, 48.05647277832031, -21.660566329956055, 105.00457763671875, 39.34326171875, -55.6673583984375, 156.59556579589844, 55.53068542480469, 169.49998474121094, 163.02593994140625, 62.527008056640625, -78.5028076171875, 207.53990173339844, -5.017494201660156, -16.01272201538086, 326.53924560546875, 221.93907165527344, 100.57671356201172, -118.62511444091797, 40.90941619873047, 186.75955200195312, 2.4100685119628906, 174.2061004638672, 109.59638977050781, 3.6017227172851562, -2.1452560424804688, 99.38093566894531, 78.36722564697266, 52.07599639892578, 26.79833984375, 248.54022216796875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000342.npy"} +{"epoch": 0.5170068027210885, "step": 343, "batch_size": 64, "mean": 45.81767272949219, "std": 88.71236419677734, "min": -158.23919677734375, "p10": -43.335332489013666, "median": 30.162593841552734, "p90": 149.82345275878913, "max": 347.9144287109375, "pos_frac": 0.734375, "sample": [86.34373474121094, 33.55327606201172, 2.5692520141601562, -7.6083831787109375, -0.229888916015625, 82.87490844726562, -14.18798828125, 2.1255321502685547, -37.609588623046875, -31.275970458984375, -127.94640350341797, -23.04168701171875, 96.91667175292969, 36.471717834472656, 347.9144287109375, -18.585994720458984, 156.07574462890625, 222.70513916015625, 103.76985931396484, 83.11102294921875, 78.05511474609375, 10.189733505249023, 109.8145751953125, 104.7562484741211, 24.59536361694336, 97.91630554199219, 13.990432739257812, -45.789222717285156, 53.65745544433594, 17.280115127563477, -120.07335662841797, 32.74700164794922, 135.23477172851562, 90.29828643798828, 160.10598754882812, 18.57569122314453, 103.71353912353516, 170.2395477294922, -143.93592834472656, 214.2500762939453, -48.540740966796875, 70.83415222167969, 10.258506774902344, 239.91107177734375, 38.67298889160156, 27.57818603515625, 125.96633911132812, -5.656253814697266, 22.31237030029297, 129.34954833984375, 124.6075668334961, 16.396934509277344, 2.1985855102539062, 74.87195587158203, 49.189857482910156, 43.32273864746094, 10.184326171875, -9.983779907226562, 68.13714599609375, 20.555442810058594, -158.23919677734375, -3.1529388427734375, -56.8203010559082, 20.80943489074707], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000343.npy"} +{"epoch": 0.5185185185185185, "step": 344, "batch_size": 64, "mean": 66.24297332763672, "std": 107.91194152832031, "min": -149.65213012695312, "p10": -64.8389907836914, "median": 58.56154251098633, "p90": 178.0529724121094, "max": 471.6167907714844, "pos_frac": 0.75, "sample": [-78.80145263671875, 145.12669372558594, 51.92266082763672, 72.90040588378906, 139.01797485351562, -10.40553092956543, 145.23841857910156, 1.6837043762207031, 39.631797790527344, 103.79559326171875, 108.56820678710938, 9.582977294921875, 17.372419357299805, 38.354740142822266, 97.75242614746094, 67.10992431640625, 8.257080078125, 471.6167907714844, -52.051483154296875, 123.0323486328125, -77.7908935546875, -35.66102600097656, -96.9060287475586, 206.40835571289062, 11.45444107055664, -47.95520782470703, -70.31935119628906, -1.6719589233398438, 143.79502868652344, 109.14663696289062, -84.469970703125, 198.83648681640625, -32.025184631347656, 100.39877319335938, 60.87742614746094, 170.28639221191406, 134.00225830078125, -11.814346313476562, 51.34681701660156, -149.65213012695312, 5.794574737548828, 19.714412689208984, -39.15789031982422, 7.15559196472168, -47.74768829345703, 9.695470809936523, 180.08291625976562, 136.52088928222656, 118.38674926757812, 102.93133544921875, 114.18610382080078, 41.164703369140625, 146.92599487304688, 57.28447723388672, 162.3950653076172, 30.783632278442383, 349.13818359375, 109.08277130126953, 59.83860778808594, 173.31643676757812, -128.087158203125, 197.69126892089844, 130.79185485839844, 223.66966247558594], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000344.npy"} +{"epoch": 0.5200302343159486, "step": 345, "batch_size": 64, "mean": 59.55921936035156, "std": 97.50281524658203, "min": -135.67166137695312, "p10": -46.612018966674796, "median": 41.212528228759766, "p90": 177.84783172607425, "max": 302.72625732421875, "pos_frac": 0.71875, "sample": [89.4149169921875, 15.28707504272461, 209.15280151367188, -135.67166137695312, 17.509319305419922, 21.861404418945312, -7.9397430419921875, -24.355712890625, -41.13794708251953, -37.514312744140625, 135.77581787109375, 141.1450653076172, 146.9388885498047, 101.3708267211914, -66.61659240722656, 132.29400634765625, -106.33391571044922, 123.59516906738281, 29.081153869628906, 180.2110137939453, 271.11358642578125, 3.6093807220458984, 220.5059356689453, 120.1103744506836, 142.22705078125, -27.49420166015625, 26.11267852783203, 17.569143295288086, -17.593948364257812, 30.89197540283203, 114.6758041381836, 302.72625732421875, 135.27561950683594, 22.20049476623535, 73.90567016601562, -115.67781066894531, 137.0857696533203, 240.66903686523438, 131.4189453125, -16.35464096069336, 172.333740234375, 36.42974090576172, -62.76355743408203, 244.5052032470703, 114.08763122558594, -48.95804977416992, 38.180397033691406, -37.041114807128906, -22.692214965820312, 50.265220642089844, 63.387908935546875, 44.244659423828125, -6.713294982910156, 33.0108642578125, 6.4639892578125, 127.51889038085938, 2.2231388092041016, -134.00979614257812, -34.38932418823242, 85.8864517211914, 114.31062316894531, 80.9365234375, 54.42850112915039, 153.09933471679688], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000345.npy"} +{"epoch": 0.5215419501133787, "step": 346, "batch_size": 64, "mean": 72.15606689453125, "std": 96.52890014648438, "min": -171.31622314453125, "p10": -51.22891464233396, "median": 71.88579559326172, "p90": 188.8869644165039, "max": 266.49993896484375, "pos_frac": 0.765625, "sample": [145.59938049316406, 158.93699645996094, 217.0194091796875, 161.10604858398438, 174.9358367919922, 70.68370056152344, 57.96321105957031, 117.08690643310547, 160.323486328125, 189.7484893798828, 145.50912475585938, 5.132301330566406, 140.52069091796875, 160.53256225585938, -69.21818542480469, 167.60618591308594, -109.92146301269531, 82.92144775390625, -20.19605255126953, 73.087890625, 79.46430969238281, 121.17221069335938, 136.94529724121094, 41.118019104003906, 123.728271484375, 258.1169128417969, -171.31622314453125, 76.97317504882812, 55.95295333862305, -133.4348602294922, 7.004669189453125, -60.23278045654297, 164.2877960205078, -4.096523284912109, 24.714488983154297, 80.88472747802734, 24.071022033691406, -25.865005493164062, 150.0111541748047, 213.11456298828125, 266.49993896484375, 151.1812286376953, -30.219894409179688, 204.3976593017578, 29.381690979003906, 27.52368927001953, 140.98092651367188, 52.81324768066406, -75.33380889892578, 91.28160095214844, 60.40110778808594, 9.094955444335938, -15.878664016723633, -29.736724853515625, -64.71344757080078, 25.397796630859375, 45.76609802246094, 163.46702575683594, -10.073188781738281, 186.87673950195312, -1.9788322448730469, 1.0073108673095703, 7.3155364990234375, 190.5440673828125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000346.npy"} +{"epoch": 0.5230536659108088, "step": 347, "batch_size": 64, "mean": 55.376441955566406, "std": 83.04348754882812, "min": -86.63238525390625, "p10": -40.04507827758788, "median": 43.273183822631836, "p90": 174.697868347168, "max": 251.79629516601562, "pos_frac": 0.671875, "sample": [26.548433303833008, -5.5093536376953125, 12.650016784667969, 111.13862609863281, -51.73518371582031, 53.643707275390625, 151.62063598632812, 113.73503875732422, 135.9072723388672, 130.67555236816406, -34.086273193359375, 148.76507568359375, 47.064178466796875, -69.31694793701172, -78.56857299804688, 251.79629516601562, 46.102943420410156, 40.443424224853516, -42.010406494140625, 177.08956909179688, 72.97898864746094, 13.745702743530273, 161.4190216064453, -21.522491455078125, -34.31756591796875, 31.278926849365234, 93.40380096435547, 139.25640869140625, 207.80276489257812, -11.662109375, 6.055946350097656, -2.365346908569336, -50.068214416503906, 129.64239501953125, -22.49175453186035, 47.243831634521484, -10.441776275634766, 79.66365051269531, 80.2952880859375, 66.10018157958984, 77.480224609375, 24.877723693847656, 2.94525146484375, 100.70449829101562, 13.353044509887695, 205.516357421875, -35.459312438964844, -15.348529815673828, 194.70889282226562, 179.5095672607422, 169.1172332763672, 127.87744140625, -86.63238525390625, 72.65776062011719, -79.373291015625, 146.87344360351562, 8.922952651977539, 13.085311889648438, -11.676044464111328, -2.3470630645751953, -3.1456336975097656, -9.010326385498047, 180.18087768554688, 127.302734375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000347.npy"} +{"epoch": 0.5245653817082389, "step": 348, "batch_size": 64, "mean": 63.05001449584961, "std": 99.79865264892578, "min": -175.95755004882812, "p10": -55.49367980957031, "median": 46.85829162597656, "p90": 202.7229934692383, "max": 314.2433776855469, "pos_frac": 0.6875, "sample": [118.71676635742188, 162.07196044921875, 65.15945434570312, 22.38507080078125, 230.77499389648438, 86.34260559082031, -69.13011169433594, -8.652397155761719, -175.95755004882812, -15.652137756347656, 280.15203857421875, -60.89582824707031, 54.69197082519531, 97.27214813232422, 98.54830932617188, 149.43035888671875, 36.750701904296875, 204.58070373535156, 58.330631256103516, 213.31588745117188, 24.03256607055664, 148.84481811523438, 153.06634521484375, 3.3830795288085938, 73.4968490600586, 179.14773559570312, -13.995452880859375, 166.3903045654297, -50.76770782470703, -59.23046875, 48.82952880859375, -1.7974014282226562, 2.624521255493164, 255.41189575195312, 90.34149932861328, -68.97814178466797, 107.14164733886719, -57.66984176635742, -23.007041931152344, 98.51004028320312, 130.5261993408203, 314.2433776855469, 79.42190551757812, 4.769023895263672, 40.05072021484375, -37.32414245605469, -57.51909637451172, -31.156089782714844, 13.304183959960938, 92.65370178222656, -4.098415374755859, 304.331298828125, -30.976049423217773, 59.564735412597656, 100.42141723632812, -2.5731124877929688, 44.887054443359375, 25.300750732421875, -5.488861083984375, 19.107961654663086, -19.177841186523438, 135.7441864013672, 198.38833618164062, 36.78947067260742], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000348.npy"} +{"epoch": 0.5260770975056689, "step": 349, "batch_size": 64, "mean": 68.83206176757812, "std": 97.23397827148438, "min": -129.69448852539062, "p10": -33.59163246154785, "median": 67.28182983398438, "p90": 191.15127868652348, "max": 304.4131774902344, "pos_frac": 0.703125, "sample": [-33.395694732666016, 104.21096801757812, 179.4852752685547, 60.107078552246094, 67.56112670898438, -6.588539123535156, 108.80443572998047, 182.53237915039062, 89.57603454589844, 23.6004638671875, -6.765291213989258, 44.75970458984375, 126.00613403320312, 67.00253295898438, 129.44204711914062, 137.3008575439453, 29.457887649536133, -93.20589447021484, -129.69448852539062, 153.40957641601562, 75.25410461425781, 181.1177520751953, -16.59307861328125, 14.58810043334961, 139.94784545898438, 16.685348510742188, -33.67560577392578, 80.42337036132812, -27.67572784423828, 290.1469421386719, 97.39183044433594, 108.60316467285156, 8.855331420898438, 84.64324951171875, 4.5908050537109375, 178.89544677734375, 74.90521240234375, 228.76234436035156, -80.5465316772461, 124.53825378417969, 304.4131774902344, 35.563819885253906, 12.059051513671875, 82.66445922851562, 86.90534973144531, 194.8450927734375, 251.99212646484375, -9.492595672607422, -54.48054504394531, -1.746084213256836, 105.91648864746094, -18.806900024414062, -21.52075958251953, -16.97711181640625, 83.48392486572266, 212.1663818359375, -17.068954467773438, -60.12635803222656, -51.808799743652344, 154.6893768310547, 4.147115707397461, -18.923240661621094, 62.43144989013672, 300.46044921875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000349.npy"} +{"epoch": 0.527588813303099, "step": 350, "batch_size": 64, "mean": 85.30538177490234, "std": 88.1926498413086, "min": -98.61121368408203, "p10": -18.61298065185547, "median": 89.08243942260742, "p90": 196.07344665527347, "max": 310.6732482910156, "pos_frac": 0.8125, "sample": [161.77188110351562, -22.425628662109375, 106.03201293945312, 1.8080902099609375, 111.70201873779297, 23.42918586730957, 238.73089599609375, 142.04344177246094, 120.53829956054688, 91.20149230957031, 45.11906433105469, -44.223114013671875, 127.56253051757812, 125.70803833007812, 80.19618225097656, 95.99516296386719, 190.21246337890625, 16.21805191040039, -18.262222290039062, -10.730855941772461, 143.22930908203125, 121.17330932617188, -98.61121368408203, 160.49301147460938, 65.3500747680664, -0.47689056396484375, -62.613037109375, 225.3454132080078, 48.69953155517578, 27.218524932861328, 157.50131225585938, -18.7633056640625, 171.12722778320312, 37.73186492919922, 120.10609436035156, 215.82461547851562, 91.95416259765625, 239.67864990234375, 310.6732482910156, 271.5547180175781, 9.107397079467773, 198.58529663085938, 176.81373596191406, -9.414634704589844, 31.018138885498047, 1.410308837890625, 99.74634552001953, 3.8693904876708984, -37.109554290771484, 67.8941421508789, 63.33964538574219, 77.93666076660156, 1.6585025787353516, 78.07093048095703, 86.96338653564453, -74.78169250488281, 110.99188232421875, 67.74696350097656, -12.921012878417969, 139.96212768554688, 168.24363708496094, 136.61109924316406, 96.26937103271484, 167.70870971679688], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000350.npy"} +{"epoch": 0.5291005291005291, "step": 351, "batch_size": 64, "mean": 60.617576599121094, "std": 100.95906066894531, "min": -186.65005493164062, "p10": -58.20272903442382, "median": 61.765113830566406, "p90": 178.7323013305664, "max": 247.26678466796875, "pos_frac": 0.703125, "sample": [-11.472375869750977, 147.45980834960938, 157.52267456054688, 177.00772094726562, -3.4290122985839844, 165.16262817382812, -9.525135040283203, 99.869384765625, 182.59109497070312, 45.08219909667969, -6.684814453125, 205.97750854492188, 40.265892028808594, -44.91436767578125, 42.58174514770508, -12.134086608886719, -15.992324829101562, 21.303741455078125, 122.73479461669922, 113.67222595214844, 131.4495849609375, -106.82367706298828, 178.5204620361328, 7.0549774169921875, 125.21771240234375, 247.26678466796875, 84.25408935546875, 148.41659545898438, -186.65005493164062, 61.469146728515625, 165.72708129882812, -61.304405212402344, 63.16876220703125, 14.347244262695312, 106.15934753417969, -126.16336059570312, -64.1079330444336, 210.2435302734375, 25.675697326660156, -17.37911605834961, 120.60839080810547, 178.82308959960938, 30.55609130859375, -20.58448028564453, 229.64976501464844, 42.824981689453125, 130.5584259033203, 72.49708557128906, 3.7592926025390625, 111.78069305419922, -40.359031677246094, -0.4843025207519531, 150.13006591796875, 101.81912994384766, 186.57681274414062, 33.928062438964844, 62.06108093261719, 157.92137145996094, -176.40260314941406, 40.75511932373047, -182.97715759277344, 128.20852661132812, 145.21826171875, -50.965484619140625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000351.npy"} +{"epoch": 0.5306122448979592, "step": 352, "batch_size": 64, "mean": 67.48252868652344, "std": 106.80232238769531, "min": -110.77735900878906, "p10": -49.204275894165036, "median": 42.04352951049805, "p90": 191.96611938476562, "max": 528.7159423828125, "pos_frac": 0.75, "sample": [148.8856201171875, 278.2347717285156, 194.89288330078125, 28.632827758789062, 188.93463134765625, -110.77735900878906, -6.529453277587891, 25.49005889892578, 6.133663177490234, 104.94113159179688, 109.41326904296875, 118.77792358398438, -99.12692260742188, 9.157306671142578, 66.50015258789062, 20.174285888671875, -22.366424560546875, -16.693954467773438, -64.4198989868164, 7.0021820068359375, 137.92379760742188, 113.96279907226562, -6.79620361328125, 175.5340118408203, 167.13375854492188, 85.53695678710938, 47.65777587890625, -103.48509979248047, 34.32529067993164, -50.683048248291016, 2.6182823181152344, -0.5953121185302734, 68.88471984863281, 72.62751770019531, 24.10888671875, 65.8310546875, 49.74342346191406, 194.06201171875, 192.69400024414062, 185.60543823242188, 156.75588989257812, -52.28661346435547, 528.7159423828125, 7.728456497192383, 36.429283142089844, 155.07901000976562, -50.73084259033203, 52.54388427734375, -21.3708438873291, 73.11131286621094, 4.796173095703125, 106.80801391601562, 264.0953674316406, 134.1458740234375, 190.26773071289062, 6.6663970947265625, -45.753807067871094, 200.2625732421875, -15.754032135009766, 33.46144104003906, 13.98434066772461, 109.03646850585938, 20.954282760620117, -34.01109313964844], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000352.npy"} +{"epoch": 0.5321239606953893, "step": 353, "batch_size": 64, "mean": 58.27006530761719, "std": 96.5897445678711, "min": -295.4234619140625, "p10": -31.543160629272457, "median": 49.860652923583984, "p90": 172.46937561035156, "max": 305.78155517578125, "pos_frac": 0.703125, "sample": [49.87999725341797, 247.123779296875, -62.689327239990234, 77.07951354980469, -143.68783569335938, 23.08065414428711, 97.5801773071289, -19.634902954101562, 211.77415466308594, 41.5284423828125, 28.53763198852539, 15.308753967285156, 116.40353393554688, 66.00152587890625, 177.6023406982422, -9.884397506713867, 21.4410400390625, 173.78997802734375, 147.08787536621094, 154.77426147460938, 305.78155517578125, 154.0306396484375, 20.555803298950195, 112.78008270263672, -25.390960693359375, -0.18863296508789062, 182.05276489257812, 53.26764678955078, 122.48931121826172, 94.59243774414062, 80.06332397460938, -14.52645492553711, 136.83258056640625, 1.2183647155761719, -295.4234619140625, 182.59544372558594, -56.682342529296875, -28.898563385009766, 157.93124389648438, 149.84429931640625, 94.02122497558594, 147.95730590820312, 169.38796997070312, -1.724029541015625, -90.8065185546875, 92.30296325683594, 166.39833068847656, 1.762521743774414, -6.189250946044922, 14.669425964355469, 49.84130859375, 38.15721130371094, -34.259735107421875, -8.713851928710938, 56.382545471191406, 25.188674926757812, 33.630577087402344, -6.3436431884765625, -6.826324462890625, -17.30449676513672, 88.82998657226562, -32.67655944824219, 140.37588500976562, 69.20039367675781], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000353.npy"} +{"epoch": 0.5336356764928194, "step": 354, "batch_size": 64, "mean": 66.9397201538086, "std": 100.9171142578125, "min": -139.26686096191406, "p10": -75.25880966186521, "median": 66.10718154907227, "p90": 191.0928634643555, "max": 281.32305908203125, "pos_frac": 0.75, "sample": [256.7688903808594, 31.310932159423828, -47.05158996582031, 92.2889404296875, -98.55542755126953, -14.061697006225586, 30.329833984375, 59.459068298339844, 195.19825744628906, 17.65518569946289, 116.19568634033203, 171.73858642578125, 164.07179260253906, 84.10965728759766, 54.03406524658203, -0.7256965637207031, 139.25762939453125, -81.9615478515625, 158.62457275390625, 232.93820190429688, 46.662994384765625, 165.07363891601562, 103.25273132324219, 89.50042724609375, -99.4898452758789, 7.696462631225586, -133.18482971191406, 112.23078918457031, 43.509033203125, 80.94210052490234, 37.922393798828125, 175.956298828125, 181.51361083984375, 85.90765380859375, 138.79052734375, 10.584068298339844, 72.75529479980469, -139.26686096191406, 171.99424743652344, 5.2758941650390625, 137.51211547851562, 153.58030700683594, -43.72649383544922, -94.49751281738281, 250.21218872070312, -134.39059448242188, 211.14698791503906, 281.32305908203125, 59.10542297363281, -59.61908721923828, 32.27320098876953, 83.87606811523438, 112.87686157226562, 210.38198852539062, 52.22663497924805, 35.30002212524414, -12.611305236816406, 6.701869964599609, -10.917709350585938, 100.76445007324219, 98.20337677001953, 154.20748901367188, -29.709716796875, -29.329570770263672], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000354.npy"} +{"epoch": 0.5351473922902494, "step": 355, "batch_size": 64, "mean": 85.11663055419922, "std": 110.93936157226562, "min": -138.57217407226562, "p10": -47.4990737915039, "median": 84.66183853149414, "p90": 217.26978607177742, "max": 411.5406494140625, "pos_frac": 0.75, "sample": [115.61006164550781, 296.26434326171875, -45.19932556152344, -8.928009033203125, 262.51446533203125, 150.8838348388672, 40.436973571777344, -73.9286117553711, -14.837333679199219, 145.13949584960938, 97.62457275390625, 86.50747680664062, 127.55108642578125, -48.48468017578125, 178.43841552734375, 172.1758270263672, 4.162059783935547, 118.16571044921875, -89.75428771972656, 178.5570068359375, -20.935897827148438, 35.05134201049805, 80.54608154296875, 195.98362731933594, 149.0063934326172, 22.572126388549805, 94.6465072631836, 90.5838623046875, 161.98048400878906, 411.5406494140625, -79.3674545288086, 156.88949584960938, -1.2245292663574219, 281.638916015625, 39.10832214355469, -119.69136810302734, 69.04939270019531, -17.543800354003906, 55.05692672729492, 156.01332092285156, -31.649024963378906, -84.78106689453125, 143.88104248046875, 24.578262329101562, 10.21415901184082, 276.83349609375, 226.39242553710938, 23.563426971435547, 183.04421997070312, -138.57217407226562, 153.95272827148438, 172.11129760742188, 193.08946228027344, 17.42531967163086, 181.72715759277344, -18.200159072875977, 25.137672424316406, 7.086299896240234, 230.83499145507812, -10.706214904785156, 150.2149658203125, 51.083457946777344, 82.81620025634766, 123.58283233642578], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000355.npy"} +{"epoch": 0.5366591080876795, "step": 356, "batch_size": 64, "mean": 70.31560516357422, "std": 110.1673812866211, "min": -217.9102020263672, "p10": -69.96321334838866, "median": 66.57463836669922, "p90": 195.65080718994142, "max": 398.6991882324219, "pos_frac": 0.703125, "sample": [166.07876586914062, 184.85205078125, 110.36431884765625, -75.85502624511719, 155.71395874023438, 27.65538787841797, 3.1032791137695312, 248.30027770996094, 117.69060516357422, 3.8456687927246094, -72.38400268554688, -149.66131591796875, 177.8295440673828, -106.58467864990234, 159.08428955078125, 206.20355224609375, 24.01184844970703, 28.114025115966797, 182.11549377441406, -9.28877067565918, 205.41152954101562, -217.9102020263672, 195.16119384765625, 171.15066528320312, 30.83538818359375, 195.8606414794922, -51.99172592163086, -54.77351379394531, 88.40129089355469, 52.724395751953125, 75.5129165649414, 66.03556823730469, -7.251798629760742, 14.297260284423828, -83.97089385986328, 120.57745361328125, 176.52337646484375, -64.31470489501953, 58.38716125488281, 163.96841430664062, -5.678714752197266, 33.47746276855469, 146.98248291015625, -17.361068725585938, 60.25776672363281, 102.56499481201172, 81.15672302246094, 74.14754486083984, -40.85901641845703, 125.54034423828125, -5.504756927490234, 87.5517807006836, 169.27505493164062, 212.73587036132812, -2.69854736328125, 160.94635009765625, 67.11370849609375, -7.762519836425781, 214.2801513671875, -24.196685791015625, -91.38423156738281, 398.6991882324219, 63.07530975341797, 182.01553344726562], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000356.npy"} +{"epoch": 0.5381708238851096, "step": 357, "batch_size": 64, "mean": 53.622982025146484, "std": 106.22366333007812, "min": -249.0379180908203, "p10": -59.5780101776123, "median": 53.98350143432617, "p90": 189.84498901367192, "max": 290.8138122558594, "pos_frac": 0.6875, "sample": [290.8138122558594, -12.648809432983398, 194.8345947265625, -10.299575805664062, -184.6422119140625, 168.19149780273438, 87.23912048339844, 48.999820709228516, 56.83372497558594, 43.379486083984375, -20.409271240234375, 84.98595428466797, -57.476768493652344, 34.3297233581543, 205.67945861816406, 113.90518188476562, 149.6043243408203, 106.03315734863281, 87.38750457763672, -5.722537994384766, 130.01153564453125, 43.523048400878906, 96.06187438964844, 10.364381790161133, 28.768531799316406, 90.9678726196289, -22.446653366088867, 51.640846252441406, -155.78668212890625, 79.2515869140625, -71.17271423339844, -31.21367835998535, -60.47854232788086, 69.65570831298828, 43.38146209716797, 214.20809936523438, 234.47862243652344, 56.46681213378906, 155.9846954345703, 169.28958129882812, -22.55431365966797, 56.32615661621094, -2.371185302734375, 23.81725311279297, 178.20257568359375, 151.63038635253906, 0.70135498046875, -35.9578857421875, -38.29518508911133, 39.19390869140625, 45.96519088745117, 249.2263946533203, 132.56451416015625, 66.38890075683594, 95.28826141357422, 58.2799072265625, -12.479413986206055, 90.86905670166016, 158.77813720703125, -125.26200866699219, -249.0379180908203, -162.761962890625, 229.83316040039062, -10.448881149291992], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000357.npy"} +{"epoch": 0.5396825396825397, "step": 358, "batch_size": 64, "mean": 62.74085235595703, "std": 112.89836883544922, "min": -243.33734130859375, "p10": -67.38102951049804, "median": 41.54452323913574, "p90": 181.59123687744142, "max": 405.8132019042969, "pos_frac": 0.75, "sample": [106.73440551757812, 81.75210571289062, 91.01617431640625, 180.45462036132812, -57.44764709472656, 181.90457153320312, 151.99461364746094, -60.420982360839844, -9.93552017211914, 6.6087646484375, -15.256763458251953, 126.66242980957031, 31.513755798339844, 168.18853759765625, 151.6399383544922, 58.24320983886719, -82.82426452636719, -18.722640991210938, 91.42298889160156, 44.539669036865234, -53.932098388671875, 6.891582489013672, 5.246589660644531, 90.97835540771484, 168.3905792236328, 35.324928283691406, -34.66368103027344, 23.054855346679688, -70.36390686035156, 25.849018096923828, -71.32462310791016, 54.737388610839844, -243.33734130859375, 12.811803817749023, -35.27442932128906, 66.18292236328125, 137.09881591796875, 74.65444946289062, 30.671104431152344, -102.78071594238281, 180.86012268066406, 209.2900390625, 368.48095703125, 33.9782600402832, 405.8132019042969, 177.91656494140625, 143.40887451171875, -29.520641326904297, 15.469680786132812, 208.21856689453125, 1.5059089660644531, -97.0335693359375, -145.2567596435547, 151.03265380859375, 4.1010589599609375, 146.1254119873047, 7.875522613525391, 98.478271484375, 38.54937744140625, 106.72465515136719, 170.79837036132812, 193.06069946289062, 34.29986572265625, 242.9540252685547], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000358.npy"} +{"epoch": 0.5411942554799698, "step": 359, "batch_size": 64, "mean": 31.496538162231445, "std": 110.95440673828125, "min": -355.35394287109375, "p10": -79.73360214233398, "median": 40.72494888305664, "p90": 159.50327758789064, "max": 360.0431823730469, "pos_frac": 0.65625, "sample": [-61.65953063964844, 112.60418701171875, -13.5823974609375, 56.892242431640625, 48.19762420654297, 42.22838592529297, -27.55588150024414, 22.944496154785156, -33.76359558105469, -355.35394287109375, 234.1747589111328, 16.408205032348633, 22.512161254882812, 57.443450927734375, -15.631484985351562, 45.005126953125, 164.1988983154297, -55.006202697753906, 12.10382080078125, -132.80128479003906, -26.040279388427734, -50.97914505004883, 54.870941162109375, -204.2820587158203, 68.09616088867188, 157.74513244628906, 64.94453430175781, 18.576725006103516, 85.1519775390625, -146.97962951660156, 177.36932373046875, 360.0431823730469, -74.34933471679688, 160.16746520996094, 85.50949096679688, 67.4550552368164, 11.051979064941406, 129.5281982421875, 57.99305725097656, -49.595252990722656, -82.04114532470703, 154.52748107910156, 115.2572250366211, 75.77774810791016, 11.001762390136719, 185.62466430664062, -56.366432189941406, -119.81463623046875, -12.259841918945312, 133.22787475585938, 56.20451736450195, -7.548795700073242, -217.30523681640625, 28.516103744506836, 114.68688201904297, 19.81878662109375, 84.03729248046875, 42.37300109863281, 39.22151184082031, 157.95350646972656, 46.6302375793457, 172.66961669921875, -0.6040992736816406, -11.446136474609375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000359.npy"} +{"epoch": 0.5427059712773998, "step": 360, "batch_size": 64, "mean": 71.51376342773438, "std": 107.90541076660156, "min": -139.0146484375, "p10": -34.52071838378906, "median": 39.38739013671875, "p90": 205.91553802490236, "max": 402.44830322265625, "pos_frac": 0.703125, "sample": [11.97235107421875, 208.961181640625, 12.018035888671875, 61.77516174316406, 102.72872161865234, 156.5509033203125, -6.144584655761719, 19.597965240478516, 8.31241226196289, 250.3546142578125, -1.174062728881836, -91.06802368164062, -65.61714172363281, -107.37687683105469, 185.04859924316406, -32.793853759765625, 402.44830322265625, -15.810062408447266, -2.391918182373047, 127.27439880371094, 127.64886474609375, -16.392112731933594, -42.94464111328125, -10.0997314453125, 19.627517700195312, 2.5880203247070312, 162.17738342285156, -15.320243835449219, 98.94210052490234, -17.272232055664062, 49.43054962158203, 35.76214599609375, 100.65765380859375, -139.0146484375, 94.03720092773438, 206.92010498046875, -35.26080322265625, 117.21292114257812, 164.76524353027344, 18.042739868164062, 386.6978454589844, 2.348175048828125, 163.83209228515625, -11.739599227905273, 265.1766662597656, 52.08098602294922, 159.43243408203125, 28.774612426757812, 27.856060028076172, 136.7645263671875, 136.71316528320312, 176.00946044921875, 189.68142700195312, -41.72322082519531, 9.414155960083008, 113.17385864257812, -5.655271530151367, -28.024757385253906, 103.26104736328125, 203.57154846191406, 43.01263427734375, 8.555778503417969, 97.04871368408203, 214.44407653808594], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000360.npy"} +{"epoch": 0.54421768707483, "step": 361, "batch_size": 64, "mean": 72.46682739257812, "std": 105.40652465820312, "min": -117.99794006347656, "p10": -53.053434371948235, "median": 65.75466918945312, "p90": 175.5688003540039, "max": 422.71392822265625, "pos_frac": 0.734375, "sample": [67.40795135498047, 146.62744140625, 14.88870620727539, -25.151988983154297, 172.90623474121094, 97.74266815185547, -81.43492126464844, -116.56999206542969, 75.41964721679688, 39.8520393371582, 91.977783203125, -43.76736068725586, -0.7506027221679688, 133.42245483398438, 127.90237426757812, 79.41869354248047, -22.397296905517578, 6.567363739013672, 9.4061279296875, -4.8574981689453125, 287.1716003417969, 64.10138702392578, -57.033180236816406, 176.70989990234375, 143.44479370117188, 210.7531280517578, 156.4604034423828, -117.99794006347656, 167.157470703125, 148.18130493164062, 19.942157745361328, -4.5031585693359375, 138.79200744628906, 422.71392822265625, 102.86518859863281, 162.34591674804688, 152.181396484375, 128.5386505126953, -103.93411254882812, -6.170845031738281, -5.109994888305664, 14.979019165039062, 222.45896911621094, 195.7301788330078, 47.98319625854492, 167.60084533691406, 155.8577880859375, 10.929008483886719, -79.46387481689453, 52.49940490722656, 109.54244995117188, 47.57563781738281, 70.09587097167969, 29.276317596435547, 351.8695373535156, 52.35118865966797, 143.7855987548828, 4.512168884277344, -29.699234008789062, 78.22413635253906, 76.9610366821289, 56.27959442138672, -19.431774139404297, -77.26002502441406], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000361.npy"} +{"epoch": 0.54572940287226, "step": 362, "batch_size": 64, "mean": 91.83322143554688, "std": 146.32118225097656, "min": -179.79733276367188, "p10": -42.26117324829101, "median": 74.40314865112305, "p90": 227.74944915771485, "max": 790.9830932617188, "pos_frac": 0.734375, "sample": [-57.94702911376953, 7.906599044799805, -25.831100463867188, 3.926206588745117, -37.902061462402344, -179.79733276367188, 71.78234100341797, 116.688720703125, 790.9830932617188, 130.56993103027344, 24.67723846435547, -13.781383514404297, 405.6238708496094, -21.98602294921875, -6.7882232666015625, 2.1256980895996094, 93.10525512695312, 61.635467529296875, 131.09344482421875, 281.3309631347656, 6.783609390258789, 57.92474365234375, 143.55381774902344, 137.38800048828125, 77.02395629882812, -2.9672088623046875, 116.94099426269531, -44.639556884765625, -27.405284881591797, 159.3699493408203, 492.7452392578125, 62.855918884277344, -19.252517700195312, 176.37603759765625, 22.493263244628906, 260.57745361328125, 158.66915893554688, 36.24017333984375, 117.9742660522461, -44.129364013671875, 1.9149131774902344, 171.33721923828125, 188.93572998046875, 173.1106719970703, 49.07923889160156, 163.3369140625, 159.75970458984375, 229.2471466064453, 136.451171875, 108.61372375488281, -24.167734146118164, 94.77572631835938, 161.64752197265625, -108.11820983886719, 89.9718017578125, 62.75361633300781, 168.26023864746094, -82.46795654296875, -25.479331970214844, 231.21128845214844, 224.25482177734375, -113.27333068847656, 93.57315063476562, 56.65946960449219], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000362.npy"} +{"epoch": 0.54724111866969, "step": 363, "batch_size": 64, "mean": 76.83131408691406, "std": 100.81221771240234, "min": -136.83609008789062, "p10": -22.033064651489255, "median": 54.211219787597656, "p90": 202.73036499023445, "max": 354.6752624511719, "pos_frac": 0.8125, "sample": [97.85215759277344, -130.84951782226562, 181.79541015625, 15.542655944824219, 109.34396362304688, 48.95128631591797, 18.96087646484375, 276.88458251953125, 54.65704345703125, -67.16569519042969, -13.009963989257812, 129.90707397460938, 354.6752624511719, 330.1631164550781, 16.046180725097656, 187.26046752929688, 53.112308502197266, 103.70118713378906, 53.31007385253906, 144.8997039794922, 68.28977966308594, 136.35838317871094, 16.041658401489258, 263.22698974609375, 175.90907287597656, 3.137849807739258, 31.59088134765625, 44.107322692871094, -22.7637939453125, 38.698158264160156, -39.099342346191406, -136.83609008789062, 114.00926971435547, -129.73426818847656, 124.92587280273438, 139.3995361328125, 68.92117309570312, 153.89065551757812, 14.501302719116211, -13.338451385498047, 74.01065063476562, 11.176200866699219, 50.3360595703125, 229.5829315185547, 55.31086730957031, 209.36032104492188, 164.2899627685547, 33.22212219238281, 159.51617431640625, -2.5038070678710938, 94.88738250732422, -0.3486137390136719, 147.86819458007812, 38.05804443359375, -62.33476257324219, 177.0911865234375, 223.42774963378906, -20.32802963256836, 110.23982238769531, 28.203296661376953, 16.134336471557617, 75.85873413085938, 53.76539611816406, 33.105560302734375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000363.npy"} +{"epoch": 0.5487528344671202, "step": 364, "batch_size": 64, "mean": 83.55179595947266, "std": 119.96007537841797, "min": -168.80845642089844, "p10": -29.008963775634765, "median": 65.92225646972656, "p90": 222.323405456543, "max": 372.11248779296875, "pos_frac": 0.75, "sample": [218.9847412109375, -8.027412414550781, 22.69971466064453, 191.2745361328125, 338.4449462890625, 24.714942932128906, 95.04864501953125, 127.39515686035156, 15.730018615722656, 269.5827941894531, 362.0916748046875, 130.98077392578125, -16.45461654663086, -86.06208038330078, 144.03176879882812, 177.14019775390625, 97.88140106201172, 3.6684532165527344, 372.11248779296875, 33.6533203125, 190.1959228515625, 53.942604064941406, 140.68785095214844, 223.6680450439453, -168.80845642089844, -71.07810974121094, 212.03060913085938, 7.135683059692383, -0.6621761322021484, 132.79469299316406, 178.81826782226562, 131.632080078125, -17.156219482421875, 106.5262680053711, 49.23320007324219, -11.898849487304688, 110.25151062011719, 219.1859130859375, -25.88170623779297, 7.749422073364258, 113.088623046875, 19.95623016357422, -28.04169464111328, 20.604736328125, 354.3328857421875, 16.46915054321289, -29.423507690429688, 149.99343872070312, 18.565231323242188, 131.2317657470703, 7.0602569580078125, 149.48167419433594, -145.64578247070312, 225.0616455078125, -149.99212646484375, 22.383544921875, 77.90190887451172, 152.97825622558594, -13.892587661743164, 31.433837890625, -9.535160064697266, -63.20045471191406, 144.42562866210938, 168.81932067871094], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000364.npy"} +{"epoch": 0.5502645502645502, "step": 365, "batch_size": 64, "mean": 65.55528259277344, "std": 84.76556396484375, "min": -118.07185363769531, "p10": -18.16886711120605, "median": 61.422447204589844, "p90": 164.6959686279297, "max": 416.85198974609375, "pos_frac": 0.8125, "sample": [14.401426315307617, 40.389427185058594, 132.63754272460938, -3.5715560913085938, 72.8320083618164, 4.7702789306640625, 159.95547485351562, 144.33200073242188, 165.45164489746094, 57.110809326171875, 127.83238220214844, 61.281890869140625, -118.07185363769531, 9.100547790527344, 32.476165771484375, 178.3231964111328, -13.611713409423828, 6.344097137451172, 18.806907653808594, 66.49829864501953, 65.427001953125, 114.65558624267578, 146.39584350585938, 18.004961013793945, -4.02922248840332, -92.48945617675781, 79.7614974975586, 172.11268615722656, 36.985755920410156, 97.86395263671875, -39.14241027832031, 7.281343460083008, 196.5267791748047, 41.61881637573242, 99.45646667480469, 122.87399291992188, 143.38421630859375, -5.694952011108398, 101.72348022460938, 63.15563201904297, 184.52532958984375, 43.15985107421875, 61.56300354003906, 43.804656982421875, 126.84379577636719, 126.9135513305664, 6.341867446899414, -29.56269645690918, 416.85198974609375, 58.25421905517578, 64.47122955322266, 8.561880111694336, 92.93305206298828, -95.12728881835938, 0.9380226135253906, 65.19140625, -0.6275634765625, -60.519527435302734, 65.90678405761719, 35.30889892578125, 162.93272399902344, 179.75634765625, -20.121932983398438, 134.07720947265625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000365.npy"} +{"epoch": 0.5517762660619804, "step": 366, "batch_size": 64, "mean": 61.298095703125, "std": 104.48036193847656, "min": -161.4346923828125, "p10": -72.55831756591796, "median": 55.252071380615234, "p90": 182.41854248046877, "max": 292.57421875, "pos_frac": 0.734375, "sample": [47.734622955322266, 199.34500122070312, 89.85343170166016, 125.48762512207031, -12.128776550292969, 156.94766235351562, -161.4346923828125, -35.73069763183594, -35.09928894042969, 3.0740280151367188, -66.03067016601562, 108.91011810302734, 20.763023376464844, 91.70893859863281, 137.47198486328125, 68.82147216796875, -111.4393310546875, 69.30215454101562, 10.539243698120117, 133.79962158203125, 190.34060668945312, 68.80350494384766, 79.38029479980469, -30.359752655029297, 34.11439514160156, 132.6631317138672, 17.253437042236328, 169.74978637695312, -121.68743896484375, -107.26953887939453, 292.57421875, 38.10131072998047, 4.948644638061523, 267.212158203125, -155.28733825683594, 57.15094757080078, -35.3497314453125, -94.7943115234375, 39.16633987426758, 36.5420036315918, 11.682083129882812, -28.035205841064453, 10.716756820678711, 33.340789794921875, -65.4443359375, 37.44801330566406, -8.415193557739258, 265.23028564453125, 220.34324645996094, 79.86347961425781, 53.35319519042969, 169.15640258789062, 139.7926788330078, -75.35588073730469, 160.37242126464844, -44.126914978027344, 143.63705444335938, 151.20689392089844, 183.49533081054688, 164.5428009033203, 170.54931640625, 95.07381439208984, 179.90603637695312, 149.5969696044922], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000366.npy"} +{"epoch": 0.5532879818594104, "step": 367, "batch_size": 64, "mean": 61.615333557128906, "std": 96.93912506103516, "min": -165.85836791992188, "p10": -71.02470321655272, "median": 56.70320129394531, "p90": 177.41628265380865, "max": 364.3160400390625, "pos_frac": 0.765625, "sample": [23.49652862548828, 199.680908203125, 46.84516143798828, -24.932998657226562, 96.87800598144531, -6.0324859619140625, 186.66653442382812, 122.8118667602539, 72.66026306152344, 14.024185180664062, 48.60426330566406, 74.8580322265625, 4.981315612792969, 45.641845703125, 203.91192626953125, 49.71848678588867, 85.58525085449219, -30.49541473388672, -92.3840560913086, 159.564208984375, 20.954208374023438, 63.1737060546875, -30.598207473754883, -79.03229522705078, 130.86370849609375, -162.4394989013672, -76.51044464111328, 142.787353515625, 26.440879821777344, 52.983428955078125, 182.193115234375, 165.68186950683594, 113.6784896850586, 155.21484375, 141.46676635742188, 12.311931610107422, -2.5854339599609375, -58.224639892578125, 29.62453269958496, 124.64250183105469, 58.58824157714844, -165.85836791992188, 39.93955993652344, 197.19969177246094, 151.71563720703125, -76.63543701171875, 73.65098571777344, 72.60105895996094, 145.30982971191406, 19.555187225341797, 364.3160400390625, 33.62049865722656, -126.08373260498047, 125.60749053955078, 57.44244384765625, 198.03427124023438, 38.24932861328125, -44.96403503417969, 105.61041259765625, 166.2703399658203, -16.350738525390625, 74.90676879882812, 55.963958740234375, 159.9815673828125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000367.npy"} +{"epoch": 0.5547996976568406, "step": 368, "batch_size": 64, "mean": 52.005489349365234, "std": 96.51107788085938, "min": -179.61697387695312, "p10": -76.64753570556641, "median": 53.74245262145996, "p90": 175.4600067138672, "max": 249.5919647216797, "pos_frac": 0.6875, "sample": [-179.61697387695312, 145.44688415527344, 7.325294494628906, 69.82015228271484, 68.8785400390625, 173.91732788085938, 160.57455444335938, 75.01595306396484, -21.959693908691406, -31.220230102539062, 165.40818786621094, -4.115413665771484, -19.34771728515625, 60.6668701171875, 180.17120361328125, 128.13357543945312, -23.260528564453125, -94.33317565917969, -90.2548828125, 105.97696685791016, 16.093643188476562, 60.900177001953125, 85.07599639892578, -49.827308654785156, 176.12115478515625, 35.6755256652832, -121.41276550292969, 187.79136657714844, 249.5919647216797, 49.65639877319336, -43.08033752441406, 238.90609741210938, 41.753501892089844, -4.885311126708984, 91.010498046875, 133.56869506835938, 75.65554809570312, 79.58555603027344, 19.507591247558594, 122.65980529785156, 57.82850646972656, -26.56574821472168, 145.5850830078125, 33.6824951171875, -59.73652648925781, -1.8662185668945312, 22.712841033935547, -19.537628173828125, 170.43878173828125, -76.66065979003906, 204.9007110595703, 167.9344482421875, 203.79510498046875, 129.42752075195312, 27.51762580871582, 98.41554260253906, 45.608131408691406, -76.61691284179688, -80.62842559814453, 64.88810729980469, 33.964019775390625, 20.870851516723633, -163.58795166015625, 84.40719604492188], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000368.npy"} +{"epoch": 0.5563114134542706, "step": 369, "batch_size": 64, "mean": 47.178794860839844, "std": 100.6832046508789, "min": -318.15283203125, "p10": -59.43927726745605, "median": 47.366525650024414, "p90": 173.07274932861327, "max": 256.09576416015625, "pos_frac": 0.703125, "sample": [57.42321014404297, -127.2616195678711, 54.317222595214844, 165.5302734375, -112.35293579101562, 144.538818359375, 46.142269134521484, -30.201087951660156, 145.10853576660156, 47.4779167175293, 29.75788116455078, 135.26239013671875, 163.74830627441406, 200.35821533203125, 168.1714324951172, 1.0996246337890625, 68.33015441894531, 99.47291564941406, 2.5717544555664062, 34.518428802490234, 1.81341552734375, -134.15655517578125, 34.46563720703125, -55.69674301147461, 51.219085693359375, 114.4180908203125, -36.33188247680664, 79.80430603027344, 59.29183578491211, -35.48717498779297, 132.07696533203125, -21.13409423828125, 54.07707977294922, 171.892578125, 67.36461639404297, 165.29916381835938, 175.01991271972656, 21.56072998046875, 70.42269897460938, -61.04322052001953, -51.541961669921875, 31.5819091796875, -13.006206512451172, 75.4756088256836, -10.95419692993164, 47.25513458251953, 256.09576416015625, 5.551719665527344, 172.96112060546875, -20.526100158691406, -3.4627227783203125, 50.22892761230469, 5.062664031982422, 180.1473388671875, -1.8700942993164062, -32.28539276123047, 1.7137279510498047, 183.3983154296875, 102.0849380493164, 211.06015014648438, -78.17671966552734, -318.15283203125, -95.20890045166016, 173.12059020996094], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000369.npy"} +{"epoch": 0.5578231292517006, "step": 370, "batch_size": 64, "mean": 75.92509460449219, "std": 105.43343353271484, "min": -218.5922393798828, "p10": -46.42858276367187, "median": 67.62724304199219, "p90": 218.893588256836, "max": 271.065185546875, "pos_frac": 0.796875, "sample": [67.13845825195312, 267.9967956542969, 147.19952392578125, 32.545188903808594, 250.07064819335938, 1.733642578125, 248.43443298339844, -48.73785400390625, 226.50796508789062, 37.06020736694336, 16.614944458007812, 68.11602783203125, 0.232208251953125, 183.50790405273438, 271.065185546875, 104.96334838867188, 174.76844787597656, -120.54401397705078, -15.289291381835938, 137.9998779296875, 122.11801147460938, 102.24018859863281, 103.9794921875, 120.81657409667969, 189.3389129638672, 78.27963256835938, 104.38825988769531, 44.85053634643555, -7.667366027832031, -41.040283203125, -5.8600311279296875, 22.784385681152344, 17.363136291503906, -126.14224243164062, 57.26275634765625, 143.95608520507812, 250.22149658203125, 52.62071228027344, 170.67970275878906, 164.14053344726562, 230.52236938476562, -218.5922393798828, 54.916656494140625, 114.84846496582031, 32.66645050048828, 201.126708984375, 112.72505187988281, 149.22914123535156, 137.88218688964844, 183.33663940429688, -171.40501403808594, 21.352684020996094, 178.42782592773438, 1.786764144897461, -3.5364990234375, 26.727584838867188, 38.382781982421875, 2.9348602294921875, -51.228858947753906, -1.564920425415039, 24.738258361816406, 153.80728149414062, -49.50226974487305, 73.90986633300781], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000370.npy"} +{"epoch": 0.5593348450491308, "step": 371, "batch_size": 64, "mean": 61.35060119628906, "std": 91.29695892333984, "min": -124.50578308105469, "p10": -61.964240264892574, "median": 69.62979507446289, "p90": 159.26456756591801, "max": 303.2674255371094, "pos_frac": 0.75, "sample": [151.20028686523438, -124.50578308105469, 237.49481201171875, 66.8652114868164, 54.48817443847656, 31.013519287109375, 129.6437225341797, -0.5936031341552734, -4.379051208496094, 23.14771842956543, 31.29205322265625, 117.15039825439453, -58.9392204284668, 185.26510620117188, 27.102455139160156, 303.2674255371094, -19.43241310119629, 109.55194091796875, 26.493179321289062, 162.72068786621094, 33.13401794433594, -63.212005615234375, 6.666868209838867, 18.665512084960938, 111.03358459472656, 148.54312133789062, 78.87804412841797, 278.1282958984375, 126.24819946289062, 119.48222351074219, 149.54376220703125, 179.15359497070312, -43.725791931152344, 30.129653930664062, 91.70500946044922, 8.904144287109375, -30.720108032226562, -101.18450927734375, -43.17228698730469, 183.1473846435547, -91.87458038330078, 93.44068908691406, 72.42085266113281, 82.49678039550781, 73.55502319335938, -59.05278778076172, 100.9742202758789, 124.69388580322266, -92.85151672363281, 69.81887817382812, 130.33702087402344, -22.813262939453125, 19.196250915527344, 93.16142272949219, 136.47232055664062, 10.030746459960938, 144.28274536132812, 50.259742736816406, 111.28276062011719, 69.44071197509766, 138.5267791748047, 94.2679214477539, -86.3999252319336, -65.42381286621094], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000371.npy"} +{"epoch": 0.5608465608465608, "step": 372, "batch_size": 64, "mean": 41.178436279296875, "std": 110.63504028320312, "min": -170.10702514648438, "p10": -118.35020599365232, "median": 22.132478713989258, "p90": 183.36427917480475, "max": 287.55078125, "pos_frac": 0.640625, "sample": [-4.7574615478515625, 39.726436614990234, 134.62673950195312, 91.55197143554688, 75.98536682128906, 112.61907196044922, 168.34100341796875, -41.977195739746094, 143.62310791015625, 250.7760009765625, 215.34835815429688, -11.547012329101562, 121.82929992675781, 262.67816162109375, 54.727256774902344, 133.99539184570312, 157.65231323242188, -170.10702514648438, 9.638160705566406, 73.32074737548828, 19.710102081298828, -3.414091110229492, 27.931793212890625, 211.61123657226562, 19.404376983642578, -35.61206817626953, -97.40048217773438, -4.765167236328125, 189.80282592773438, -95.57018280029297, 102.17766571044922, 40.584503173828125, -131.224853515625, -4.224742889404297, -127.32865905761719, 23.73041343688965, -159.793212890625, 12.083072662353516, 139.0419158935547, 14.614248275756836, 250.44708251953125, 92.95890808105469, 163.99822998046875, 84.71287536621094, -64.05055236816406, 89.24557495117188, -159.92575073242188, 14.82790756225586, 20.534543991088867, -11.779716491699219, 8.126411437988281, -162.07350158691406, 126.13540649414062, -3.9644241333007812, -26.225732803344727, -46.0689697265625, -142.7947998046875, 73.26940155029297, 153.39877319335938, 287.55078125, -63.92164993286133, -41.65470886230469, 5.030754089355469, 28.233734130859375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000372.npy"} +{"epoch": 0.562358276643991, "step": 373, "batch_size": 64, "mean": 69.060302734375, "std": 135.63816833496094, "min": -179.22723388671875, "p10": -97.66248703002927, "median": 46.906599044799805, "p90": 233.50451812744157, "max": 530.4349365234375, "pos_frac": 0.703125, "sample": [-140.9407196044922, 165.41787719726562, 96.0849609375, 314.37200927734375, 133.68296813964844, -122.9616470336914, -69.31793975830078, 121.86062622070312, 80.8549575805664, 43.64784240722656, 135.6889190673828, -147.87301635742188, 11.006118774414062, 357.7896728515625, -109.81015014648438, -15.40854263305664, 74.55953216552734, 119.17081451416016, -179.22723388671875, -66.45222473144531, 40.396183013916016, -9.082939147949219, 79.0159683227539, -111.04312896728516, 170.03677368164062, 158.0185089111328, -18.435653686523438, -15.880950927734375, 86.27108001708984, 114.53865814208984, 192.97506713867188, 1.6022491455078125, 50.16535568237305, 186.373779296875, -62.91857147216797, -23.789569854736328, 19.732269287109375, 530.4349365234375, -17.02039337158203, 151.1761932373047, 130.6088104248047, 26.158935546875, -117.42311096191406, -48.31875991821289, 67.51565551757812, 161.64736938476562, -48.900054931640625, 256.24853515625, 36.354591369628906, 51.01173400878906, 13.141525268554688, 250.87428283691406, 3.1171035766601562, 471.81732177734375, 37.638946533203125, 95.27314758300781, 55.15534591674805, 120.71617126464844, 30.14458465576172, 254.48329162597656, 33.02812194824219, -0.9321174621582031, 38.42852783203125, 177.3590545654297], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000373.npy"} +{"epoch": 0.563869992441421, "step": 374, "batch_size": 64, "mean": 75.30964660644531, "std": 115.2933349609375, "min": -251.38201904296875, "p10": -38.282712554931635, "median": 74.5197639465332, "p90": 193.95281219482422, "max": 431.75201416015625, "pos_frac": 0.796875, "sample": [351.4707946777344, 179.98460388183594, 140.6583251953125, -18.444412231445312, 28.130420684814453, 130.06900024414062, 187.89920043945312, 194.37155151367188, 189.19668579101562, 70.28092193603516, 62.30133819580078, 27.680892944335938, 103.17048645019531, 431.75201416015625, 91.1212387084961, 55.30841064453125, 133.94088745117188, 1.1540069580078125, 28.199996948242188, -148.77069091796875, -3.2051010131835938, 125.14659118652344, 56.0501823425293, -118.90362548828125, 113.18374633789062, 154.47283935546875, 51.34891128540039, 215.67144775390625, -22.060455322265625, 117.51608276367188, 54.69648742675781, 94.38998413085938, 3.748077392578125, 46.11498260498047, 119.39472961425781, 64.80845642089844, -33.98402404785156, -220.0865020751953, 171.0743408203125, 87.16888427734375, 24.230663299560547, -104.13504028320312, 21.436498641967773, 286.65966796875, 223.98312377929688, 93.52823638916016, -251.38201904296875, 50.75592041015625, 78.75860595703125, 43.649574279785156, 109.87188720703125, 103.91702270507812, 192.9757537841797, 108.47911071777344, -6.162837982177734, 4.381978988647461, 82.55555725097656, 216.24862670898438, 115.57781982421875, 165.00320434570312, -40.12500762939453, -29.9678955078125, 54.76075744628906, -41.20576477050781], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000374.npy"} +{"epoch": 0.5653817082388511, "step": 375, "batch_size": 64, "mean": 53.097965240478516, "std": 109.03299713134766, "min": -218.4559326171875, "p10": -94.33198165893555, "median": 51.36066436767578, "p90": 177.35662841796878, "max": 336.4010925292969, "pos_frac": 0.6875, "sample": [3.1061935424804688, -82.06551361083984, 72.99974822998047, 124.30741882324219, 128.74026489257812, 37.737815856933594, 207.52264404296875, -10.452861785888672, 95.56051635742188, 132.46182250976562, -6.761482238769531, 155.5350799560547, -109.91947937011719, 146.77215576171875, 12.93234634399414, -16.134044647216797, 52.01551818847656, -106.10627746582031, 92.3788070678711, -8.927780151367188, 48.553001403808594, 197.35345458984375, 168.05062866210938, -199.02993774414062, 103.59310150146484, 180.34967041015625, 1.9479598999023438, -218.4559326171875, 25.040748596191406, 157.3283233642578, 35.567359924316406, -59.30358123779297, 32.10077667236328, -93.92591094970703, 50.705810546875, 124.35913848876953, 98.06816864013672, 47.54997253417969, 203.5053253173828, 112.33917236328125, 256.6194763183594, 71.00589752197266, 129.67724609375, -160.1510467529297, -40.05723571777344, 40.90727233886719, 210.3922119140625, -1.2734527587890625, 336.4010925292969, 96.73677062988281, 170.37286376953125, 117.24880981445312, -27.22565460205078, 82.25691223144531, -52.5081672668457, 60.460838317871094, 108.42945098876953, -127.63642883300781, -24.03624725341797, 150.20701599121094, -13.579090118408203, 139.72596740722656, 31.40106964111328, -94.50601196289062], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000375.npy"} +{"epoch": 0.5668934240362812, "step": 376, "batch_size": 64, "mean": 72.9223403930664, "std": 127.04837036132812, "min": -212.17176818847656, "p10": -70.11426734924315, "median": 58.662086486816406, "p90": 207.93488159179688, "max": 469.0834045410156, "pos_frac": 0.640625, "sample": [44.459625244140625, -20.530006408691406, -10.143770217895508, 58.30131530761719, 43.92505645751953, -14.147533416748047, -2.7114486694335938, 136.0209197998047, -17.16900634765625, 54.48272705078125, 112.24960327148438, 101.19915771484375, -152.73471069335938, -5.824440002441406, -212.17176818847656, -0.6522598266601562, 209.69677734375, 86.99172973632812, -2.2233543395996094, 449.1023254394531, 86.91342163085938, 222.382080078125, -4.85125732421875, 141.7935333251953, 244.41590881347656, -3.484466552734375, 151.85678100585938, 162.0815887451172, 146.42022705078125, 118.96039581298828, 159.89735412597656, 85.05807495117188, 203.82379150390625, 81.63475036621094, 46.97351837158203, 210.76609802246094, 469.0834045410156, -58.24140548706055, 189.02467346191406, -75.20263671875, 59.022857666015625, -79.41754913330078, 197.13827514648438, 87.52813720703125, 123.00210571289062, 318.4186706542969, -94.55902862548828, 134.120849609375, -50.33160400390625, 153.52386474609375, -210.82138061523438, 28.12181854248047, 50.63390350341797, 47.46522903442383, 186.86492919921875, -1.964303970336914, 156.04112243652344, -25.552608489990234, 176.22801208496094, 97.57675170898438, 0.1834564208984375, -35.216033935546875, -3.729166030883789, -84.67543029785156], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000376.npy"} +{"epoch": 0.5684051398337112, "step": 377, "batch_size": 64, "mean": 50.09230422973633, "std": 88.6153793334961, "min": -154.5297393798828, "p10": -46.45175704956054, "median": 40.86392021179199, "p90": 163.3740661621094, "max": 223.81295776367188, "pos_frac": 0.703125, "sample": [132.49598693847656, -49.47258758544922, 142.72218322753906, 132.37918090820312, 91.23857116699219, 19.88080596923828, -140.16064453125, -102.78167724609375, 36.950843811035156, 24.49420928955078, 140.4852294921875, 16.605369567871094, 111.67578887939453, 52.07867431640625, 125.27517700195312, 117.03679656982422, -84.6378402709961, 84.0643081665039, -32.670501708984375, -79.30940246582031, 195.2440643310547, 0.6596603393554688, 44.77699661254883, 68.92394256591797, 128.49652099609375, -141.28305053710938, 34.53266143798828, 223.81295776367188, 48.24666213989258, -26.486021041870117, 161.6351318359375, -19.768831253051758, 130.52488708496094, -4.94757080078125, -39.40315246582031, -24.317312240600586, 139.75210571289062, 29.551429748535156, 169.1455078125, 35.34197998046875, -12.857200622558594, 87.6227035522461, 27.53568458557129, 57.50971221923828, 4.354736328125, 197.7167510986328, 85.77339172363281, -8.814029693603516, -36.41215515136719, 87.75611877441406, 164.11932373046875, 5.809967041015625, -27.072168350219727, 173.1848602294922, 4.9473419189453125, -16.567834854125977, 161.06752014160156, 129.49716186523438, -154.5297393798828, 36.070838928222656, 107.72770690917969, -38.975196838378906, 166.3262481689453, 111.32659912109375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000377.npy"} +{"epoch": 0.5699168556311414, "step": 378, "batch_size": 64, "mean": 54.244998931884766, "std": 104.53541564941406, "min": -165.88919067382812, "p10": -31.108745765686034, "median": 31.607669830322266, "p90": 179.49550933837892, "max": 335.20123291015625, "pos_frac": 0.71875, "sample": [134.22653198242188, 98.1069564819336, 4.941843032836914, 9.253120422363281, 129.33172607421875, 155.70343017578125, -61.58357238769531, 24.675128936767578, 48.76039123535156, 179.8614044189453, -165.88919067382812, 16.032285690307617, 276.37066650390625, -14.916996002197266, 181.61595153808594, 101.47455596923828, 54.21046447753906, 16.314132690429688, 178.64175415039062, -17.422069549560547, 74.59051513671875, 0.5025634765625, 12.78377914428711, 73.93563079833984, 159.66195678710938, -31.974853515625, 37.642127990722656, 325.1232604980469, -140.5475311279297, 9.479816436767578, 92.83259582519531, 28.77324676513672, -106.70439147949219, 69.54461669921875, 135.3758544921875, 34.44209289550781, -5.8831634521484375, 20.77088165283203, 107.30382537841797, -23.371171951293945, 335.20123291015625, 115.53407287597656, 3.722015380859375, 97.026123046875, 60.985504150390625, 119.30868530273438, -1.8234138488769531, 172.66070556640625, 58.972572326660156, 24.336210250854492, 17.86220932006836, -9.621566772460938, -4.048419952392578, -139.6641845703125, -26.569011688232422, 94.38096618652344, -29.087827682495117, 285.6760559082031, 11.547266006469727, 48.76274871826172, -20.310810089111328, -164.7877960205078, -14.743547439575195, 212.36984252929688], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000378.npy"} +{"epoch": 0.5714285714285714, "step": 379, "batch_size": 64, "mean": 69.2255859375, "std": 109.35449981689453, "min": -201.06561279296875, "p10": -44.36699867248535, "median": 63.60552215576172, "p90": 173.68819885253907, "max": 430.39837646484375, "pos_frac": 0.765625, "sample": [104.34333801269531, -52.488800048828125, 148.29571533203125, -4.30084228515625, 97.82275390625, 92.70124816894531, 47.707298278808594, -128.97158813476562, 72.1431884765625, 8.913238525390625, 24.21575164794922, 72.88554382324219, -41.461151123046875, 116.54924011230469, -9.296180725097656, 326.11163330078125, -38.831451416015625, -45.612361907958984, 165.85601806640625, 170.3488311767578, 430.39837646484375, 141.96026611328125, 188.92416381835938, 172.165771484375, 121.23440551757812, -201.06561279296875, 125.71868133544922, -72.79600524902344, 81.72028350830078, -96.12307739257812, 266.99163818359375, 65.05526733398438, 19.636062622070312, 20.067943572998047, -111.08621215820312, 174.34066772460938, 148.26220703125, 28.439605712890625, 24.9244384765625, -10.065654754638672, -31.778785705566406, 26.170761108398438, 93.92761993408203, 167.92877197265625, 168.58331298828125, -4.277561187744141, 269.72039794921875, 314.4818420410156, -2.750263214111328, 26.577072143554688, 121.04237365722656, 10.103271484375, 21.686752319335938, 67.94879150390625, 117.25160217285156, 5.183708190917969, 26.873428344726562, 79.3946533203125, 36.37540817260742, 11.103605270385742, 90.75391387939453, 62.15577697753906, 80.44374084472656, 25.902481079101562], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000379.npy"} +{"epoch": 0.5729402872260015, "step": 380, "batch_size": 64, "mean": 72.96170043945312, "std": 95.56358337402344, "min": -186.48956298828125, "p10": -28.07123718261719, "median": 61.55964279174805, "p90": 210.65784759521486, "max": 280.0169372558594, "pos_frac": 0.796875, "sample": [42.190067291259766, 129.87612915039062, 64.43165588378906, 51.0045166015625, -58.51820373535156, -113.76358795166016, 155.30923461914062, 219.20053100585938, -2.5814170837402344, 165.97933959960938, 104.16291809082031, 52.07793045043945, -5.167850494384766, 86.45780944824219, 7.763416290283203, -27.935501098632812, 17.693599700927734, 69.46879577636719, 135.45404052734375, 175.59906005859375, 114.21354675292969, 55.322288513183594, 5.877738952636719, -186.48956298828125, 209.6774139404297, -14.626640319824219, 140.02401733398438, 50.238258361816406, -48.86924743652344, 35.446990966796875, 111.27958679199219, 220.82382202148438, 58.17359161376953, 54.39567947387695, 212.12741088867188, 10.018051147460938, 246.87350463867188, 125.4610366821289, -28.129409790039062, 21.87653350830078, 153.6356201171875, -142.48321533203125, 167.9669189453125, 211.07803344726562, 53.602874755859375, 105.88035583496094, -12.142341613769531, 77.54792785644531, -11.453399658203125, 152.82554626464844, 28.462631225585938, 3.7351207733154297, -104.79673767089844, 231.70846557617188, 100.43498229980469, 111.78019714355469, 13.243776321411133, 280.0169372558594, 145.38442993164062, 167.58111572265625, 74.2037124633789, 58.68762969970703, 92.10469818115234, 48.12677001953125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000380.npy"} +{"epoch": 0.5744520030234316, "step": 381, "batch_size": 64, "mean": 38.12488555908203, "std": 125.89793395996094, "min": -361.9024658203125, "p10": -63.16847305297851, "median": 16.182790756225586, "p90": 171.14503784179686, "max": 627.2718505859375, "pos_frac": 0.640625, "sample": [13.520233154296875, 278.678955078125, -5.108453750610352, -118.12884521484375, -213.4538116455078, 16.597164154052734, 1.1351547241210938, 32.218868255615234, 198.76025390625, 92.79917907714844, 82.64342498779297, -3.610645294189453, 15.768417358398438, 43.57643508911133, 42.42662048339844, 627.2718505859375, 98.68624877929688, -9.515636444091797, 26.432403564453125, -21.86504364013672, 15.469999313354492, -68.2848129272461, 7.534666061401367, 202.00157165527344, -94.03990936279297, 123.85114288330078, -42.37959289550781, -50.479713439941406, 109.69412231445312, 0.08322334289550781, 61.35340118408203, 145.518798828125, 9.625572204589844, -13.162704467773438, -14.846904754638672, 159.52804565429688, 28.267711639404297, -11.977703094482422, -149.8519744873047, 53.389739990234375, 171.43624877929688, -31.996917724609375, -4.136878967285156, 100.28327941894531, 37.87378692626953, 206.07603454589844, 109.76172637939453, 53.54175567626953, -3.845388412475586, 39.422080993652344, 6.466064453125, 100.29054260253906, 202.44789123535156, -63.20869445800781, 36.108272552490234, -18.944869995117188, 122.14019775390625, -361.9024658203125, -63.074623107910156, 34.43293762207031, 15.01055908203125, -46.84545135498047, -41.936317443847656, 170.46554565429688], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000381.npy"} +{"epoch": 0.5759637188208617, "step": 382, "batch_size": 64, "mean": 55.44483947753906, "std": 90.61003875732422, "min": -146.0802764892578, "p10": -50.88208808898925, "median": 34.24100875854492, "p90": 180.9260864257813, "max": 305.4026184082031, "pos_frac": 0.75, "sample": [-60.05653381347656, 22.44437599182129, 238.896484375, 66.91658782958984, 185.5452880859375, -10.731231689453125, 185.8148956298828, 17.690765380859375, 253.78826904296875, 78.7229995727539, 67.86369323730469, 5.5800323486328125, -22.083038330078125, 32.973114013671875, 141.5967559814453, 76.13471984863281, -67.27641296386719, 30.59112548828125, -146.0802764892578, 142.89865112304688, 0.9738826751708984, 34.39848327636719, -2.5406951904296875, 2.220684051513672, -53.07994079589844, 20.432220458984375, 58.98890686035156, 55.36180114746094, -69.12569427490234, 218.4444580078125, 90.39108276367188, 48.91110610961914, -58.54702377319336, -75.67929077148438, 169.49037170410156, 3.502288818359375, -45.75376510620117, 21.194046020507812, 56.38128662109375, 140.99578857421875, 71.14060974121094, 262.2197265625, -33.36671447753906, 57.31141662597656, 8.78955078125, 153.33157348632812, -32.7579460144043, -23.48754119873047, 30.12432861328125, 14.665046691894531, 86.71145629882812, 132.86883544921875, -27.540592193603516, 49.4721565246582, 170.14794921875, -0.9202880859375, 32.895538330078125, 23.048160552978516, 157.48553466796875, 305.4026184082031, 98.76008605957031, 65.73701477050781, 54.15751266479492, 34.083534240722656], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000382.npy"} +{"epoch": 0.5774754346182918, "step": 383, "batch_size": 64, "mean": 50.891353607177734, "std": 101.86714172363281, "min": -164.24099731445312, "p10": -71.97522430419922, "median": 36.200260162353516, "p90": 180.83715362548833, "max": 318.7460632324219, "pos_frac": 0.6875, "sample": [12.139593124389648, 148.85614013671875, 125.03314208984375, 28.035140991210938, 22.14440155029297, -64.81739044189453, 134.27862548828125, 35.634490966796875, 19.34075164794922, -95.94367218017578, 21.196842193603516, -102.82881164550781, 102.047119140625, 79.06916809082031, 133.658935546875, 74.23091125488281, 184.80514526367188, -79.09705352783203, 205.22705078125, 140.32640075683594, 95.37832641601562, -39.4730224609375, 217.0809783935547, -24.188018798828125, 156.48036193847656, -40.951995849609375, 48.07035827636719, -45.76206970214844, 36.766029357910156, 318.7460632324219, 121.31592559814453, -32.752410888671875, -72.00618743896484, 221.77566528320312, -34.37986373901367, 117.87222290039062, 121.19670867919922, -164.24099731445312, 6.2222137451171875, 60.759521484375, -71.9029769897461, 15.677703857421875, 40.94136047363281, 15.476961135864258, 65.48136901855469, -100.61865234375, 24.47089385986328, 64.74017333984375, -58.03904724121094, 254.39962768554688, 18.99281883239746, 218.3382568359375, 90.38196563720703, 26.312973022460938, -27.213417053222656, 166.47384643554688, 97.59375, -4.62445068359375, -16.295822143554688, -146.79916381835938, 166.87649536132812, -49.82073974609375, 171.57850646972656, 103.37742614746094], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000383.npy"} +{"epoch": 0.5789871504157218, "step": 384, "batch_size": 64, "mean": 60.551448822021484, "std": 77.00213623046875, "min": -116.49722290039062, "p10": -35.35931777954101, "median": 62.43264198303223, "p90": 166.41475524902347, "max": 231.29763793945312, "pos_frac": 0.796875, "sample": [-48.17631530761719, 176.25714111328125, 54.608070373535156, 122.42575073242188, 5.987232208251953, 74.59818267822266, 107.67011260986328, 55.111976623535156, 156.61349487304688, -13.29464340209961, 105.64089965820312, 72.61909484863281, 51.225433349609375, 16.561668395996094, -20.035072326660156, 58.85699462890625, 20.761268615722656, 218.18984985351562, 7.645263671875, 173.15798950195312, -29.61603546142578, 88.98468017578125, 35.79888153076172, -37.82072448730469, -101.47418212890625, 72.72750854492188, 193.62265014648438, 123.19892120361328, -75.73857116699219, 142.10169982910156, 170.61529541015625, 125.622314453125, 11.370437622070312, 64.14495086669922, 52.890769958496094, 7.875762939453125, 196.87802124023438, 31.47732162475586, -116.49722290039062, 142.44033813476562, 34.810028076171875, 75.82194519042969, 117.7069320678711, -87.15201568603516, 231.29763793945312, 68.20425415039062, 61.80484390258789, 8.892385482788086, 20.01015853881836, 99.70267486572266, 116.96884155273438, 16.833343505859375, 73.72805786132812, 71.03654479980469, 36.70110321044922, -0.7797431945800781, -55.43512725830078, 106.32290649414062, -3.0193939208984375, -6.508472442626953, 152.15859985351562, 114.05257415771484, 63.06044006347656, 64.04711151123047], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000384.npy"} +{"epoch": 0.5804988662131519, "step": 385, "batch_size": 64, "mean": 69.76329040527344, "std": 88.96564483642578, "min": -93.51473236083984, "p10": -43.06542434692382, "median": 65.71475219726562, "p90": 193.04446868896488, "max": 243.9475860595703, "pos_frac": 0.75, "sample": [-93.51473236083984, -11.476551055908203, 142.61837768554688, 78.34814453125, 64.74980163574219, -7.653839111328125, 116.20089721679688, 121.53399658203125, 66.56288146972656, 37.98045349121094, -61.0145378112793, 53.767608642578125, 80.45555877685547, 124.46275329589844, 28.673057556152344, 142.7261962890625, 165.62417602539062, 126.15068054199219, 26.167335510253906, 51.2303352355957, -26.387222290039062, 3.8087615966796875, 7.5856475830078125, 186.66793823242188, 183.64651489257812, 243.9475860595703, 16.456714630126953, 104.73692321777344, 27.44213104248047, 131.2517852783203, 82.65199279785156, -88.38214111328125, 124.13720703125, 1.0167083740234375, 64.86662292480469, 225.0853729248047, 134.62005615234375, 91.9735107421875, 88.80899047851562, 150.37681579589844, -36.47988510131836, -58.380592346191406, 107.166259765625, 101.81704711914062, 8.485710144042969, 235.74488830566406, 207.9986114501953, -19.452163696289062, -45.88779830932617, -25.686859130859375, -21.1339111328125, 56.739803314208984, -14.428573608398438, -80.55909729003906, 152.82281494140625, 119.71797943115234, 18.762413024902344, 162.19802856445312, -54.82305145263672, 225.43603515625, -12.269458770751953, 5.147071838378906, 195.7772674560547, 228.2335662841797], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000385.npy"} +{"epoch": 0.582010582010582, "step": 386, "batch_size": 64, "mean": 58.50677490234375, "std": 115.51941680908203, "min": -166.21946716308594, "p10": -60.38720512390136, "median": 58.8576774597168, "p90": 154.27886352539065, "max": 561.378662109375, "pos_frac": 0.71875, "sample": [561.378662109375, -72.6329345703125, -31.395915985107422, 30.481657028198242, 72.12318420410156, -6.22113037109375, -0.5137252807617188, 102.97567749023438, 60.16043472290039, 82.10636138916016, 22.505538940429688, 62.36085510253906, -133.71424865722656, 20.137733459472656, 45.64677429199219, 103.72151947021484, 74.05856323242188, 123.49749755859375, 126.02550506591797, 102.44905090332031, 19.96278953552246, 89.56136322021484, 96.5456314086914, -80.64706420898438, -35.519920349121094, 148.98452758789062, 68.80064392089844, -28.02362060546875, 96.30974578857422, 93.72321319580078, 54.92169952392578, 156.30999755859375, -49.70732879638672, 126.03173828125, -62.491756439208984, -12.74993896484375, 72.95354461669922, 56.14654541015625, 89.99906921386719, 198.54336547851562, 45.288604736328125, -55.476585388183594, 35.19084930419922, 185.01080322265625, 177.50283813476562, -41.4047737121582, -73.52777099609375, 57.862037658691406, 11.324064254760742, 225.49606323242188, 7.441841125488281, -37.19685363769531, -164.46426391601562, 450.7611083984375, 149.53955078125, 60.808448791503906, 45.10118103027344, 100.8485107421875, 78.57225036621094, 146.4854736328125, 59.85331726074219, -34.818111419677734, 35.64929962158203, -166.21946716308594], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000386.npy"} +{"epoch": 0.5835222978080121, "step": 387, "batch_size": 64, "mean": 67.58799743652344, "std": 80.73281860351562, "min": -100.59547424316406, "p10": -25.39673500061035, "median": 53.72576141357422, "p90": 169.2447265625, "max": 291.76837158203125, "pos_frac": 0.78125, "sample": [101.39556884765625, -1.1271209716796875, 134.40594482421875, -35.083778381347656, -3.9304656982421875, 246.1554412841797, 47.83360290527344, 14.12936019897461, 47.0740966796875, 291.76837158203125, 13.476167678833008, -12.388559341430664, 47.171783447265625, 84.50611877441406, 177.2765655517578, 17.510215759277344, 59.04557800292969, 119.75105285644531, 146.30218505859375, 56.118377685546875, 101.12664794921875, 171.1195068359375, 164.8702392578125, -22.673946380615234, 5.270927429199219, 102.19619750976562, -21.62932586669922, 51.33314514160156, 131.25723266601562, 70.79996490478516, 4.560504913330078, -26.563644409179688, 38.711341857910156, 164.09829711914062, -29.75140380859375, 115.9177017211914, 64.53628540039062, 14.31505012512207, 48.444271087646484, -32.59883117675781, -11.553377151489258, 74.78080749511719, 93.1117172241211, -100.59547424316406, -58.569976806640625, 72.72557067871094, 160.7720489501953, 225.99514770507812, 29.940656661987305, 145.38510131835938, -16.533565521240234, 50.30763244628906, 50.413055419921875, 34.59834289550781, 65.08914184570312, 80.76222229003906, -64.0575180053711, 12.187482833862305, 74.58775329589844, 211.3520965576172, 222.939697265625, 92.95226287841797, 48.99761962890625, 163.31268310546875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000387.npy"} +{"epoch": 0.5850340136054422, "step": 388, "batch_size": 64, "mean": 49.538291931152344, "std": 97.24677276611328, "min": -158.2348175048828, "p10": -55.16930694580077, "median": 26.752971649169922, "p90": 169.08857421875, "max": 259.6390380859375, "pos_frac": 0.71875, "sample": [138.1577911376953, 160.4817657470703, 89.73190307617188, 216.83824157714844, 21.80626678466797, 247.81504821777344, 41.52803039550781, 59.48869323730469, 26.860923767089844, -32.01270294189453, 52.19165802001953, 125.08025360107422, -27.087860107421875, 94.618896484375, -10.3486328125, 259.6390380859375, -10.450931549072266, 245.9772491455078, 16.02410888671875, 153.32839965820312, 126.71826171875, -20.615768432617188, 168.81007385253906, 29.089004516601562, 167.64007568359375, -137.18521118164062, 118.5726318359375, 87.95292663574219, 5.453699111938477, 19.916841506958008, -18.4097843170166, -17.86060905456543, 3.1133804321289062, 154.78050231933594, -25.390920639038086, 58.780426025390625, 90.15408325195312, -25.6622314453125, 45.96379089355469, -44.91581726074219, 58.16179656982422, 246.62692260742188, -158.2348175048828, 141.9893341064453, 169.2079315185547, 9.321273803710938, 4.923393249511719, 1.5764923095703125, 26.64501953125, 181.68051147460938, 7.389274597167969, 157.69692993164062, -20.946456909179688, 14.789810180664062, -129.10003662109375, -59.56365966796875, -107.17430114746094, 63.70361328125, 3.432201385498047, 5.2309417724609375, 74.90223693847656, -97.72245025634766, -91.63581848144531, 10.977235794067383], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000388.npy"} +{"epoch": 0.5865457294028723, "step": 389, "batch_size": 64, "mean": 57.674339294433594, "std": 91.62287902832031, "min": -174.3614044189453, "p10": -47.855413055419916, "median": 51.89120864868164, "p90": 173.6033935546875, "max": 269.1260986328125, "pos_frac": 0.703125, "sample": [-53.748626708984375, -9.483182907104492, 111.12252807617188, 45.596343994140625, 114.53623962402344, 72.01104736328125, 88.44215393066406, 138.70401000976562, 216.76907348632812, 97.76058959960938, -72.17568969726562, -77.87702941894531, 95.36198425292969, 91.21160125732422, 49.50556945800781, 132.29281616210938, 66.188720703125, 127.38268280029297, 59.875213623046875, 16.454463958740234, 154.2469940185547, 142.8577880859375, -0.04883766174316406, 63.531707763671875, 104.6165771484375, -26.422164916992188, 181.89544677734375, -32.09032440185547, 45.07899475097656, 109.2085189819336, -36.88603210449219, 125.86297607421875, -39.192832946777344, 171.05230712890625, 174.69671630859375, 34.69384765625, 216.915283203125, 58.54718780517578, 33.99440383911133, -155.44442749023438, 178.94619750976562, -25.540409088134766, 37.141578674316406, -17.87105941772461, 52.67438507080078, -11.983306884765625, 167.49513244628906, -17.649410247802734, -174.3614044189453, 33.138397216796875, 17.49036407470703, 27.72148895263672, -106.30206298828125, -22.081966400146484, -51.56794738769531, 212.9288330078125, 170.57847595214844, 132.83038330078125, 51.1080322265625, 269.1260986328125, -17.7701358795166, 116.85543823242188, 15.351430892944336, 15.85470199584961], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000389.npy"} +{"epoch": 0.5880574452003023, "step": 390, "batch_size": 64, "mean": 52.69813919067383, "std": 77.28421783447266, "min": -140.32748413085938, "p10": -35.81375045776367, "median": 34.81395721435547, "p90": 149.7066802978516, "max": 243.4688720703125, "pos_frac": 0.765625, "sample": [-17.995864868164062, 42.866661071777344, 67.05355072021484, 27.823471069335938, 119.94092559814453, 15.119758605957031, 97.57099914550781, 100.48464965820312, -39.39019012451172, 57.15363311767578, 10.346128463745117, -42.41807556152344, 92.05935668945312, 18.945737838745117, -1.5081558227539062, 243.4688720703125, 141.81494140625, -47.09767150878906, 106.48652648925781, -77.20621490478516, 90.76504516601562, 24.14592742919922, 13.163185119628906, 197.30422973632812, 143.50830078125, 133.62689208984375, 39.5791015625, -70.23556518554688, 4.269611358642578, 152.36312866210938, -36.236236572265625, 30.048812866210938, 15.229631423950195, 126.1378402709961, -26.867626190185547, -34.82794952392578, -140.32748413085938, 130.91357421875, 187.37966918945312, 0.4020500183105469, 165.7373046875, 8.005411148071289, -7.088348388671875, 40.4534912109375, -2.5043296813964844, 98.79834747314453, 17.054229736328125, 25.006614685058594, 196.77804565429688, 18.62957000732422, 133.5625, 109.20036315917969, 12.456817626953125, 79.20403289794922, 10.6435546875, 69.68304443359375, 9.975740432739258, -33.27677917480469, 121.9373779296875, -29.255353927612305, 88.14501953125, 65.72412109375, 76.75568389892578, 201.1932373046875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000390.npy"} +{"epoch": 0.5895691609977324, "step": 391, "batch_size": 64, "mean": 62.50682830810547, "std": 103.98819732666016, "min": -162.985595703125, "p10": -58.43759002685547, "median": 61.61712646484375, "p90": 172.47973937988283, "max": 448.82403564453125, "pos_frac": 0.734375, "sample": [-1.0029449462890625, -159.70436096191406, 169.54812622070312, 42.784210205078125, 32.39556121826172, 151.4683837890625, -12.89157485961914, 167.70687866210938, 160.7542724609375, 285.87994384765625, 68.13133239746094, -110.2559814453125, 114.22653198242188, -4.1146392822265625, 114.46461486816406, 203.71881103515625, -82.9887466430664, 72.99812316894531, -59.209686279296875, 17.15776824951172, -1.1590042114257812, 130.9241943359375, 25.65985107421875, 39.38451385498047, 14.892974853515625, -56.63603210449219, -119.62930297851562, -36.33363342285156, 52.81309509277344, -32.51927947998047, 448.82403564453125, 127.28451538085938, 70.4880142211914, -30.95012664794922, 64.19671630859375, 9.630378723144531, 3.232576370239258, 87.74429321289062, 102.05183410644531, 149.7947998046875, 115.0110092163086, 153.8258819580078, -162.985595703125, -72.18911743164062, 67.87873077392578, 69.92869567871094, 196.55856323242188, 49.015724182128906, 138.28097534179688, 180.8651123046875, 57.490478515625, 7.5334014892578125, 173.73614501953125, 119.45453643798828, 213.3564453125, 148.40390014648438, 91.69325256347656, 59.03753662109375, -29.501747131347656, 30.060752868652344, 31.384567260742188, 76.14598083496094, 87.83287811279297, -23.1424560546875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000391.npy"} +{"epoch": 0.5910808767951625, "step": 392, "batch_size": 64, "mean": 72.25267028808594, "std": 109.33184814453125, "min": -195.166748046875, "p10": -37.53371658325195, "median": 58.491878509521484, "p90": 205.07226104736327, "max": 445.18896484375, "pos_frac": 0.78125, "sample": [-140.53829956054688, 6.573886871337891, 51.40557098388672, 41.368072509765625, 51.3797607421875, -36.618614196777344, 156.29006958007812, -55.006019592285156, 176.12060546875, 128.6418914794922, 35.5020751953125, -195.166748046875, 101.9406967163086, 155.93795776367188, 247.0848388671875, 76.69129943847656, -6.9674835205078125, 120.43515014648438, 205.2183380126953, 98.54541015625, 62.114097595214844, -10.42474365234375, 33.233070373535156, -108.90229034423828, 117.7796630859375, 134.24298095703125, 183.39395141601562, 68.7661361694336, 80.73736572265625, -105.46791076660156, 49.895660400390625, 343.34942626953125, 138.2914276123047, -17.565643310546875, -37.9259033203125, 75.69874572753906, 49.10608673095703, 78.20407104492188, 95.757568359375, 138.00003051757812, 230.8497314453125, 204.73141479492188, -66.2538070678711, 177.22894287109375, 0.04006195068359375, 9.386482238769531, 33.87232971191406, 4.869483947753906, 211.3623046875, 2.0550689697265625, -7.919105529785156, 445.18896484375, 3.228900909423828, 111.5003662109375, -8.911529541015625, 12.928689956665039, 280.0709228515625, -2.96173095703125, 32.59869384765625, 118.2198486328125, 15.338457107543945, 96.60027313232422, 54.869659423828125, 78.15396118164062], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000392.npy"} +{"epoch": 0.5925925925925926, "step": 393, "batch_size": 64, "mean": 70.35931396484375, "std": 81.43692016601562, "min": -69.46089172363281, "p10": -23.295351409912108, "median": 64.21770095825195, "p90": 179.74535675048833, "max": 291.8067626953125, "pos_frac": 0.765625, "sample": [105.25526428222656, 4.255170822143555, 62.05305480957031, 157.28794860839844, 169.482421875, 90.09003448486328, 31.701847076416016, 67.04218292236328, 66.3823471069336, 51.89828872680664, 7.275384902954102, 12.696388244628906, 193.87408447265625, 137.8638916015625, 68.50379180908203, -55.786773681640625, -20.94812774658203, 183.3705291748047, -3.0091323852539062, -3.7245140075683594, -22.5084228515625, 34.32743835449219, 141.22360229492188, 191.8636474609375, 126.95584106445312, 151.97325134277344, -16.835939407348633, 171.28662109375, 118.53962707519531, 127.76969909667969, 84.19256591796875, -61.889408111572266, 238.13975524902344, 184.63327026367188, -4.076417922973633, 71.08819580078125, -49.690643310546875, 200.52853393554688, -24.359405517578125, -2.352903366088867, 83.8499755859375, 11.762313842773438, 138.91073608398438, -36.9983024597168, 48.84428405761719, 153.91836547851562, 112.06631469726562, -22.365707397460938, 84.86982727050781, 98.06124877929688, 167.4751739501953, 291.8067626953125, 54.991119384765625, -23.632606506347656, 37.27186584472656, 167.50204467773438, 45.23759460449219, 17.768905639648438, -69.46089172363281, 6.8074798583984375, 36.067420959472656, 80.65342712402344, 25.156570434570312, 6.0590057373046875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000393.npy"} +{"epoch": 0.5941043083900227, "step": 394, "batch_size": 64, "mean": 51.482940673828125, "std": 78.84032440185547, "min": -155.28562927246094, "p10": -59.68370018005369, "median": 57.62960624694824, "p90": 145.90088043212896, "max": 207.1713104248047, "pos_frac": 0.796875, "sample": [12.057327270507812, 207.1713104248047, 196.4855194091797, 12.265838623046875, 11.044082641601562, 32.555877685546875, -66.72811889648438, 39.51307678222656, 201.590087890625, 50.79127502441406, 124.3448486328125, 111.66807556152344, 26.32174301147461, 57.625247955322266, -43.24672317504883, 34.80707550048828, -18.97039031982422, -36.43744659423828, -79.13351440429688, 34.7975959777832, 110.41157531738281, -28.742542266845703, 101.58970642089844, 20.368057250976562, 58.61219024658203, 185.67047119140625, 151.77294921875, 68.58908081054688, 121.59589385986328, 19.99799346923828, -155.28562927246094, 47.20558166503906, 132.1993865966797, 75.21812438964844, 29.972898483276367, 86.85910034179688, -7.929441452026367, 26.76172637939453, 79.80023193359375, 3.6966514587402344, 72.11973571777344, -92.92289733886719, -98.3651123046875, 86.64863586425781, 78.84984588623047, 202.159423828125, -134.8950653076172, 114.35267639160156, -4.829353332519531, 70.32125854492188, -74.92288208007812, 76.13587188720703, 57.63396453857422, 109.30999755859375, 61.61167907714844, 101.6539535522461, 75.9703140258789, 129.24880981445312, 32.9429931640625, 34.3043212890625, 29.461639404296875, 156.361328125, 66.8156509399414, 108.05445098876953], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000394.npy"} +{"epoch": 0.5956160241874527, "step": 395, "batch_size": 64, "mean": 55.06922149658203, "std": 84.29630279541016, "min": -105.12273406982422, "p10": -40.74346008300781, "median": 35.72089767456055, "p90": 157.52329254150393, "max": 309.1462707519531, "pos_frac": 0.734375, "sample": [102.9903335571289, 160.0025634765625, 153.6959686279297, -1.2713394165039062, 143.26852416992188, 86.94642639160156, -4.199323654174805, 91.1041488647461, 186.64212036132812, -14.839433670043945, 31.481903076171875, 108.43890380859375, 140.5908660888672, 159.16357421875, 53.22710418701172, 167.3480682373047, -5.3611297607421875, 77.56126403808594, 105.0887451171875, 15.566635131835938, -9.311698913574219, 39.49639129638672, -13.059722900390625, 57.3488883972168, 119.20512390136719, -105.12273406982422, 58.259239196777344, 101.72624969482422, 51.67086410522461, -11.966438293457031, 85.02702331542969, 309.1462707519531, 27.945114135742188, 63.04351043701172, -58.08149719238281, 45.23924255371094, -29.19961166381836, 19.93942642211914, 109.1310806274414, -43.756103515625, 8.763519287109375, 7.713407516479492, 146.79527282714844, 30.897640228271484, 11.59091567993164, 83.94754028320312, -5.8071441650390625, 290.60516357421875, -42.02049255371094, 4.816007614135742, 4.7368316650390625, 265.5418701171875, -66.84970092773438, 11.166553497314453, 14.160903930664062, 141.20431518554688, -45.10091018676758, 31.945404052734375, 90.72921752929688, -82.43537139892578, 7.826618194580078, -37.76371765136719, 2.7283496856689453, 75.11157989501953], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000395.npy"} +{"epoch": 0.5971277399848829, "step": 396, "batch_size": 64, "mean": 60.71634292602539, "std": 83.34130859375, "min": -129.95501708984375, "p10": -26.624969100952146, "median": 49.95807456970215, "p90": 168.65813751220705, "max": 312.22833251953125, "pos_frac": 0.765625, "sample": [157.95025634765625, 312.22833251953125, 165.83905029296875, 153.15249633789062, 192.06173706054688, 145.74928283691406, 49.297000885009766, 133.68812561035156, 19.01696014404297, 44.28314208984375, 56.326194763183594, 16.69476318359375, 98.85765838623047, -41.500736236572266, 55.70983123779297, -9.507640838623047, 180.44775390625, 56.637245178222656, 12.995647430419922, 78.65946960449219, 43.525848388671875, -37.04438781738281, 232.27960205078125, 54.90886688232422, -5.826728820800781, 50.61914825439453, 63.41250228881836, -129.95501708984375, 148.51808166503906, 82.4063720703125, 43.0378303527832, -105.53742980957031, 7.204051971435547, 157.79798889160156, 55.503265380859375, 60.57642364501953, 96.04154968261719, 70.23155212402344, 186.27157592773438, 162.85955810546875, -11.078533172607422, 35.862335205078125, 8.140277862548828, 94.27474975585938, 25.10120391845703, 59.683380126953125, 29.504188537597656, 42.60100555419922, 86.97396850585938, -23.17272186279297, -13.184333801269531, -28.104503631591797, 48.526222229003906, -7.159980773925781, -16.028461456298828, 150.142822265625, 45.948768615722656, 169.86631774902344, 9.713298797607422, 37.24507141113281, -32.05235290527344, -115.92082214355469, -12.993568420410156, 186.5404815673828], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000396.npy"} +{"epoch": 0.5986394557823129, "step": 397, "batch_size": 64, "mean": 36.185546875, "std": 105.1511001586914, "min": -274.3332824707031, "p10": -80.57515869140624, "median": 28.571372985839844, "p90": 163.8723114013672, "max": 303.6517639160156, "pos_frac": 0.671875, "sample": [-0.22677040100097656, 13.596168518066406, -68.0137939453125, 44.592254638671875, -54.34868621826172, 67.24461364746094, 148.70904541015625, 24.863426208496094, 151.4954071044922, -13.937454223632812, 0.4950675964355469, 185.66258239746094, 161.91558837890625, 112.41302490234375, 61.936180114746094, 88.38249969482422, -176.33303833007812, -82.91748046875, 55.8129768371582, -13.276374816894531, 118.43244934082031, -10.802688598632812, 60.948455810546875, 109.9229736328125, 28.81678009033203, -234.83924865722656, -54.365570068359375, 26.029617309570312, 216.72537231445312, 62.79646301269531, -6.865089416503906, 26.49019432067871, 188.65310668945312, 35.935760498046875, 150.5395965576172, -75.1097412109375, 55.58259201049805, 164.71090698242188, 125.33344268798828, 16.867341995239258, 136.51980590820312, 186.45928955078125, 100.26912689208984, 23.6815185546875, -274.3332824707031, 35.513519287109375, 303.6517639160156, -97.09918212890625, -45.419708251953125, 28.02886962890625, 4.190044403076172, 61.88523864746094, -1.3386611938476562, 28.325965881347656, 18.501256942749023, -41.604515075683594, 58.16828155517578, 64.91771697998047, -17.133758544921875, 135.2680206298828, -152.7047882080078, -116.01848602294922, -29.66596031188965, 191.94485473632812], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000397.npy"} +{"epoch": 0.600151171579743, "step": 398, "batch_size": 64, "mean": 75.77496337890625, "std": 96.86148834228516, "min": -78.70915222167969, "p10": -27.429409027099606, "median": 60.41123580932617, "p90": 178.92919158935547, "max": 450.96160888671875, "pos_frac": 0.765625, "sample": [108.62576293945312, 70.03750610351562, 126.77195739746094, 60.73515319824219, 176.3194122314453, 14.397518157958984, 210.5791473388672, 287.22662353515625, -34.80767822265625, 196.07394409179688, 158.54690551757812, 48.82395935058594, -1.3048858642578125, -13.652862548828125, 77.17500305175781, 19.574630737304688, 124.65763092041016, -14.214179992675781, 87.26004028320312, -78.70915222167969, 147.95506286621094, -6.864145278930664, 172.04782104492188, 273.449462890625, -10.852714538574219, 180.04766845703125, 109.51898956298828, 111.39092254638672, 25.67547035217285, 60.35126495361328, -32.00190353393555, 33.97669982910156, -22.862762451171875, 138.26727294921875, 20.122215270996094, -14.915472030639648, 74.13780212402344, 175.67283630371094, 28.704452514648438, 60.47120666503906, -33.76268005371094, -63.61418914794922, 127.95594024658203, 52.52461242675781, 116.96379089355469, 142.47222900390625, 19.555578231811523, 7.276420593261719, -9.06424331665039, 109.09956359863281, 18.28765869140625, 450.96160888671875, 36.917274475097656, 10.419120788574219, 102.93974304199219, -29.38654327392578, 267.6274108886719, -47.047508239746094, 94.26424407958984, 1.4948444366455078, 1.2348823547363281, 136.48097229003906, 174.23519897460938, 13.35313606262207], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000398.npy"} +{"epoch": 0.6016628873771731, "step": 399, "batch_size": 64, "mean": 87.00018310546875, "std": 109.84545135498047, "min": -98.27456665039062, "p10": -30.553049087524407, "median": 77.15658569335938, "p90": 198.9765594482422, "max": 553.904052734375, "pos_frac": 0.78125, "sample": [129.43202209472656, 45.33989715576172, 129.96292114257812, 168.26763916015625, 18.399887084960938, 229.74755859375, 124.48602294921875, 68.30016326904297, 5.614315032958984, 6.307380676269531, -96.69851684570312, 42.665924072265625, 95.04534912109375, -32.7505989074707, 54.96766662597656, 84.64643859863281, 85.6441650390625, 247.98080444335938, 103.43891906738281, 348.2355651855469, -98.27456665039062, -0.49505615234375, -83.40042114257812, 140.9057159423828, 292.0835266113281, 20.292892456054688, 27.960865020751953, 553.904052734375, 25.657310485839844, -15.243072509765625, 25.16240692138672, -3.622163772583008, 24.519454956054688, 21.672931671142578, 36.76054382324219, -25.432090759277344, 71.30335235595703, 181.60740661621094, 83.00981903076172, 200.20681762695312, 191.76068115234375, -6.54762077331543, 174.39610290527344, -35.144683837890625, 203.5633087158203, 119.49256134033203, 84.94596862792969, 85.83106994628906, -44.81611633300781, -5.301414489746094, 176.3872833251953, 121.39476013183594, 144.64956665039062, 149.62796020507812, 132.06094360351562, 165.65045166015625, 68.71115112304688, 196.10595703125, 39.01240539550781, -32.747745513916016, 191.04574584960938, -20.13605499267578, 93.13530731201172, 37.320945739746094], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000399.npy"} +{"epoch": 0.6031746031746031, "step": 400, "batch_size": 64, "mean": 48.633819580078125, "std": 87.89905548095703, "min": -184.8043212890625, "p10": -47.30597839355468, "median": 30.63689422607422, "p90": 156.02863311767578, "max": 217.18038940429688, "pos_frac": 0.71875, "sample": [15.055309295654297, 81.51520538330078, 46.561737060546875, 159.7886199951172, 138.15330505371094, 136.7355194091797, 61.068748474121094, 102.94691467285156, 139.64138793945312, 154.7816619873047, 31.206851959228516, -43.98619079589844, 30.066936492919922, 6.796886444091797, 23.494792938232422, -34.86627197265625, -3.360198974609375, 156.56304931640625, 116.97052764892578, 144.42276000976562, 12.754669189453125, 23.890727996826172, 77.48443603515625, -6.909749984741211, -184.8043212890625, 140.2527618408203, -4.183099746704102, 27.829879760742188, 11.88730239868164, -15.870988845825195, -124.27651977539062, -5.97477912902832, 115.84358215332031, 17.542526245117188, 184.1542205810547, 10.821239471435547, -48.72874450683594, 29.18820571899414, 141.43930053710938, 133.42681884765625, 210.73199462890625, 111.01509857177734, -138.20321655273438, 138.13204956054688, 10.181785583496094, 33.295631408691406, -5.57403564453125, -54.747100830078125, 28.82556915283203, 217.18038940429688, 56.86592483520508, 117.11890411376953, -50.34690856933594, 36.050880432128906, -39.56550598144531, 207.18101501464844, 201.6497802734375, 46.690887451171875, 53.83318328857422, -17.82391357421875, -136.53228759765625, -13.415813446044922, 14.393562316894531, 86.30145263671875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000400.npy"} +{"epoch": 0.6046863189720333, "step": 401, "batch_size": 64, "mean": 76.97929382324219, "std": 92.99199676513672, "min": -146.32977294921875, "p10": -23.577141952514644, "median": 50.12537956237793, "p90": 203.13128356933595, "max": 311.0163879394531, "pos_frac": 0.8125, "sample": [4.612510681152344, 130.1897430419922, 175.81192016601562, 99.92105102539062, -20.711925506591797, 0.15977096557617188, 44.964447021484375, 78.76387786865234, -3.7689857482910156, -11.621604919433594, 269.87469482421875, -9.556259155273438, -102.07012939453125, -27.699386596679688, 38.77948760986328, -24.805091857910156, 150.92333984375, 203.70904541015625, 151.3858642578125, 235.1083984375, 64.6628646850586, 48.59364700317383, 41.0546875, 88.82334899902344, -146.32977294921875, -59.35338592529297, 66.18550109863281, 92.53970336914062, 92.4329605102539, 51.65711212158203, 138.14370727539062, 46.45207977294922, 42.568389892578125, 207.63949584960938, -1.4416961669921875, 163.241943359375, 7.175924301147461, 150.52096557617188, 30.724214553833008, 201.78317260742188, 36.72516632080078, 184.08326721191406, 124.57162475585938, -25.01869010925293, 214.15487670898438, 11.166980743408203, 11.9832763671875, 275.1098937988281, 154.61468505859375, 191.38909912109375, 77.00995635986328, 0.7394313812255859, 19.699848175048828, 36.09510803222656, -29.056171417236328, 62.87126922607422, 82.82606506347656, 42.87806701660156, 172.7644500732422, 172.8242645263672, 47.139404296875, 2.5292396545410156, 37.51144790649414, 311.0163879394531], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000401.npy"} +{"epoch": 0.6061980347694633, "step": 402, "batch_size": 64, "mean": 59.58970642089844, "std": 87.31135559082031, "min": -141.06005859375, "p10": -41.64195404052734, "median": 54.41984558105469, "p90": 166.26907348632812, "max": 268.3800048828125, "pos_frac": 0.765625, "sample": [109.62699127197266, 223.10418701171875, 115.02421569824219, 110.22750091552734, 88.00907897949219, 57.594688415527344, 239.93858337402344, 52.8040771484375, 69.25263977050781, 117.49158477783203, -28.715957641601562, 207.0050048828125, 56.85858917236328, -27.327362060546875, 83.36689758300781, 45.65633010864258, 7.713836669921875, 25.83323860168457, 40.094512939453125, 35.00474548339844, 167.09048461914062, 12.192485809326172, 15.354591369628906, -104.26066589355469, 203.48516845703125, -4.463954925537109, 86.1988296508789, 220.64187622070312, 101.30110168457031, 59.27763366699219, 268.3800048828125, 164.35244750976562, -5.0498504638671875, 81.25239562988281, 39.33119201660156, 53.50050354003906, 118.50139617919922, 128.0888214111328, 25.9176025390625, 74.81440734863281, -32.90406036376953, -98.25596618652344, -141.06005859375, 117.26985931396484, 48.00767517089844, 5.12263298034668, 55.33918762207031, -43.23246765136719, 35.517005920410156, -8.68075942993164, 118.6176528930664, 156.604248046875, 139.43785095214844, 150.19937133789062, -54.60877990722656, 3.4654483795166016, -20.120750427246094, -73.43283081054688, -108.08067321777344, 68.53378295898438, 33.10551071166992, 134.62844848632812, -37.930755615234375, 31.72962188720703], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000402.npy"} +{"epoch": 0.6077097505668935, "step": 403, "batch_size": 64, "mean": 73.47581481933594, "std": 97.6409912109375, "min": -126.70684814453125, "p10": -28.745986557006827, "median": 61.674293518066406, "p90": 176.9805679321289, "max": 478.244140625, "pos_frac": 0.8125, "sample": [-13.034805297851562, 114.3829345703125, 198.59652709960938, 36.154151916503906, 160.5117950439453, 44.16227722167969, 345.62200927734375, 103.96270751953125, 42.23432922363281, -4.8542327880859375, 152.9227294921875, -20.704208374023438, 145.72860717773438, -61.574832916259766, 142.4281463623047, 77.333251953125, 61.57752990722656, 23.099390029907227, 46.98388671875, 12.416845321655273, 5.67936897277832, -38.52326583862305, 478.244140625, 175.26800537109375, -18.587188720703125, 100.33253479003906, 197.60440063476562, 25.230928421020508, 206.7456817626953, 29.231910705566406, 5.035251617431641, 55.66888427734375, 61.77105712890625, 16.08203887939453, 71.56575775146484, -18.19124984741211, 15.000951766967773, 184.47291564941406, 54.50354766845703, 108.06301879882812, 24.1910400390625, 1.9031620025634766, 167.8323211669922, 5.976970672607422, 91.36768341064453, -35.82647705078125, 94.35423278808594, 130.64523315429688, 96.08062744140625, 75.539794921875, 132.040283203125, 177.7145233154297, -126.70684814453125, -32.19246292114258, 156.1014404296875, -112.73471069335938, 122.72722625732422, 66.46656036376953, 40.38987350463867, 150.05148315429688, -39.557769775390625, 21.59198760986328, 62.878395080566406, 108.4697265625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000403.npy"} +{"epoch": 0.6092214663643235, "step": 404, "batch_size": 64, "mean": 75.45799255371094, "std": 102.14546966552734, "min": -118.96401977539062, "p10": -45.328915405273435, "median": 60.94463348388672, "p90": 208.5811935424805, "max": 289.9119873046875, "pos_frac": 0.703125, "sample": [-13.8924560546875, 52.19486999511719, 87.2956771850586, -10.250991821289062, 67.3202133178711, -16.284690856933594, 196.822265625, 26.70116424560547, -53.01805877685547, -7.6256866455078125, 190.68731689453125, -40.817901611328125, -118.96401977539062, -94.48635864257812, 2.702045440673828, 204.91748046875, -23.326011657714844, -13.690567016601562, 192.15533447265625, 216.39208984375, 126.97549438476562, 113.8742904663086, 145.43862915039062, 218.9739990234375, -30.868194580078125, 167.43882751464844, -50.813743591308594, 277.613037109375, 116.17483520507812, 157.30421447753906, 21.133270263671875, 94.26399230957031, -29.130460739135742, 152.5501708984375, -23.14647674560547, -9.029350280761719, 4.426582336425781, 156.08383178710938, 113.82962799072266, 0.6909217834472656, 22.611045837402344, -10.013702392578125, 147.93385314941406, 55.1705322265625, 155.3377227783203, 23.28006362915039, 253.41226196289062, 190.52706909179688, 210.0271453857422, 49.18988800048828, 7.725978851318359, 285.387451171875, 289.9119873046875, 66.71873474121094, 134.2269287109375, 137.82684326171875, 205.20730590820312, -47.26220703125, -56.36070251464844, -75.88151550292969, 68.76667785644531, 71.91680908203125, 31.94793701171875, 43.08881378173828], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000404.npy"} +{"epoch": 0.6107331821617535, "step": 405, "batch_size": 64, "mean": 63.28079605102539, "std": 92.64891052246094, "min": -120.77560424804688, "p10": -41.19151458740234, "median": 36.99496269226074, "p90": 184.58940887451172, "max": 317.3265075683594, "pos_frac": 0.75, "sample": [228.01885986328125, -2.613811492919922, -55.85219955444336, 317.3265075683594, 216.22872924804688, 17.781845092773438, 183.6912841796875, -34.815582275390625, -10.12808609008789, -71.23928833007812, -8.275039672851562, 15.15791130065918, 30.45993995666504, -6.670948028564453, 85.39598083496094, 18.492897033691406, 221.707763671875, -1.9343013763427734, 6.864097595214844, 65.09762573242188, 145.5673828125, 179.86468505859375, 28.879608154296875, 112.51460266113281, 1.4072532653808594, 32.291996002197266, 127.54714965820312, 141.1686553955078, -50.283119201660156, 97.67076873779297, 33.96237564086914, -12.421939849853516, 177.60147094726562, 126.7538070678711, -53.4234619140625, 95.87354278564453, 42.8282585144043, -120.77560424804688, 85.99959564208984, 70.4837646484375, 22.255069732666016, -43.92405700683594, 42.93213653564453, 37.994140625, 18.84052276611328, 58.11791229248047, 15.997413635253906, 63.02623748779297, 259.6895751953125, 132.78341674804688, -20.550762176513672, 0.026214599609375, 102.74393463134766, 46.29551696777344, 179.19964599609375, 231.56634521484375, 19.726455688476562, 35.995784759521484, 148.37152099609375, -97.93048095703125, 0.20473480224609375, 184.9743194580078, 167.17828369140625, -33.748008728027344], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000405.npy"} +{"epoch": 0.6122448979591837, "step": 406, "batch_size": 64, "mean": 77.15496826171875, "std": 101.1622543334961, "min": -126.7188491821289, "p10": -54.66751708984375, "median": 72.34293746948242, "p90": 195.62157287597657, "max": 339.26812744140625, "pos_frac": 0.78125, "sample": [180.93890380859375, 35.24501037597656, -52.67716979980469, -11.903518676757812, 126.96101379394531, 92.20081329345703, 143.31773376464844, 339.26812744140625, -55.52052307128906, 280.89910888671875, 127.34386444091797, 110.9959945678711, 17.036869049072266, -4.446990966796875, 196.82540893554688, 278.97930908203125, 198.35443115234375, 43.799896240234375, 57.83961486816406, 180.81639099121094, 124.93600463867188, 28.857566833496094, 103.578369140625, 94.74750518798828, 48.29747772216797, 125.81538391113281, -34.07315444946289, 120.0478515625, 5.572134017944336, 35.952205657958984, 1.7522392272949219, -1.5238876342773438, -125.34344482421875, -15.273921966552734, 75.773193359375, 165.90615844726562, 102.25907135009766, 171.31346130371094, -70.884521484375, 174.44381713867188, 70.23931884765625, 23.87933349609375, 74.4465560913086, 225.3742218017578, 27.642440795898438, 149.81289672851562, -71.38851928710938, 16.300363540649414, 150.0394287109375, 144.74514770507812, 69.70248413085938, -102.48887634277344, 102.98335266113281, -92.32460021972656, 63.25090789794922, -126.7188491821289, 62.79322814941406, 192.8126220703125, -25.341720581054688, 16.045257568359375, 290.9520263671875, 101.0234375, 99.94223022460938, 55.76741027832031], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000406.npy"} +{"epoch": 0.6137566137566137, "step": 407, "batch_size": 64, "mean": 76.03689575195312, "std": 142.9007110595703, "min": -164.36940002441406, "p10": -55.4653293609619, "median": 52.095008850097656, "p90": 194.08152618408207, "max": 757.85546875, "pos_frac": 0.71875, "sample": [0.2605133056640625, -16.842512130737305, 1.127655029296875, 83.00225067138672, -35.917198181152344, 137.43911743164062, 208.66139221191406, -33.999908447265625, -164.36940002441406, 123.4961166381836, 78.34806823730469, 40.571048736572266, 3.6905059814453125, 13.487091064453125, -82.170166015625, 10.836967468261719, 92.19625091552734, 123.29190826416016, 220.63131713867188, 164.35067749023438, 169.56478881835938, -11.517599105834961, 93.58995056152344, 72.12332153320312, 146.84173583984375, 80.07107543945312, -5.096916198730469, -18.793838500976562, 94.9757080078125, 522.6959838867188, 91.78860473632812, 3.3852767944335938, 161.9202423095703, 197.15878295898438, 169.01809692382812, 3.935516357421875, -82.48312377929688, 757.85546875, -25.483339309692383, 41.382118225097656, -105.95034790039062, 158.42230224609375, 110.31196594238281, 62.807899475097656, 186.90126037597656, 283.031005859375, 142.53045654296875, 7.050895690917969, 344.7956237792969, 0.6769065856933594, 33.12684631347656, -28.041015625, -65.02261352539062, -122.79229736328125, -44.58177185058594, 2.029582977294922, -17.679231643676758, 159.9182891845703, -60.12971115112305, -3.6842575073242188, 24.20830535888672, 82.48129272460938, 111.37601470947266, 173.55072021484375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000407.npy"} +{"epoch": 0.6152683295540439, "step": 408, "batch_size": 64, "mean": 84.0474853515625, "std": 90.58114624023438, "min": -102.89524841308594, "p10": -18.895145606994628, "median": 67.9002914428711, "p90": 189.67351837158208, "max": 371.05169677734375, "pos_frac": 0.828125, "sample": [59.63372802734375, 167.33868408203125, 115.76837158203125, 122.93557739257812, 14.403335571289062, -60.64739990234375, -102.89524841308594, 18.851123809814453, 110.13452911376953, 40.64317321777344, 55.98485565185547, 258.19451904296875, -0.4860363006591797, 76.06547546386719, 181.05638122558594, 193.3665771484375, 12.671001434326172, 5.123542785644531, 42.30792999267578, 61.311309814453125, 67.1234130859375, -19.427772521972656, 6.536924362182617, 159.68630981445312, 196.45281982421875, -59.11346435546875, 50.74475860595703, 250.3570556640625, 105.09234619140625, 156.00912475585938, 164.11305236816406, -22.47457504272461, 125.28703308105469, 21.879865646362305, 60.925506591796875, 174.33355712890625, 68.21156311035156, -17.6523494720459, 371.05169677734375, 99.47932434082031, 31.79883575439453, 162.52102661132812, 211.040771484375, 180.427734375, 140.66355895996094, 22.21974754333496, 137.04351806640625, -31.38193702697754, 77.43731689453125, 54.72575378417969, 145.8211669921875, 99.63168334960938, 286.70623779296875, -86.48973083496094, 112.18717193603516, 102.4388427734375, 31.168277740478516, -5.347877502441406, 24.715484619140625, 67.58901977539062, 125.10028839111328, 58.592655181884766, -4.0762176513671875, 104.12797546386719], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000408.npy"} +{"epoch": 0.6167800453514739, "step": 409, "batch_size": 64, "mean": 38.188819885253906, "std": 80.66439819335938, "min": -149.50933837890625, "p10": -45.96980361938476, "median": 25.04256534576416, "p90": 150.48054199218754, "max": 249.23178100585938, "pos_frac": 0.65625, "sample": [39.04505920410156, -3.8207759857177734, -24.261581420898438, -46.96144104003906, 15.96297836303711, -19.056827545166016, 37.794586181640625, -10.571895599365234, 23.922748565673828, -43.655982971191406, -26.993480682373047, 169.35882568359375, -20.645387649536133, 192.31829833984375, 106.39567565917969, -5.738189697265625, 37.955352783203125, 1.0040435791015625, 39.8174934387207, 188.99630737304688, 124.19474029541016, -40.82846450805664, 92.1693115234375, 137.8248291015625, 28.694740295410156, -69.07597351074219, -29.443099975585938, 114.5908203125, 26.162382125854492, -16.5869140625, -66.25517272949219, 29.664932250976562, 106.15274810791016, -33.34291458129883, 110.66735076904297, -8.15943717956543, 142.54953002929688, 111.8710708618164, 22.602615356445312, 37.600975036621094, -48.4511604309082, -14.400688171386719, 90.41116333007812, -68.07501220703125, 3.6280899047851562, 15.753707885742188, 9.327362060546875, 53.462886810302734, -149.50933837890625, 181.40882873535156, 23.397464752197266, 153.87954711914062, 0.38238525390625, 194.96194458007812, -136.42771911621094, 5.492347717285156, 43.710113525390625, 73.34696197509766, 81.5714340209961, 57.69831848144531, 249.23178100585938, -31.951353073120117, 124.00497436523438, 59.31044387817383], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000409.npy"} +{"epoch": 0.618291761148904, "step": 410, "batch_size": 64, "mean": 58.07272720336914, "std": 82.0196762084961, "min": -228.69720458984375, "p10": -29.795057678222655, "median": 54.993879318237305, "p90": 156.41993560791016, "max": 278.39337158203125, "pos_frac": 0.796875, "sample": [94.41138458251953, 80.34602355957031, 53.25739669799805, -12.35659408569336, 172.36419677734375, 79.641845703125, 94.02789306640625, 16.059757232666016, -49.37596130371094, -113.04395294189453, 71.53272247314453, 154.6618194580078, -1.0330657958984375, -61.431129455566406, 156.61827087402344, 85.56855010986328, 155.9571533203125, -228.69720458984375, 106.49497985839844, 195.464111328125, 278.39337158203125, 41.74017333984375, 7.0717926025390625, 81.84326934814453, 14.390121459960938, 97.39041137695312, -14.632217407226562, 79.5244140625, 44.130043029785156, -37.90154266357422, 131.93325805664062, -27.772720336914062, 17.758075714111328, 132.130859375, 35.70869445800781, 175.54556274414062, 136.75746154785156, 32.1646614074707, 58.45002746582031, 95.65292358398438, -23.67511749267578, -30.661773681640625, 102.3040542602539, 21.111160278320312, 122.42386627197266, 56.73036193847656, 108.42276000976562, -8.753562927246094, 38.612667083740234, 115.42572021484375, 46.332000732421875, 61.139190673828125, 230.58847045898438, 48.48271179199219, 42.92913818359375, 67.46240997314453, 9.793460845947266, 35.705841064453125, 9.28521728515625, 115.91865539550781, 158.38116455078125, 3.1265487670898438, -71.52267456054688, 26.34522247314453], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000410.npy"} +{"epoch": 0.6198034769463341, "step": 411, "batch_size": 64, "mean": 65.00819396972656, "std": 101.78104400634766, "min": -178.82907104492188, "p10": -49.7080581665039, "median": 54.44219970703125, "p90": 174.64986877441407, "max": 393.9941101074219, "pos_frac": 0.734375, "sample": [-20.71784210205078, 136.7860870361328, 156.3691864013672, 134.9044189453125, 84.11100006103516, -31.922286987304688, -96.67851257324219, -178.82907104492188, 97.55753326416016, 21.111480712890625, 129.00540161132812, 189.06265258789062, 112.08712005615234, 65.11537170410156, 148.6278076171875, 47.727867126464844, 112.85328674316406, -53.500343322753906, 100.89644622802734, 57.57145690917969, 12.181343078613281, 212.4585418701172, 174.34701538085938, 49.930145263671875, -25.792022705078125, 120.9046630859375, 1.2325973510742188, 16.97897720336914, -64.44308471679688, -128.6505584716797, -39.49639892578125, -32.27863311767578, -39.251930236816406, 285.9161682128906, 169.06341552734375, 62.29128646850586, 20.60944938659668, 170.69281005859375, 34.452545166015625, 393.9941101074219, 117.14019775390625, 49.98712921142578, -49.79095458984375, 176.96548461914062, 129.55764770507812, 166.306640625, -59.175048828125, 51.31294250488281, -14.430835723876953, -49.51463317871094, 32.13087463378906, -18.385787963867188, 165.52537536621094, 174.7796630859375, 44.74164581298828, 92.9681625366211, 87.39794158935547, 77.72891998291016, 253.03558349609375, 4.337158203125, -36.70238494873047, 11.329940795898438, 10.622482299804688, 135.3766326904297], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000411.npy"} +{"epoch": 0.6213151927437641, "step": 412, "batch_size": 64, "mean": 53.42518615722656, "std": 80.92982482910156, "min": -87.01852416992188, "p10": -37.45684280395508, "median": 36.275644302368164, "p90": 163.57440795898438, "max": 242.8301239013672, "pos_frac": 0.703125, "sample": [-67.9438705444336, 158.87261962890625, 37.632301330566406, 183.7913818359375, 17.31714630126953, 104.39128112792969, 21.784927368164062, 149.6151885986328, -87.01852416992188, -6.842189788818359, 40.183074951171875, 12.746742248535156, 94.36715698242188, 160.41671752929688, -5.423881530761719, -38.61273956298828, 124.20455932617188, -78.83810424804688, 27.420936584472656, -14.060272216796875, 182.13067626953125, 84.86820983886719, 18.831512451171875, 60.580474853515625, 97.19943237304688, -48.591697692871094, -7.3442535400390625, 66.57856750488281, 35.471439361572266, 13.030242919921875, 3.2819995880126953, 234.9326629638672, 15.530668258666992, 242.8301239013672, -19.46710205078125, 37.07984924316406, 5.35748291015625, -19.82180404663086, 146.13182067871094, -12.600288391113281, -17.188989639282227, -34.75975036621094, 129.39785766601562, -50.348323822021484, 51.812503814697266, 176.5643768310547, 143.83465576171875, 97.94080352783203, 221.45994567871094, -25.288867950439453, 45.13776397705078, 98.13575744628906, 119.02101135253906, 31.339988708496094, 18.630794525146484, 144.4485321044922, -18.09818458557129, 49.09101104736328, -53.26620864868164, 67.03549194335938, 164.92770385742188, 0.6452465057373047, 116.91588592529297, -28.19158935546875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000412.npy"} +{"epoch": 0.6228269085411943, "step": 413, "batch_size": 64, "mean": 58.87272644042969, "std": 104.35812377929688, "min": -221.00845336914062, "p10": -56.06112365722655, "median": 51.05935478210449, "p90": 168.31412048339845, "max": 423.56549072265625, "pos_frac": 0.75, "sample": [8.440990447998047, -65.48542785644531, 152.48208618164062, 65.86971282958984, -13.155235290527344, -9.955459594726562, 5.4000396728515625, 204.88621520996094, -40.28501892089844, 210.12786865234375, -221.00845336914062, 140.59381103515625, 157.79531860351562, -20.444473266601562, -43.95390319824219, 26.985565185546875, -79.16261291503906, 158.7219696044922, 25.119888305664062, 23.07073402404785, 253.9307861328125, 69.28057861328125, 89.62313842773438, 50.45828628540039, 118.98159790039062, 33.08228302001953, 423.56549072265625, 134.55462646484375, 51.660423278808594, 49.57221221923828, 74.01808166503906, 85.58861541748047, 93.279052734375, 47.651214599609375, 185.6446533203125, -31.658172607421875, 109.94053649902344, 18.094619750976562, 65.09183502197266, 34.834007263183594, -114.72319793701172, 162.57464599609375, 118.90241241455078, -89.92557525634766, 104.46127319335938, -22.072160720825195, -187.1571044921875, 44.579444885253906, 72.36092376708984, 17.103633880615234, -59.36395263671875, -48.354522705078125, 254.1510009765625, 9.109302520751953, 31.488327026367188, 112.19383239746094, 34.34335708618164, 148.74563598632812, 56.51182174682617, 90.6903305053711, -27.6192626953125, 124.59907531738281, 170.77389526367188, 121.24363708496094], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000413.npy"} +{"epoch": 0.6243386243386243, "step": 414, "batch_size": 64, "mean": 72.30569458007812, "std": 106.42288208007812, "min": -137.35995483398438, "p10": -49.285804748535156, "median": 67.1916275024414, "p90": 191.19910430908203, "max": 460.6246337890625, "pos_frac": 0.734375, "sample": [137.21719360351562, -48.80531311035156, 131.5572967529297, 139.617919921875, -3.194366455078125, 67.28831481933594, -137.35995483398438, 82.44573974609375, 88.98316192626953, 29.468521118164062, 86.42959594726562, 124.90995788574219, 131.49057006835938, -55.84857940673828, 188.65213012695312, 63.84601974487305, -83.79159545898438, -68.83661651611328, 50.174720764160156, 93.59749603271484, 2.5521106719970703, 74.17671203613281, 217.97418212890625, -51.579383850097656, 192.29066467285156, 138.69284057617188, 104.31178283691406, -0.9533767700195312, 83.00384521484375, 128.4066162109375, 185.59934997558594, -2.4571685791015625, 149.48841857910156, -27.630722045898438, 241.68678283691406, 74.49051666259766, 132.29888916015625, 62.02424621582031, 67.09494018554688, 130.2406768798828, 20.119766235351562, -26.38751220703125, 45.184837341308594, -11.602949142456055, 395.275146484375, -25.673110961914062, 18.21703338623047, 72.273681640625, -127.12173461914062, 57.973182678222656, 460.6246337890625, 15.771751403808594, -11.738937377929688, 29.137065887451172, 162.28382873535156, 227.84304809570312, 20.683143615722656, 26.911453247070312, 118.542724609375, 73.99683380126953, -49.491729736328125, -19.928159713745117, 23.498058319091797, 211.61773681640625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000414.npy"} +{"epoch": 0.6258503401360545, "step": 415, "batch_size": 64, "mean": 56.51627731323242, "std": 99.39273834228516, "min": -181.93814086914062, "p10": -68.42824554443358, "median": 45.008012771606445, "p90": 178.38525848388673, "max": 367.01934814453125, "pos_frac": 0.75, "sample": [-8.712770462036133, 114.65791320800781, 25.388498306274414, 72.99606323242188, 37.1042594909668, 24.52629852294922, 127.71067810058594, 199.58718872070312, -57.913665771484375, 118.7568588256836, 72.13489532470703, -50.189605712890625, 159.5423583984375, 164.0528564453125, -1.0897216796875, 179.9188995361328, 246.11065673828125, 69.03623962402344, 118.53932189941406, -4.634363174438477, -4.18780517578125, 78.67311096191406, 94.39232635498047, 0.05998992919921875, 63.35670471191406, 174.8067626953125, -105.62760925292969, 10.472633361816406, 55.37150573730469, 295.4184265136719, -72.93449401855469, 120.08021545410156, 80.80882263183594, -2.24652099609375, -142.17535400390625, 367.01934814453125, 87.20183563232422, 64.46023559570312, 138.09188842773438, -103.28173828125, 98.10758209228516, -112.86451721191406, -88.22118377685547, -181.93814086914062, 29.131431579589844, 34.26764678955078, -5.448143005371094, 66.32485961914062, 17.54480743408203, 12.74565315246582, 4.907453536987305, 44.54297637939453, 33.03215026855469, 45.47304916381836, 18.47350311279297, 108.76457214355469, 1.9179935455322266, 117.40290069580078, 150.66036987304688, -7.608650207519531, 5.481117248535156, 42.759300231933594, 193.65756225585938, 180.64453125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000415.npy"} +{"epoch": 0.6273620559334845, "step": 416, "batch_size": 64, "mean": 56.206329345703125, "std": 127.84849548339844, "min": -282.8236999511719, "p10": -47.50148239135741, "median": 40.17689895629883, "p90": 155.50269927978516, "max": 545.1573486328125, "pos_frac": 0.671875, "sample": [12.782432556152344, -164.29417419433594, -19.668415069580078, 114.81556701660156, 23.548370361328125, 154.4745635986328, -42.27204895019531, -1.00653076171875, 144.47659301757812, 203.17205810546875, 63.028045654296875, 545.1573486328125, 330.4400329589844, 12.223594665527344, -15.806629180908203, 59.168434143066406, 1.2132396697998047, 79.85518646240234, -176.10336303710938, 103.83419799804688, 4.084333419799805, 127.20124816894531, 67.77413940429688, 144.76031494140625, 0.607269287109375, 120.26258850097656, 492.2676696777344, 178.96328735351562, 224.293212890625, 44.20301055908203, 62.000267028808594, 121.67041015625, -32.184112548828125, -59.407676696777344, 1.4769763946533203, -36.121177673339844, -72.65887451171875, -97.2408447265625, -8.394411087036133, 7.160617828369141, -24.081348419189453, 67.46505737304688, -33.77216339111328, -6.8731231689453125, 92.05966186523438, -2.4621238708496094, 141.36949157714844, -282.8236999511719, -4.148927688598633, 91.38764953613281, 36.150787353515625, -13.608434677124023, -49.74266815185547, 129.11842346191406, -38.70708465576172, 35.90047073364258, 7.416175842285156, 119.13859558105469, 66.36992645263672, 44.506813049316406, 134.8914794921875, 140.5048828125, 155.94332885742188, 71.44522857666016], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000416.npy"} +{"epoch": 0.6288737717309146, "step": 417, "batch_size": 64, "mean": 65.2762451171875, "std": 113.14049530029297, "min": -318.9845886230469, "p10": -50.560872650146486, "median": 67.08871841430664, "p90": 173.65714111328126, "max": 518.1786499023438, "pos_frac": 0.765625, "sample": [68.29524230957031, 2.5483932495117188, -20.052146911621094, -198.44326782226562, 177.92507934570312, 130.30816650390625, -318.9845886230469, 173.3197021484375, 79.78367614746094, 103.58662414550781, 518.1786499023438, 144.6527557373047, 35.16210174560547, 59.27063751220703, 88.30294036865234, -67.32708740234375, 177.52589416503906, 77.56172943115234, 20.012313842773438, 65.96742248535156, 73.82386779785156, 95.89755249023438, 78.81973266601562, 39.22795104980469, 48.25861358642578, -37.49971008300781, 194.36041259765625, 60.21253967285156, 47.01398849487305, 171.22210693359375, -50.66588592529297, 173.8017578125, 170.22621154785156, -50.31584167480469, 120.30712890625, -56.98951721191406, 27.468063354492188, -17.069236755371094, 175.36874389648438, 106.99227142333984, 158.95228576660156, 15.099174499511719, 167.30740356445312, 89.01143646240234, -127.82080078125, 91.16819763183594, -7.99714469909668, 68.21001434326172, -5.658721923828125, 42.51667785644531, 44.475616455078125, 114.58705139160156, 16.233505249023438, -80.08956909179688, 63.23255157470703, -34.632568359375, 77.55084228515625, 62.55350112915039, 47.6591796875, -5.464750289916992, 138.8174285888672, 99.97496032714844, 147.14576721191406, 306.79290771484375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000417.npy"} +{"epoch": 0.6303854875283447, "step": 418, "batch_size": 64, "mean": 40.86838150024414, "std": 117.13098907470703, "min": -311.50360107421875, "p10": -99.07761306762694, "median": 43.012840270996094, "p90": 165.08371276855473, "max": 401.47283935546875, "pos_frac": 0.6875, "sample": [68.34910583496094, 132.0714111328125, 104.47052764892578, 57.700286865234375, 34.70949935913086, 144.14120483398438, 61.21809387207031, 97.15621185302734, 193.54461669921875, -66.01326751708984, 153.0714111328125, 232.4026336669922, -66.4876708984375, 121.99136352539062, 54.102928161621094, -106.61924743652344, 141.75677490234375, -1.8404312133789062, 93.08096313476562, -26.31713104248047, 0.33158111572265625, 366.5042724609375, 41.860023498535156, -4.6193389892578125, 195.68597412109375, 45.108802795410156, -132.97793579101562, 12.365264892578125, 51.6466178894043, -11.185516357421875, -311.50360107421875, -116.70587158203125, 121.61317443847656, 54.053497314453125, 170.98915100097656, -41.756103515625, 10.741024017333984, -9.711273193359375, 32.07624816894531, 0.5247726440429688, -264.8668212890625, -102.82913970947266, -90.32405090332031, 10.063215255737305, 89.66493225097656, -26.114479064941406, 0.09156036376953125, 44.16565704345703, 37.166709899902344, -28.80571746826172, 27.056968688964844, 401.47283935546875, 170.23184204101562, 137.26266479492188, 29.97869110107422, 59.048248291015625, -125.15007019042969, 63.098419189453125, 59.243011474609375, 133.05093383789062, 58.464210510253906, -54.61878967285156, 100.63916015625, -9.943679809570312], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000418.npy"} +{"epoch": 0.6318972033257747, "step": 419, "batch_size": 64, "mean": 77.22879028320312, "std": 98.50492095947266, "min": -99.34135437011719, "p10": -25.33814964294433, "median": 64.5001449584961, "p90": 202.46942749023438, "max": 331.31414794921875, "pos_frac": 0.8125, "sample": [-13.887401580810547, 283.63677978515625, 25.363304138183594, 6.272544860839844, -93.77664184570312, -2.574554443359375, 188.826904296875, 61.587615966796875, 14.194206237792969, 22.62350845336914, -99.34135437011719, 152.362060546875, 130.4438018798828, 141.5816192626953, -9.570541381835938, 91.69233703613281, 67.93910217285156, 103.96853637695312, 198.66958618164062, 25.4920654296875, 69.99443054199219, 132.87460327148438, -33.87916564941406, 67.41267395019531, 11.383255004882812, 18.6171875, 204.09793090820312, -18.541793823242188, 273.80120849609375, 104.18765258789062, 44.59108352661133, -95.1333236694336, -28.250873565673828, 173.48995971679688, 331.31414794921875, 115.60879516601562, 134.816650390625, 31.071361541748047, 1.9016876220703125, 73.4603271484375, 9.864065170288086, 182.7897186279297, 28.746734619140625, -1.3367462158203125, 156.0929412841797, 11.842926025390625, 92.76725006103516, 51.48006820678711, 0.051990509033203125, 301.4984436035156, 280.1157531738281, 15.143634796142578, 71.82060241699219, 32.82655334472656, 137.30499267578125, 19.465890884399414, 141.53305053710938, 105.1768569946289, 215.01246643066406, 47.591156005859375, 81.7962646484375, -47.083740234375, -79.41170501708984, 179.23194885253906], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000419.npy"} +{"epoch": 0.6334089191232048, "step": 420, "batch_size": 64, "mean": 43.607688903808594, "std": 107.15522003173828, "min": -258.2600402832031, "p10": -79.64102249145508, "median": 22.66450309753418, "p90": 191.41047058105477, "max": 336.4543762207031, "pos_frac": 0.671875, "sample": [1.9251556396484375, -60.941654205322266, 150.10472106933594, 15.127250671386719, 153.46347045898438, -118.28732299804688, 9.679794311523438, 10.354366302490234, 71.90680694580078, -170.77835083007812, -25.597732543945312, 68.73506927490234, -12.18280029296875, 132.62881469726562, 22.1944580078125, 132.2914581298828, 276.41851806640625, 94.06312561035156, 205.20330810546875, 160.27191162109375, 24.35675621032715, -52.79111862182617, -2.8917064666748047, 60.18645095825195, 93.99726867675781, 162.6508026123047, -90.0572280883789, -85.80237579345703, 64.12857055664062, 13.59640121459961, -100.83120727539062, 58.70318603515625, -35.68275451660156, 39.08208084106445, 0.24525070190429688, -80.63865661621094, -77.3132095336914, -23.242591857910156, 200.14852905273438, 19.83104705810547, -5.321235656738281, -258.2600402832031, -47.07042694091797, 1.8681488037109375, 132.55685424804688, 77.05073547363281, 56.52109146118164, 207.63272094726562, 3.5651626586914062, 80.9693603515625, 171.02166748046875, 45.28691864013672, 111.77371215820312, -8.266847610473633, 23.13454818725586, 130.57135009765625, 202.78692626953125, 336.4543762207031, -8.636280059814453, 88.28120422363281, 4.744743347167969, 239.1785888671875, -69.27224731445312, -29.93490219116211], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000420.npy"} +{"epoch": 0.6349206349206349, "step": 421, "batch_size": 64, "mean": 55.25156784057617, "std": 116.32733917236328, "min": -240.47027587890625, "p10": -92.5903381347656, "median": 55.040279388427734, "p90": 182.05905303955078, "max": 410.6519470214844, "pos_frac": 0.75, "sample": [-138.96688842773438, 303.9447937011719, 8.228096008300781, 127.10714721679688, 166.19146728515625, 29.110107421875, 91.45243835449219, 46.91546630859375, 2.3885269165039062, 410.6519470214844, 283.4840087890625, -66.72314453125, 181.71957397460938, 84.41341400146484, -5.159641265869141, 72.21624755859375, 31.765724182128906, -11.851943969726562, 167.886474609375, 94.36465454101562, 136.810546875, 59.61317443847656, 9.11064338684082, 13.070384979248047, 72.11160278320312, 10.468135833740234, 148.30108642578125, -174.09071350097656, -136.50958251953125, -75.62704467773438, 16.42918586730957, 182.2045440673828, 161.54705810546875, 14.030233383178711, 89.79964447021484, 165.98651123046875, 206.0084686279297, 101.81661224365234, 57.78447723388672, 4.555812835693359, 161.3581085205078, -37.57538986206055, 55.83216857910156, -5.06011962890625, -178.73780822753906, 109.93851470947266, -29.571304321289062, 54.248390197753906, -139.739501953125, 0.2039642333984375, 198.4537353515625, 206.1468505859375, 4.888298034667969, 80.79820251464844, 36.9002685546875, -240.47027587890625, 20.556678771972656, 81.06291198730469, 78.39854431152344, -99.86032104492188, 150.2664031982422, 99.62261199951172, -1.9901123046875, -12.12961196899414], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000421.npy"} +{"epoch": 0.636432350718065, "step": 422, "batch_size": 64, "mean": 76.93031311035156, "std": 113.14125061035156, "min": -121.27159118652344, "p10": -47.13641357421873, "median": 69.92859649658203, "p90": 181.0971221923828, "max": 583.5241088867188, "pos_frac": 0.75, "sample": [177.88348388671875, 323.5701599121094, -64.99522399902344, -8.311981201171875, 147.9701690673828, -13.576019287109375, 149.94943237304688, -121.27159118652344, 101.378173828125, 583.5241088867188, 75.78285217285156, 58.160667419433594, 1.6799468994140625, 121.23629760742188, 343.37359619140625, -16.201709747314453, 123.08987426757812, 195.37806701660156, -11.295059204101562, 120.2635269165039, 99.37664794921875, -82.37384033203125, -17.687564849853516, 3.3921470642089844, 95.29399108886719, 182.47439575195312, 138.56492614746094, 124.84191131591797, 277.6323547363281, 45.9254150390625, 81.74554443359375, 88.02815246582031, 81.14555358886719, -12.795106887817383, 67.93610382080078, 58.2579231262207, 113.47213745117188, 25.11814308166504, 154.77029418945312, 17.565841674804688, -28.40898895263672, -57.80421829223633, 138.82638549804688, 88.82867431640625, 86.28628540039062, -55.162452697753906, -9.953109741210938, 114.00965118408203, 16.477249145507812, 66.79043579101562, 70.90789794921875, 266.8609313964844, 40.04962921142578, -64.15706634521484, 1.7092094421386719, 68.94929504394531, 16.201879501342773, -68.85807037353516, 40.62055206298828, 104.06458282470703, 146.92442321777344, 42.639259338378906, -28.027145385742188, 95.49150085449219], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000422.npy"} +{"epoch": 0.6379440665154951, "step": 423, "batch_size": 64, "mean": 69.64105987548828, "std": 87.10491180419922, "min": -116.97428131103516, "p10": -28.022230529785155, "median": 58.10650062561035, "p90": 190.69688110351564, "max": 234.1474151611328, "pos_frac": 0.765625, "sample": [66.3604736328125, -27.648101806640625, 150.3728790283203, -71.10137939453125, 181.27268981933594, 15.950918197631836, 43.32209396362305, -44.34577178955078, 233.05677795410156, 44.761146545410156, -28.182571411132812, 158.6775360107422, 18.989593505859375, -40.415504455566406, -13.959503173828125, 32.92999267578125, 26.18792724609375, -6.8399505615234375, 184.9906463623047, -88.00836944580078, 123.82771301269531, 53.20355987548828, -55.38343811035156, -3.9175586700439453, 50.175594329833984, 208.6419219970703, 171.285888671875, 2.536785125732422, -116.97428131103516, 81.62103271484375, 182.67172241210938, 213.00970458984375, 151.65122985839844, 102.75444030761719, 104.11029815673828, 87.67642211914062, 55.516082763671875, 118.44618225097656, 193.1424102783203, 203.62496948242188, 16.56659698486328, 31.90703582763672, 2.039093017578125, 129.77713012695312, 130.33935546875, 136.3790283203125, 131.1614990234375, 102.26509094238281, -12.987640380859375, 71.42064666748047, 234.1474151611328, 86.20819091796875, 7.754480361938477, 67.53430938720703, 127.54476165771484, 230.50924682617188, 15.574752807617188, 3.6853790283203125, -26.50860595703125, 14.761306762695312, 161.11669921875, -9.101776123046875, 60.69691848754883, -19.755048751831055], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000423.npy"} +{"epoch": 0.6394557823129252, "step": 424, "batch_size": 64, "mean": 73.80777740478516, "std": 87.98133850097656, "min": -117.9170913696289, "p10": -24.659905624389644, "median": 66.10863494873047, "p90": 203.02516174316412, "max": 328.31988525390625, "pos_frac": 0.84375, "sample": [-32.18727111816406, 230.21292114257812, 79.07806396484375, 132.0303497314453, -16.69268798828125, 129.898193359375, 328.31988525390625, 142.448486328125, -83.74681091308594, 13.298355102539062, 20.80484390258789, 39.593833923339844, 53.520225524902344, -117.9170913696289, 171.00360107421875, 29.91608428955078, 156.2423858642578, -53.11522674560547, 60.920989990234375, 43.40205383300781, -19.945669174194336, 86.25786590576172, 62.64586639404297, 98.1875228881836, -26.186649322509766, 136.0806121826172, 210.49673461914062, 36.60784912109375, 217.6822509765625, 11.827653884887695, 100.68553924560547, 97.6780776977539, 69.57140350341797, 31.312965393066406, 69.86991882324219, 13.850555419921875, 145.29225158691406, 217.5196990966797, 125.38529205322266, 160.48965454101562, 90.03797912597656, 36.297279357910156, 13.694156646728516, 219.00125122070312, 20.524024963378906, 208.56826782226562, 114.55978393554688, 4.127634048461914, -111.8245620727539, 2.7165889739990234, 146.70741271972656, 53.56840515136719, 103.5701904296875, 190.09124755859375, 24.10828399658203, 34.44487762451172, 88.68077087402344, -74.22833251953125, 61.5615234375, 151.61424255371094, 10.985054016113281, 100.81124114990234, -21.097503662109375, 82.83744812011719], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000424.npy"} +{"epoch": 0.6409674981103552, "step": 425, "batch_size": 64, "mean": 57.4749755859375, "std": 93.46161651611328, "min": -139.2124786376953, "p10": -61.82873725891113, "median": 53.499549865722656, "p90": 191.8937423706055, "max": 238.665283203125, "pos_frac": 0.71875, "sample": [89.36173248291016, 119.32491302490234, 19.790321350097656, 12.415512084960938, 132.81060791015625, -11.202617645263672, 235.15304565429688, -4.839496612548828, 109.41561126708984, 93.89495849609375, 213.4730224609375, -68.25786590576172, 65.01036071777344, -26.159698486328125, -60.78971481323242, 133.8975067138672, 45.36913299560547, 201.96658325195312, 2.967071533203125, 68.517578125, -139.2124786376953, 45.12300109863281, -16.838260650634766, 60.1422119140625, 155.85992431640625, 13.466087341308594, 2.0336971282958984, 204.77108764648438, 13.352867126464844, 225.05459594726562, 158.8096923828125, -38.25168228149414, 80.57890319824219, 9.787582397460938, 123.33584594726562, 56.600685119628906, 50.75537109375, 176.23416137695312, 40.57210159301758, 69.09736633300781, 195.39663696289062, -116.7136459350586, 63.15363311767578, 82.30500793457031, 35.6207275390625, -87.4011001586914, 51.86146545410156, 65.02377319335938, 77.32635498046875, -23.330154418945312, 180.28697204589844, -16.434947967529297, -96.13312530517578, 181.07369995117188, -97.51703643798828, -3.841907501220703, 156.1096649169922, 55.13763427734375, -25.791568756103516, 238.665283203125, -49.802059173583984, 28.565383911132812, 183.72032165527344, -62.27403259277344], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000425.npy"} +{"epoch": 0.6424792139077853, "step": 426, "batch_size": 64, "mean": 37.01103973388672, "std": 105.56592559814453, "min": -307.48846435546875, "p10": -67.2007667541504, "median": 33.44241142272949, "p90": 156.29331665039064, "max": 397.778076171875, "pos_frac": 0.671875, "sample": [3.1530799865722656, -5.952335357666016, 113.65970611572266, 129.3705596923828, 56.145652770996094, 135.59693908691406, 43.98283386230469, 200.406005859375, 40.44176483154297, 68.27389526367188, -61.816810607910156, -30.769363403320312, 14.631294250488281, -145.34024047851562, 181.87379455566406, 17.186920166015625, 91.78988647460938, 14.5728759765625, 38.29029083251953, -47.29370880126953, -173.77029418945312, 62.931671142578125, -1.4599151611328125, 47.41661071777344, -27.085670471191406, 0.3188629150390625, 11.858488082885742, -307.48846435546875, 102.13223266601562, 46.815757751464844, 132.27334594726562, 102.9451675415039, 128.61965942382812, -32.35078430175781, -194.02769470214844, 71.37245178222656, -0.4079132080078125, 18.943593978881836, -12.674331665039062, 397.778076171875, 169.0596466064453, 238.29615783691406, -69.24327087402344, -7.4021759033203125, 1.3455047607421875, 193.48463439941406, -34.84837341308594, 1.5249748229980469, -0.21570205688476562, -86.78570556640625, 41.171661376953125, 102.49835968017578, 11.182327270507812, 152.54237365722656, 61.8092041015625, -67.62032318115234, 28.594532012939453, 116.13378143310547, 47.09642028808594, -66.2218017578125, 69.65463256835938, 81.87730407714844, 157.90086364746094, -5.472332000732422], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000426.npy"} +{"epoch": 0.6439909297052154, "step": 427, "batch_size": 64, "mean": 57.05039978027344, "std": 105.33907318115234, "min": -159.07728576660156, "p10": -77.5641616821289, "median": 48.87931251525879, "p90": 199.4476516723633, "max": 407.60699462890625, "pos_frac": 0.703125, "sample": [84.44898986816406, 68.95207977294922, -11.618938446044922, -73.94178009033203, -90.36282348632812, 97.84526062011719, 197.03575134277344, 23.85062026977539, 0.3721466064453125, -74.6767578125, 207.184814453125, -48.64552688598633, -90.14167785644531, -119.1046142578125, 62.090187072753906, 157.447998046875, 111.67777252197266, -20.337661743164062, -13.787620544433594, -38.50816345214844, -0.9502506256103516, 48.074459075927734, 209.36453247070312, 79.27288818359375, 30.264312744140625, 153.39132690429688, 113.91869354248047, 106.33458709716797, 49.765472412109375, 38.38213348388672, 182.46466064453125, 23.0286865234375, 217.41409301757812, 43.43750762939453, 49.684165954589844, 174.78558349609375, 30.95635986328125, -159.07728576660156, 200.4813232421875, 191.07345581054688, -7.074287414550781, 25.96636962890625, -32.01739501953125, 29.361961364746094, -93.37252807617188, -116.82144927978516, 134.2687225341797, 95.13504028320312, 36.045257568359375, -78.80162048339844, 171.99449157714844, 255.45492553710938, 5.5345306396484375, 60.657432556152344, 71.53414154052734, 49.79389953613281, 89.09063720703125, 58.71918869018555, 42.34327697753906, -36.02392578125, 71.34998321533203, -27.656539916992188, 407.60699462890625, 226.28970336914062], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000427.npy"} +{"epoch": 0.6455026455026455, "step": 428, "batch_size": 64, "mean": 68.15831756591797, "std": 116.8813705444336, "min": -248.48245239257812, "p10": -38.00276412963867, "median": 63.29154014587402, "p90": 187.9177993774414, "max": 518.587646484375, "pos_frac": 0.765625, "sample": [-166.61705017089844, 91.93309783935547, 188.627685546875, -3.3565902709960938, 0.31685638427734375, 29.140914916992188, 20.07022476196289, -155.27687072753906, -44.88201904296875, 67.5567855834961, 235.222412109375, -29.849205017089844, 117.39966583251953, -34.526695251464844, 145.03414916992188, 62.650970458984375, 218.77452087402344, 31.391319274902344, -26.47997283935547, 48.880615234375, 174.24293518066406, 9.755138397216797, 8.853004455566406, 4.8737640380859375, 204.87115478515625, 95.04904174804688, 65.67024230957031, -124.64479064941406, 163.4229278564453, -248.48245239257812, 114.5667724609375, -30.790184020996094, 63.93210983276367, 92.42120361328125, 171.5496826171875, 288.7568054199219, 162.95877075195312, 164.45875549316406, 57.980384826660156, 12.134912490844727, 186.2613983154297, 518.587646484375, 72.98259735107422, 157.38836669921875, -39.49250793457031, -31.366180419921875, 105.89227294921875, 40.629356384277344, 6.615942001342773, -61.130035400390625, 26.27959442138672, 1.7982559204101562, 20.492050170898438, 19.730335235595703, 163.48849487304688, 177.4526824951172, 203.2678680419922, 92.3603515625, 87.76458740234375, 137.637939453125, 183.86944580078125, 94.61375427246094, -16.873756408691406, -33.7089729309082], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000428.npy"} +{"epoch": 0.6470143613000756, "step": 429, "batch_size": 64, "mean": 54.32763671875, "std": 91.0197525024414, "min": -192.57058715820312, "p10": -56.88866157531738, "median": 55.18610763549805, "p90": 162.97681427001953, "max": 301.01025390625, "pos_frac": 0.734375, "sample": [138.9016876220703, -49.316070556640625, 0.6469955444335938, -52.679222106933594, 125.87085723876953, 111.14713287353516, 96.15934753417969, 85.8912582397461, -18.168514251708984, 49.43000793457031, 26.836380004882812, -98.33673095703125, 79.27268981933594, 2.9184112548828125, 26.10071563720703, 106.9927978515625, -192.57058715820312, 58.87886047363281, -1.159210205078125, 98.16625213623047, 100.51283264160156, 1.8900032043457031, 123.40289306640625, 159.4872589111328, 152.83120727539062, 162.6055908203125, 176.870361328125, 39.0189208984375, 74.67455291748047, -45.781681060791016, -78.79029846191406, -58.69270706176758, 35.47284698486328, -12.571006774902344, 51.49335479736328, -10.058090209960938, -92.22604370117188, 111.81201934814453, -16.71282196044922, -18.44304656982422, 184.4143829345703, 210.0091552734375, 26.33116912841797, 163.1359100341797, 151.58163452148438, -146.09619140625, -70.92626953125, 62.84550857543945, 20.42377471923828, 116.90641021728516, 15.450294494628906, 92.77774810791016, 41.146888732910156, -26.046306610107422, 91.72587585449219, 38.23335266113281, 201.27066040039062, 68.56758880615234, 301.01025390625, 188.71591186523438, 17.938941955566406, 108.60931396484375, 76.55732727050781, 90.60619354248047], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000429.npy"} +{"epoch": 0.6485260770975056, "step": 430, "batch_size": 64, "mean": 75.25626373291016, "std": 91.98902130126953, "min": -204.7791748046875, "p10": -17.144953536987302, "median": 69.58518600463867, "p90": 181.925146484375, "max": 312.3896789550781, "pos_frac": 0.8125, "sample": [30.168701171875, 31.51832389831543, -33.316986083984375, 42.54779815673828, -10.314018249511719, 164.81524658203125, 6.211597442626953, 180.97610473632812, 63.90650177001953, 107.41887664794922, -114.5061264038086, 116.87052154541016, 221.15133666992188, 38.402976989746094, 165.5011749267578, 112.90897369384766, 84.16626739501953, 58.25364685058594, 211.1788330078125, -11.661474227905273, 35.97123718261719, 231.77499389648438, 100.2273178100586, 81.05857849121094, 3.3457717895507812, 164.7601776123047, 49.50135803222656, 182.33187866210938, 156.9212188720703, 157.6333465576172, 98.6187744140625, 36.419097900390625, 104.99400329589844, 112.86039733886719, -13.658435821533203, 312.3896789550781, 194.23324584960938, 115.7221450805664, -96.48516845703125, 145.5509033203125, -2.833831787109375, 135.95242309570312, 29.477642059326172, 43.411476135253906, 74.02064514160156, 147.89581298828125, 65.14972686767578, 3.392080307006836, -204.7791748046875, -63.628562927246094, 29.06109619140625, 157.48638916015625, -10.538063049316406, 133.66500854492188, 53.06756591796875, 191.13412475585938, 165.18699645996094, 112.19650268554688, 139.26840209960938, -18.639175415039062, 43.876556396484375, -92.32171630859375, 21.758407592773438, 22.771697998046875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000430.npy"} +{"epoch": 0.6500377928949358, "step": 431, "batch_size": 64, "mean": 54.29351806640625, "std": 91.21353149414062, "min": -166.8983154296875, "p10": -40.80088729858399, "median": 47.64607810974121, "p90": 173.83442993164064, "max": 271.4884948730469, "pos_frac": 0.75, "sample": [-164.9361572265625, 131.13088989257812, 77.68234252929688, 22.513648986816406, 48.54403305053711, 145.64309692382812, 248.48468017578125, 111.30193328857422, 169.2572784423828, -7.741952896118164, 12.432449340820312, 185.75865173339844, 136.35885620117188, -33.186065673828125, 32.49744415283203, 27.371023178100586, 271.4884948730469, -4.961540222167969, 180.05075073242188, 66.25088500976562, 52.20984649658203, -128.6598663330078, -40.42369842529297, 94.444091796875, -65.34234619140625, 174.91661071777344, 92.74909973144531, -64.65866088867188, 40.514129638671875, -32.79005432128906, 24.68427848815918, 7.753299713134766, 136.3079833984375, 146.32308959960938, -37.437713623046875, 4.504997253417969, 152.16329956054688, 98.40199279785156, -166.8983154296875, -27.62237548828125, -22.828262329101562, 21.083770751953125, -9.462669372558594, 114.95423889160156, 20.75562286376953, 12.125343322753906, 171.30934143066406, 59.653961181640625, 49.10809326171875, 229.0167236328125, 49.563720703125, 74.77537536621094, 5.8134765625, 46.577152252197266, 64.15070343017578, 20.882511138916016, 59.371734619140625, -44.16643524169922, 2.2936744689941406, 184.79644775390625, 160.583251953125, 81.5613784790039, 46.74812316894531, -40.96253967285156], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000431.npy"} +{"epoch": 0.6515495086923658, "step": 432, "batch_size": 64, "mean": 63.599205017089844, "std": 89.57939910888672, "min": -168.30357360839844, "p10": -46.1397590637207, "median": 51.186506271362305, "p90": 178.6106964111328, "max": 227.54917907714844, "pos_frac": 0.734375, "sample": [-7.4344482421875, -27.240983963012695, -54.90586853027344, -168.30357360839844, 10.873720169067383, 78.27825927734375, 11.6778564453125, 86.03945922851562, -12.481788635253906, 131.74620056152344, 87.31062316894531, -123.40352630615234, 28.22991943359375, 25.918075561523438, 159.1094970703125, -98.68121337890625, 42.753929138183594, 3.01446533203125, 217.4877471923828, 178.39474487304688, -2.8337783813476562, 125.54520416259766, 178.7032470703125, -48.572784423828125, 8.05539321899414, 193.16610717773438, 5.848457336425781, 157.71304321289062, 114.93339538574219, -9.188680648803711, -69.99807739257812, 31.98281478881836, -4.575582504272461, -11.347488403320312, 70.85098266601562, 77.63460540771484, 172.2881622314453, 181.90798950195312, 52.6895751953125, 173.24679565429688, -30.23898696899414, 10.28750228881836, 131.43145751953125, 32.57391357421875, 162.43069458007812, 21.015090942382812, 45.90386962890625, 179.69895935058594, 49.68343734741211, 227.54917907714844, 104.21634674072266, 96.41046142578125, 158.91827392578125, 125.54280090332031, 128.59307861328125, 201.68841552734375, -13.037467956542969, 124.48445129394531, 108.63217163085938, 31.82811737060547, -46.86260223388672, 158.3587646484375, -44.453125, 139.26210021972656], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000432.npy"} +{"epoch": 0.6530612244897959, "step": 433, "batch_size": 64, "mean": 73.24200439453125, "std": 94.89810180664062, "min": -166.85311889648438, "p10": -38.26738891601562, "median": 52.471811294555664, "p90": 202.50918884277348, "max": 250.28298950195312, "pos_frac": 0.796875, "sample": [50.14876174926758, 119.06668090820312, 23.832935333251953, 64.40438842773438, 14.769889831542969, 54.79486083984375, 2.6491641998291016, 162.61764526367188, -2.001239776611328, 32.13397216796875, 10.434249877929688, 19.850730895996094, 71.80709838867188, 31.055931091308594, 250.28298950195312, -75.5105209350586, 82.53565216064453, 134.3411407470703, 107.97632598876953, -16.787967681884766, 97.83677673339844, 44.56477355957031, -71.19886016845703, 162.72572326660156, -166.85311889648438, 39.57625961303711, 136.9900665283203, 45.59095001220703, 125.28191375732422, 135.00628662109375, 12.628929138183594, 145.78509521484375, -27.28617286682129, 37.03986358642578, 26.955974578857422, -12.694580078125, -50.057552337646484, 24.278060913085938, 129.5115966796875, 221.95785522460938, 243.09776306152344, 233.51614379882812, 236.16226196289062, 44.148162841796875, 207.97450256347656, -152.27041625976562, 25.098691940307617, 116.56074523925781, -34.294288635253906, 166.6778564453125, -8.989219665527344, 121.2565689086914, 156.8057861328125, -39.97014617919922, 16.533302307128906, 163.40914916992188, 161.98216247558594, 99.64273071289062, 223.14047241210938, 172.47042846679688, 189.7567901611328, 10.766647338867188, 180.14796447753906, -42.1783447265625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000433.npy"} +{"epoch": 0.654572940287226, "step": 434, "batch_size": 64, "mean": 77.98149871826172, "std": 110.6633529663086, "min": -199.52720642089844, "p10": -50.83959350585937, "median": 85.03753280639648, "p90": 200.94341430664062, "max": 336.3844909667969, "pos_frac": 0.78125, "sample": [-177.49288940429688, 105.8348388671875, 132.0234832763672, 30.219085693359375, 174.6373291015625, 179.921142578125, -55.292118072509766, 201.14669799804688, -85.4014892578125, 123.21380615234375, 25.211618423461914, -44.30809020996094, -199.52720642089844, 58.044525146484375, 149.26385498046875, -53.63880920410156, 138.04258728027344, 112.12423706054688, 199.38238525390625, 187.77816772460938, 85.14636993408203, 2.641855239868164, 329.56536865234375, -3.0413284301757812, -125.1677474975586, 45.93357849121094, 200.46908569335938, 66.47178649902344, -1.4323616027832031, 107.14437866210938, 9.159847259521484, 79.83061218261719, 84.80345153808594, 336.3844909667969, 270.69976806640625, 211.826416015625, -30.433975219726562, 105.37201690673828, 228.255126953125, -11.069740295410156, 168.88015747070312, 4.7349700927734375, 90.71261596679688, 105.84959411621094, 14.93548583984375, 8.370765686035156, 43.42108154296875, 214.53817749023438, 46.33489990234375, 136.1405029296875, 7.171497344970703, 197.69833374023438, -158.2782440185547, -23.977783203125, 9.496879577636719, 91.93659210205078, 147.31077575683594, 171.8480224609375, 117.14703369140625, -18.491226196289062, 84.92869567871094, 128.86135864257812, 157.18728637695312, 50.31641387939453], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000434.npy"} +{"epoch": 0.656084656084656, "step": 435, "batch_size": 64, "mean": 56.183860778808594, "std": 97.27378845214844, "min": -159.50732421875, "p10": -89.3999443054199, "median": 53.747379302978516, "p90": 179.81280517578125, "max": 264.5434265136719, "pos_frac": 0.671875, "sample": [95.33654022216797, 73.70616149902344, -20.851219177246094, 61.31944274902344, -102.85820770263672, 54.87903594970703, -30.700088500976562, 4.402626037597656, 107.70423126220703, -1.319915771484375, 125.8118896484375, 179.41610717773438, -53.8189697265625, 117.40196990966797, 165.18405151367188, 172.385498046875, 23.104270935058594, -68.79170227050781, -11.189300537109375, 202.03790283203125, 59.63178253173828, -15.069656372070312, 165.538818359375, 198.9354705810547, 37.89912414550781, 42.777671813964844, -13.652557373046875, 52.07708740234375, 219.06597900390625, 204.69952392578125, -18.050010681152344, 179.98281860351562, -159.50732421875, 112.46488952636719, 78.18775939941406, 91.24349975585938, -25.910545349121094, -106.2540283203125, 120.23233795166016, 177.878662109375, 125.24407958984375, 197.1453857421875, 264.5434265136719, 85.95958709716797, -5.2689208984375, 52.039207458496094, 34.3798713684082, -40.234657287597656, -21.212566375732422, 171.91024780273438, -108.96934509277344, 30.603012084960938, -28.97895622253418, 89.8967514038086, 127.29345703125, 64.87614440917969, 151.7789764404297, -106.86114501953125, -98.23204803466797, 12.177932739257812, 13.743194580078125, -98.68569946289062, 134.67172241210938, 52.61572265625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000435.npy"} +{"epoch": 0.6575963718820862, "step": 436, "batch_size": 64, "mean": 47.17478561401367, "std": 98.29715728759766, "min": -194.85720825195312, "p10": -93.40069046020507, "median": 50.3359317779541, "p90": 173.62600555419922, "max": 256.5484619140625, "pos_frac": 0.71875, "sample": [93.65522766113281, -40.17190170288086, 151.57826232910156, 6.45646858215332, 134.1275634765625, 4.539644241333008, 79.84066009521484, 146.50929260253906, -97.76944732666016, 39.12226104736328, 34.06629943847656, 173.9591064453125, 25.798240661621094, 71.50527954101562, -9.441951751708984, 180.008544921875, 151.6148681640625, 152.11875915527344, -129.09925842285156, 27.886878967285156, 27.771602630615234, 59.64722442626953, -44.09461212158203, 8.743492126464844, 15.075899124145508, 15.201286315917969, 90.76321411132812, -22.52705192565918, 196.47142028808594, 60.35466766357422, 76.85104370117188, -194.85720825195312, -137.8492431640625, 73.41285705566406, -43.57091522216797, 49.779747009277344, 119.80162048339844, -72.77490997314453, -24.008148193359375, -42.79449462890625, 172.84877014160156, 132.55502319335938, 56.25326156616211, 29.097396850585938, -106.27728271484375, 125.5211181640625, -104.3425064086914, 221.8546600341797, 66.96058654785156, 256.5484619140625, 117.03583526611328, 3.3981857299804688, 203.66961669921875, 111.42254638671875, -12.281227111816406, 50.89211654663086, 166.1976318359375, -83.20692443847656, 93.34073638916016, 8.256990432739258, -63.15606689453125, 202.49082946777344, -109.4283447265625, 71.83257293701172], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000436.npy"} +{"epoch": 0.6591080876795162, "step": 437, "batch_size": 64, "mean": 64.58769226074219, "std": 130.37399291992188, "min": -283.3462219238281, "p10": -73.59621963500976, "median": 50.187875747680664, "p90": 220.3121368408203, "max": 525.9622802734375, "pos_frac": 0.6875, "sample": [525.9622802734375, 220.37808227539062, 142.62396240234375, 83.37484741210938, 51.670875549316406, -58.59636306762695, 176.29127502441406, 114.57118225097656, 245.75177001953125, 31.827167510986328, 157.99708557128906, -18.645751953125, -283.3462219238281, 83.62821960449219, -106.07890319824219, -41.79467010498047, 93.08039855957031, 48.70487594604492, 134.21829223632812, -130.25375366210938, 240.94468688964844, 247.21063232421875, 146.27069091796875, -20.033977508544922, -28.148212432861328, 114.66233825683594, 220.15826416015625, -68.71722412109375, -158.451171875, -75.68721771240234, -25.73504638671875, -55.447425842285156, 74.96190643310547, -22.456382751464844, -15.472526550292969, 204.96658325195312, 62.30799865722656, -44.592529296875, 85.81165313720703, 61.750980377197266, 31.62139892578125, 1.6538238525390625, -50.58794021606445, 149.3734588623047, 148.71453857421875, 177.90103149414062, 25.14795684814453, 168.0, 206.7583770751953, -121.65189361572266, 6.536748886108398, 25.045055389404297, 142.5517578125, 236.71937561035156, 45.127220153808594, 27.96228790283203, 19.396286010742188, 143.33692932128906, -90.85751342773438, 1.5198631286621094, -10.948915481567383, 57.53797912597656, 13.455924987792969, 363.6296081542969], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000437.npy"} +{"epoch": 0.6606198034769464, "step": 438, "batch_size": 64, "mean": 70.64216613769531, "std": 102.86995697021484, "min": -126.7245864868164, "p10": -43.34255332946777, "median": 72.10211944580078, "p90": 175.24692077636718, "max": 411.54364013671875, "pos_frac": 0.734375, "sample": [221.70066833496094, -47.88275146484375, 411.54364013671875, -101.06150817871094, 8.605762481689453, -29.669837951660156, -33.289737701416016, 110.53631591796875, 30.005647659301758, 320.2125244140625, -43.984004974365234, 244.68276977539062, -41.84583282470703, 17.002403259277344, 142.77789306640625, 0.7434844970703125, 93.31693267822266, -126.7245864868164, 139.1829376220703, 93.52155303955078, 137.52935791015625, 143.0272216796875, -20.92425537109375, 76.22250366210938, 74.98927307128906, 136.61395263671875, 89.00755310058594, 5.901943206787109, 161.73204040527344, 112.54862976074219, 127.71200561523438, -30.430328369140625, 134.12542724609375, 16.80567741394043, -14.655494689941406, 168.56973266601562, 174.89096069335938, -24.170774459838867, 64.30105590820312, 29.406782150268555, 119.123046875, -44.83219909667969, 69.63317108154297, 302.027099609375, 133.8798065185547, -40.184661865234375, 80.3368911743164, 86.18292999267578, -55.81976318359375, 41.38002395629883, -11.117198944091797, 8.168977737426758, 37.20002746582031, 175.39947509765625, 144.06082153320312, -94.36011505126953, 149.421630859375, 74.5710678100586, -12.578041076660156, 189.5343780517578, 9.640628814697266, 10.998170852661133, 116.457275390625, 59.397438049316406], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000438.npy"} +{"epoch": 0.6621315192743764, "step": 439, "batch_size": 64, "mean": 70.016845703125, "std": 93.20208740234375, "min": -267.5533447265625, "p10": -18.742807006835935, "median": 54.75010871887207, "p90": 210.70955505371094, "max": 297.65374755859375, "pos_frac": 0.75, "sample": [100.51996612548828, 10.27664566040039, 23.750890731811523, -42.287376403808594, 81.13430786132812, 139.73910522460938, 87.76177978515625, 29.180419921875, -12.816261291503906, 208.37644958496094, 218.1370086669922, 16.855161666870117, 199.50848388671875, 28.972488403320312, 58.892730712890625, 223.9041748046875, 297.65374755859375, 75.22101593017578, 162.06671142578125, -2.1319427490234375, 17.330303192138672, 111.37566375732422, 82.53197479248047, -0.4805717468261719, 163.60147094726562, 21.21593475341797, 215.36874389648438, -54.40547561645508, 42.508026123046875, 211.70945739746094, -25.25703239440918, 216.00599670410156, -13.577613830566406, 12.076416015625, 123.51477813720703, 22.927780151367188, -21.949134826660156, 18.46955108642578, -267.5533447265625, -4.453891754150391, -1.040985107421875, 55.77384948730469, 127.08598327636719, 110.42947387695312, 58.41958999633789, 184.6103057861328, 100.87307739257812, 240.01513671875, 45.76130676269531, 38.158111572265625, 53.72636795043945, 85.45310974121094, -4.378028869628906, 136.14352416992188, 19.757387161254883, 175.80735778808594, 108.52020263671875, -9.28167724609375, -18.96318817138672, -19.642959594726562, 23.69460678100586, 131.42039489746094, 81.2890396118164, -18.22858428955078], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000439.npy"} +{"epoch": 0.6636432350718064, "step": 440, "batch_size": 64, "mean": 65.23944091796875, "std": 93.48800659179688, "min": -118.03546142578125, "p10": -54.37421836853026, "median": 51.7603645324707, "p90": 186.16954650878907, "max": 294.603271484375, "pos_frac": 0.75, "sample": [-40.28355407714844, 37.65141296386719, 62.64453125, 261.2852478027344, 187.22000122070312, 294.603271484375, 179.54248046875, -13.619720458984375, 183.54550170898438, 62.493621826171875, 47.38953399658203, 186.9305419921875, 49.99291229248047, 127.94644165039062, 97.94834899902344, 86.52557373046875, 163.7868194580078, -65.15738677978516, -82.5718765258789, 52.9127197265625, 50.608009338378906, -15.752527236938477, 14.245216369628906, 121.5602035522461, 64.97838592529297, 137.84490966796875, 79.86099243164062, 136.6458740234375, 28.035367965698242, 27.190719604492188, 12.54871940612793, 65.1468734741211, 92.9566650390625, -4.889820098876953, 19.09180450439453, 40.83891296386719, 45.890968322753906, -115.11540985107422, 30.9940128326416, 188.63136291503906, 151.84767150878906, 182.7089385986328, -24.401687622070312, 184.39389038085938, -60.4130744934082, 10.413848876953125, 192.48634338378906, 31.035545349121094, 165.60174560546875, -15.244132995605469, -25.66824722290039, -88.66553497314453, 91.10408020019531, -34.49449920654297, 64.26287078857422, 29.30362319946289, -23.009037017822266, -98.4946060180664, 173.80374145507812, -118.03546142578125, 17.440872192382812, 181.19424438476562, 189.74301147460938, 96.3120346069336], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000440.npy"} +{"epoch": 0.6651549508692366, "step": 441, "batch_size": 64, "mean": 74.07671356201172, "std": 85.20906066894531, "min": -92.88330841064453, "p10": -32.83969497680663, "median": 64.45094299316406, "p90": 188.79526672363284, "max": 245.47157287597656, "pos_frac": 0.78125, "sample": [164.54530334472656, 57.009544372558594, 195.24693298339844, 169.1885986328125, -7.881856918334961, 130.5419921875, 164.36422729492188, -11.084815979003906, 107.8155517578125, -35.96046447753906, -45.999263763427734, 137.73922729492188, -25.557899475097656, 199.23666381835938, 55.199275970458984, 26.825714111328125, 157.275390625, -7.7449951171875, -53.17085647583008, 154.21104431152344, 55.236358642578125, 116.18547821044922, 229.8604736328125, 199.59703063964844, 40.210113525390625, 185.1705322265625, -1.1916618347167969, -12.938804626464844, 245.47157287597656, 65.27227783203125, 24.76032257080078, 57.251922607421875, -89.2269515991211, 80.98043823242188, 128.27276611328125, 4.115142822265625, -65.57630920410156, 178.66917419433594, 117.25811767578125, 55.70367431640625, 63.629608154296875, 163.62261962890625, 50.764122009277344, 16.159801483154297, 119.88130187988281, 92.13900756835938, 123.46546936035156, 12.971626281738281, 96.51264953613281, -24.946334838867188, 145.01895141601562, 9.744363784790039, 190.34872436523438, 226.2242431640625, -59.13087463378906, 72.01372528076172, 72.62262725830078, 1.6140823364257812, 9.481651306152344, 105.0406265258789, -92.88330841064453, 42.22808074951172, 145.676025390625, 11.830156326293945], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000441.npy"} +{"epoch": 0.6666666666666666, "step": 442, "batch_size": 64, "mean": 64.3536148071289, "std": 87.98307800292969, "min": -135.9493408203125, "p10": -38.27338409423827, "median": 54.061912536621094, "p90": 171.16529235839843, "max": 272.94873046875, "pos_frac": 0.75, "sample": [45.517234802246094, 234.40899658203125, 83.47898864746094, -73.89701843261719, 56.81117248535156, 256.1999206542969, 210.30465698242188, 169.4167938232422, 49.2450065612793, 137.84121704101562, 59.87546157836914, 124.95481872558594, 162.19589233398438, 15.7349853515625, 140.7318115234375, 120.05216979980469, -2.3797454833984375, 29.776580810546875, 51.312652587890625, -21.08411407470703, -80.64413452148438, 104.85539245605469, 81.05902099609375, -25.331085205078125, 94.70343017578125, 5.809074401855469, -105.20763397216797, 96.76239013671875, -11.264169692993164, 179.10902404785156, 1.8953399658203125, 62.738014221191406, 126.20316314697266, 171.67092895507812, 236.20904541015625, 83.51354217529297, -6.886390686035156, -1.6421928405761719, 272.94873046875, 102.77897644042969, 74.94583892822266, 2.4346237182617188, 6.860868453979492, 15.457805633544922, -0.7027187347412109, -135.9493408203125, -44.059539794921875, 82.53024291992188, -43.82008361816406, 20.228378295898438, 143.36248779296875, 120.0665512084961, -70.98138427734375, 35.91742706298828, 46.837860107421875, -11.943822860717773, 48.86756896972656, 33.747314453125, 93.86700439453125, 169.9854736328125, 135.22010803222656, 142.0944061279297, -19.30188751220703, 33.188316345214844], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000442.npy"} +{"epoch": 0.6681783824640968, "step": 443, "batch_size": 64, "mean": 96.82554626464844, "std": 125.316162109375, "min": -165.0845947265625, "p10": -24.067544746398923, "median": 101.09907531738281, "p90": 201.1503829956055, "max": 765.289306640625, "pos_frac": 0.8125, "sample": [9.256549835205078, 163.7683563232422, 123.41923522949219, 169.45338439941406, 34.95551300048828, 116.18124389648438, 192.99559020996094, -76.05177307128906, 74.23088073730469, 143.7345428466797, 14.212486267089844, -1.430856704711914, 57.08628463745117, 164.09695434570312, 51.69997787475586, 204.76083374023438, 1.6401920318603516, 156.61740112304688, 74.86017608642578, 162.14508056640625, 765.289306640625, -165.0845947265625, -31.25151824951172, 148.17041015625, -19.805688858032227, 270.5048828125, 160.40087890625, 61.96105194091797, 185.19387817382812, 153.17132568359375, 175.81919860839844, 105.44778442382812, 96.7503662109375, 138.8120880126953, 143.82101440429688, 184.22445678710938, 152.5469970703125, 187.75814819335938, 124.92451477050781, 204.64529418945312, 16.37415313720703, 148.4413604736328, 1.910299301147461, 73.7513198852539, 162.89866638183594, 177.0019073486328, -11.278556823730469, 20.592559814453125, 19.54821014404297, 10.771856307983398, 14.38455581665039, -64.3918228149414, 228.9569091796875, 35.42439270019531, 43.444515228271484, -8.733516693115234, -132.51856994628906, -26.32159423828125, 207.9571075439453, 89.70928192138672, -25.894054412841797, -15.483161926269531, 137.8531036376953, 211.50389099121094], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000443.npy"} +{"epoch": 0.6696900982615268, "step": 444, "batch_size": 64, "mean": 39.99380874633789, "std": 119.8778305053711, "min": -195.3801727294922, "p10": -84.7328643798828, "median": 23.63353443145752, "p90": 174.37715759277344, "max": 549.2127685546875, "pos_frac": 0.625, "sample": [138.23577880859375, -17.252574920654297, -64.82390594482422, 5.213274002075195, 48.49114990234375, 45.65578079223633, 117.96543884277344, -70.71064758300781, 2.2456016540527344, 100.10327911376953, -4.219429016113281, 73.46123504638672, -90.74238586425781, -195.3801727294922, 181.91590881347656, 17.670928955078125, 74.74095153808594, 91.225830078125, 50.7691764831543, -155.07470703125, 244.6685791015625, 207.167724609375, -4.706855773925781, -19.27801513671875, -3.0965576171875, 28.45795440673828, 12.694679260253906, -22.975950241088867, -49.662841796875, -44.848175048828125, 147.69833374023438, 15.283100128173828, 109.23326873779297, 27.451278686523438, 27.5701904296875, 170.22479248046875, 38.41706848144531, 175.04266357421875, -20.824058532714844, -183.7213134765625, 27.029632568359375, 549.2127685546875, -168.09429931640625, -36.45543670654297, 33.985137939453125, 170.2084197998047, 137.20486450195312, 303.52960205078125, 182.12710571289062, 68.70773315429688, 12.334360122680664, 95.26994323730469, -17.252117156982422, 0.080230712890625, 172.82431030273438, -45.686363220214844, -45.223026275634766, 20.237436294555664, 49.89271545410156, -106.97968292236328, -131.34332275390625, 113.54588317871094, -15.76463508605957, -14.073999404907227], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000444.npy"} +{"epoch": 0.671201814058957, "step": 445, "batch_size": 64, "mean": 71.56088256835938, "std": 91.84803009033203, "min": -136.36492919921875, "p10": -26.13474597930908, "median": 66.2099494934082, "p90": 179.50385131835938, "max": 309.1935729980469, "pos_frac": 0.78125, "sample": [30.03939437866211, 230.24472045898438, 11.639335632324219, -10.113204956054688, -103.44589233398438, 49.09857177734375, 150.23712158203125, 111.07432556152344, 59.73614501953125, 70.6954345703125, -3.33197021484375, -136.36492919921875, -21.495513916015625, 52.95066833496094, -83.71878051757812, 147.88348388671875, -27.364517211914062, 19.946868896484375, 35.477996826171875, 87.67086029052734, -8.252487182617188, 177.4710693359375, -115.57146453857422, 62.299285888671875, -123.6581039428711, 179.60186767578125, 11.829780578613281, 171.68002319335938, 60.395973205566406, 10.678436279296875, -86.73921203613281, -9.190923690795898, 90.05890655517578, 194.4344940185547, 162.41802978515625, 117.99104309082031, 179.275146484375, 158.1463165283203, 80.29668426513672, 20.332969665527344, 185.14398193359375, 117.13428497314453, 65.96556091308594, 195.04176330566406, 96.53965759277344, 177.99037170410156, 309.1935729980469, 10.123659133911133, 179.9552459716797, -23.26527976989746, 108.427001953125, -1.4887619018554688, 84.05731201171875, 64.60306549072266, 90.55924224853516, 22.897064208984375, 154.62408447265625, 168.65554809570312, 54.77385711669922, 66.45433807373047, 103.87354278564453, 108.52538299560547, 175.47665405273438, 60.27696990966797], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000445.npy"} +{"epoch": 0.672713529856387, "step": 446, "batch_size": 64, "mean": 85.02166748046875, "std": 91.10144805908203, "min": -120.61662292480469, "p10": -20.906588745117183, "median": 87.22777557373047, "p90": 198.98925476074223, "max": 316.3750305175781, "pos_frac": 0.78125, "sample": [10.340681076049805, -23.29975128173828, -120.61662292480469, 87.01699829101562, 55.397743225097656, 134.7487335205078, 87.43855285644531, -13.958854675292969, 316.3750305175781, 94.42332458496094, 30.89139175415039, 136.10244750976562, 109.00547790527344, 119.57450103759766, 105.5211410522461, 190.71046447753906, 47.904640197753906, 97.49514770507812, 92.5036392211914, 210.32644653320312, 3.459705352783203, 156.90821838378906, -23.911529541015625, 130.49435424804688, 179.67593383789062, 127.7315902709961, 11.986835479736328, 169.07980346679688, 257.578857421875, -17.449623107910156, 126.23607635498047, 12.461233139038086, -88.19713592529297, -11.990379333496094, -22.388145446777344, 11.82331657409668, 21.64080047607422, -42.51760482788086, 82.36710357666016, 140.00729370117188, 51.86964416503906, 231.5142364501953, 202.5373077392578, 132.36163330078125, 269.9586181640625, -2.944570541381836, 221.1558837890625, 172.2809600830078, -8.015741348266602, 44.483951568603516, 75.13533782958984, 48.27717590332031, 159.94830322265625, 152.84136962890625, 148.5784149169922, 176.47802734375, 33.322818756103516, 59.31377410888672, -7.43206787109375, 152.51626586914062, -7.7904205322265625, 21.70577621459961, -43.37266540527344, 163.76527404785156], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000446.npy"} +{"epoch": 0.674225245653817, "step": 447, "batch_size": 64, "mean": 88.62079620361328, "std": 104.56442260742188, "min": -159.66925048828125, "p10": -44.3318000793457, "median": 89.88582229614258, "p90": 202.03457946777345, "max": 418.76263427734375, "pos_frac": 0.765625, "sample": [253.340087890625, 207.3826141357422, 139.03707885742188, 27.59160614013672, 52.9891357421875, -48.25355529785156, 107.14653015136719, -15.4351806640625, -84.09400177001953, 79.83354187011719, 90.45026397705078, 118.1135482788086, 14.640846252441406, 78.87442016601562, 161.3326416015625, 169.40020751953125, 72.00942993164062, 95.93374633789062, 61.68711471557617, 79.4798583984375, 60.190338134765625, -33.54712677001953, -9.428741455078125, -35.18103790283203, 13.349838256835938, 245.48696899414062, 195.91139221191406, 149.4374542236328, 89.32138061523438, -53.581787109375, 97.56060791015625, 65.70201110839844, 161.740478515625, 250.42181396484375, 67.95457458496094, 152.23373413085938, 202.73919677734375, 155.39501953125, 197.64813232421875, -10.04599380493164, 28.55413818359375, 48.24815368652344, 187.20053100585938, -83.84786224365234, -14.349414825439453, -123.72738647460938, 24.22734832763672, 160.70254516601562, 162.13916015625, 138.82476806640625, -7.0778656005859375, 153.15521240234375, -77.78132629394531, 213.06338500976562, 99.25260925292969, 79.78240966796875, 113.62150573730469, -159.66925048828125, 141.86090087890625, 418.76263427734375, 198.05126953125, -17.403587341308594, 200.39047241210938, 162.9822998046875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000447.npy"} +{"epoch": 0.6757369614512472, "step": 448, "batch_size": 64, "mean": 65.66925811767578, "std": 91.83324432373047, "min": -176.28016662597656, "p10": -44.775712585449206, "median": 58.747859954833984, "p90": 185.5690643310547, "max": 237.924560546875, "pos_frac": 0.8125, "sample": [175.58258056640625, -115.74869537353516, 186.7325439453125, 59.31346130371094, 48.912574768066406, 108.52130889892578, -14.1966552734375, 141.82510375976562, 59.844970703125, 109.1633071899414, 234.9764862060547, 108.7393569946289, 93.41062927246094, 19.444366455078125, 122.03614044189453, 203.2967071533203, -25.044055938720703, 202.0557861328125, 59.835472106933594, -4.823825836181641, 3.6446380615234375, 148.35414123535156, 36.363319396972656, 108.97627258300781, 98.33738708496094, 2.3294944763183594, 166.3202667236328, 42.549041748046875, 27.617958068847656, -50.04499816894531, -50.561988830566406, 29.190292358398438, 36.533668518066406, 13.147796630859375, -5.706718444824219, 83.27719116210938, -51.07122802734375, 26.00993537902832, 237.924560546875, -32.480712890625, 2.804403305053711, -176.28016662597656, 78.35940551757812, 30.863388061523438, 147.12469482421875, 130.11244201660156, 87.19407653808594, 149.59735107421875, 58.18225860595703, 12.716279983520508, -54.253196716308594, 37.69401550292969, 65.88360595703125, 168.49874877929688, 6.205772399902344, 182.85427856445312, 3.4103240966796875, -172.13925170898438, 194.1196746826172, 178.35467529296875, 149.23220825195312, 23.898162841796875, 26.39167022705078, 227.41970825195312], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000448.npy"} +{"epoch": 0.6772486772486772, "step": 449, "batch_size": 64, "mean": 40.81220245361328, "std": 100.51680755615234, "min": -207.3458251953125, "p10": -84.07744369506835, "median": 32.503347396850586, "p90": 174.9749206542969, "max": 310.19781494140625, "pos_frac": 0.671875, "sample": [3.311624526977539, 240.2779541015625, -96.41450500488281, 89.65927124023438, 49.680503845214844, -28.774856567382812, 5.5567169189453125, 308.4183349609375, 57.03807830810547, 184.25961303710938, 101.1325912475586, 90.62670135498047, 20.96776580810547, -66.78646850585938, -52.347259521484375, 32.196380615234375, 226.17263793945312, 12.834312438964844, -11.590560913085938, 51.88858413696289, -88.87924194335938, 39.84160614013672, 167.56399536132812, 178.15103149414062, -128.79605102539062, 24.04355239868164, 107.99555206298828, -207.3458251953125, 108.16271209716797, 46.01042175292969, 107.93763732910156, -88.59019470214844, 14.42657470703125, -116.19888305664062, 310.19781494140625, 5.087493896484375, 219.95436096191406, -13.97384262084961, 32.8103141784668, 53.84527587890625, 27.927526473999023, -43.44667053222656, -38.86249923706055, 35.914886474609375, -59.25298309326172, 84.35690307617188, -1.1968536376953125, 94.21865844726562, 19.37514877319336, 60.32695770263672, 134.66970825195312, -5.5804595947265625, 78.43949127197266, 33.95829772949219, -88.69308471679688, 130.79669189453125, 136.71153259277344, -73.54769134521484, 126.01992797851562, -22.60558319091797, 42.720855712890625, -28.764801025390625, 16.179798126220703, -38.03630828857422], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000449.npy"} +{"epoch": 0.6787603930461074, "step": 450, "batch_size": 64, "mean": 61.01060104370117, "std": 94.25581359863281, "min": -135.37486267089844, "p10": -51.85222129821777, "median": 54.34152412414551, "p90": 191.91402282714847, "max": 381.4224853515625, "pos_frac": 0.71875, "sample": [-2.140748977661133, -14.97467041015625, -32.522071838378906, -32.028297424316406, 213.68099975585938, 216.16452026367188, 63.33778381347656, 96.93203735351562, 125.92219543457031, 197.67718505859375, 123.5942153930664, 44.97380065917969, 66.36668395996094, 55.54869079589844, 9.309318542480469, 204.6475830078125, 20.8505859375, 57.11271667480469, 144.9705047607422, 204.4046630859375, 1.8046226501464844, 53.13435745239258, 3.8958358764648438, -38.51592254638672, 74.77250671386719, -52.003604888916016, 145.88385009765625, 173.53192138671875, 72.78974151611328, 5.479913711547852, -6.735466003417969, 184.96356201171875, 59.0484619140625, 78.21084594726562, -135.37486267089844, 194.89279174804688, 144.97940063476562, -3.6887550354003906, 138.4398193359375, 57.65786361694336, 35.163089752197266, 145.05694580078125, 103.7397232055664, -80.86717987060547, 38.518280029296875, 26.84670066833496, 126.27996826171875, 13.5953369140625, 108.98977661132812, -51.9109992980957, -2.0991573333740234, -82.44828033447266, 17.303972244262695, 74.43063354492188, 381.4224853515625, 4.964691162109375, -77.22621154785156, 142.6111602783203, -51.71507263183594, 175.53050231933594, -16.628890991210938, 45.8209228515625, -63.05348205566406, -26.64092254638672], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000450.npy"} +{"epoch": 0.6802721088435374, "step": 451, "batch_size": 64, "mean": 54.11791229248047, "std": 90.43482971191406, "min": -154.17974853515625, "p10": -45.23507804870605, "median": 42.8194465637207, "p90": 173.91231079101564, "max": 290.4814758300781, "pos_frac": 0.703125, "sample": [110.37458801269531, -10.799880981445312, 59.77201843261719, 45.50636291503906, 196.6733856201172, 28.03271484375, -154.17974853515625, 17.88690185546875, 35.33251190185547, 183.66766357421875, 82.25016784667969, 102.41690826416016, 285.69293212890625, 29.545135498046875, 136.40989685058594, 64.84945678710938, 71.98998260498047, 12.067024230957031, 65.35594177246094, 93.13794708251953, 33.07514190673828, -132.79931640625, -85.82176208496094, 290.4814758300781, 152.48251342773438, -65.12615966796875, 152.2718505859375, -50.1739501953125, -44.03636169433594, -10.227180480957031, 40.132530212402344, 176.69476318359375, 5.308506011962891, 12.244039535522461, 102.29838562011719, 53.996498107910156, 167.419921875, 112.52554321289062, -18.303497314453125, 36.90648651123047, -3.028076171875, 110.97642517089844, 6.725212097167969, 15.016120910644531, 111.14168548583984, 58.86383056640625, 7.562793731689453, 153.0057830810547, 217.73533630371094, 66.65240478515625, -0.36517333984375, 108.88846588134766, 65.93659973144531, -32.633872985839844, -14.615646362304688, -14.929458618164062, 157.7495880126953, -2.9349632263183594, -8.740625381469727, 58.89564895629883, 192.15652465820312, -45.74881362915039, -94.14862060546875, -35.94635772705078], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000451.npy"} +{"epoch": 0.6817838246409675, "step": 452, "batch_size": 64, "mean": 59.5404167175293, "std": 101.74258422851562, "min": -160.1061248779297, "p10": -58.41956787109374, "median": 39.57575035095215, "p90": 178.33256988525392, "max": 332.978759765625, "pos_frac": 0.71875, "sample": [32.81635284423828, 29.060325622558594, -13.846376419067383, 92.84260559082031, -17.376502990722656, 125.21580505371094, 25.18183135986328, 149.47438049316406, 137.5883026123047, 180.32305908203125, 31.588817596435547, 249.9179229736328, -147.40655517578125, 141.90814208984375, 170.153076171875, 86.77336883544922, 264.24774169921875, 98.27447509765625, 110.01272583007812, 190.38491821289062, 17.87000274658203, 10.768356323242188, 159.95211791992188, 102.82444763183594, 8.421810150146484, -29.07634925842285, 146.40280151367188, 63.8289794921875, -8.607322692871094, 22.78839874267578, -3.3273277282714844, 332.978759765625, -127.09188842773438, 156.7855224609375, -49.84651184082031, 99.37579345703125, 24.282859802246094, 95.96882629394531, -81.0967788696289, 34.414913177490234, 98.1307144165039, -121.10665893554688, 88.23644256591797, 44.73658752441406, 67.80859375, -123.73554992675781, 201.79458618164062, 139.3680419921875, 27.043899536132812, 3.1107444763183594, 129.8299102783203, -18.671031951904297, 23.541025161743164, 121.29660034179688, -1.1539649963378906, 14.804550170898438, -36.24437713623047, 173.68809509277344, 146.99839782714844, -62.09373474121094, -22.433364868164062, 202.65142822265625, -160.1061248779297, -41.659820556640625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000452.npy"} +{"epoch": 0.6832955404383976, "step": 453, "batch_size": 64, "mean": 65.1996078491211, "std": 104.884765625, "min": -154.29644775390625, "p10": -72.79046401977537, "median": 51.79945373535156, "p90": 195.28594360351562, "max": 287.165771484375, "pos_frac": 0.75, "sample": [53.416831970214844, 91.47947692871094, 40.9776611328125, 6.51641845703125, 287.165771484375, 51.69892883300781, -11.873550415039062, 171.44351196289062, 139.24415588378906, -13.054450988769531, -102.3353271484375, 23.250144958496094, 187.66656494140625, 0.9970626831054688, 121.1371841430664, 159.90634155273438, 260.3564453125, 14.767822265625, 173.10891723632812, 53.1324577331543, 8.683820724487305, -2.8765716552734375, 89.66743469238281, 22.825332641601562, -19.33026123046875, -154.29644775390625, -147.17855834960938, 175.35137939453125, 5.417228698730469, 173.00704956054688, 195.13162231445312, 41.834266662597656, 67.029541015625, -114.77906799316406, 2.9614219665527344, 52.561790466308594, 160.06915283203125, 96.74691772460938, -80.1141128540039, 216.32540893554688, 84.34026336669922, 257.53460693359375, -35.504573822021484, -5.607166290283203, -55.70195007324219, 51.89997863769531, -102.57271575927734, 39.992313385009766, 156.64959716796875, 37.43537902832031, 8.67264175415039, -13.722721099853516, 29.82484245300293, 93.58772277832031, 87.41111755371094, -98.10934448242188, -21.970088958740234, 275.06805419921875, 195.35208129882812, 129.44549560546875, 174.18780517578125, 171.62350463867188, 203.09915161132812, 11.799163818359375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000453.npy"} +{"epoch": 0.6848072562358276, "step": 454, "batch_size": 64, "mean": 71.45110321044922, "std": 104.32642364501953, "min": -259.8680419921875, "p10": -32.468282318115236, "median": 76.56587219238281, "p90": 177.82530822753907, "max": 397.11798095703125, "pos_frac": 0.796875, "sample": [170.69680786132812, -56.36701202392578, -31.60454559326172, 150.51983642578125, -259.8680419921875, -32.83845520019531, 137.43638610839844, 8.304740905761719, 37.84891128540039, 45.247337341308594, 76.00111389160156, 162.6485137939453, 5.1429443359375, -173.58450317382812, 204.3018798828125, 102.12651062011719, 151.2528076171875, 68.65126037597656, 178.68161010742188, 280.1951904296875, -26.9208984375, -148.50857543945312, 7.67772102355957, 142.9481201171875, 187.50537109375, 397.11798095703125, -12.605165481567383, 33.42005920410156, 128.89974975585938, 24.658348083496094, 28.196243286132812, 117.72960662841797, 175.8272705078125, 19.77977752685547, 130.62521362304688, 112.56440734863281, 205.9151611328125, -40.57669448852539, 193.41981506347656, 77.90050506591797, 77.68708801269531, 127.49897766113281, 27.55120086669922, 7.142352104187012, 9.461502075195312, 46.385520935058594, 164.28504943847656, 77.13063049316406, -6.157621383666992, 31.275909423828125, 121.23991394042969, 126.35189819335938, 9.923900604248047, 174.1334228515625, 122.50835418701172, 25.09716796875, 130.35769653320312, 105.63401794433594, -14.374954223632812, 162.5526580810547, 97.11591339111328, 48.522151947021484, -30.418630599975586, -48.40065002441406], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000454.npy"} +{"epoch": 0.6863189720332578, "step": 455, "batch_size": 64, "mean": 69.42816925048828, "std": 97.40458679199219, "min": -136.30093383789062, "p10": -46.69647827148437, "median": 58.64081573486328, "p90": 202.1236114501953, "max": 339.5719909667969, "pos_frac": 0.765625, "sample": [85.31002807617188, 73.90631103515625, 62.788230895996094, 242.86871337890625, 77.60062408447266, 2.5724430084228516, 232.5228729248047, 157.5778350830078, -64.16129302978516, 33.593345642089844, 112.87261962890625, -96.60570526123047, 102.19132995605469, 87.43714904785156, 79.56201171875, 305.7575378417969, -30.10057830810547, 4.4568023681640625, 10.497749328613281, -50.027198791503906, 202.68649291992188, 8.425569534301758, 157.70944213867188, -9.532485961914062, -26.761390686035156, 201.8447265625, -6.8514862060546875, 155.67630004882812, 179.4645233154297, 16.793052673339844, 41.958072662353516, 54.49340057373047, 202.24313354492188, 101.20124053955078, 44.63945770263672, -9.610130310058594, -86.75186157226562, 107.1321792602539, 154.40298461914062, 167.63497924804688, -27.357303619384766, 41.692955017089844, 149.23220825195312, -136.30093383789062, 138.8327178955078, 185.2710418701172, 74.20992279052734, 71.45458221435547, 19.505279541015625, 16.401824951171875, 213.873779296875, 53.293846130371094, 4.735683441162109, 122.35482788085938, 4.8387908935546875, -4.353057861328125, 111.61747741699219, -38.92479705810547, -51.36375427246094, 42.883331298828125, 64.00296783447266, 16.659488677978516, -56.14908218383789, 339.5719909667969], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000455.npy"} +{"epoch": 0.6878306878306878, "step": 456, "batch_size": 64, "mean": 55.658138275146484, "std": 90.61427307128906, "min": -157.12176513671875, "p10": -24.805916213989256, "median": 45.55440139770508, "p90": 170.24640502929688, "max": 334.2774353027344, "pos_frac": 0.75, "sample": [91.32856750488281, -21.35126495361328, 11.193933486938477, 77.55152130126953, 334.2774353027344, 3.464977264404297, -4.392742156982422, 108.06021118164062, -18.56039047241211, 116.60050964355469, 57.88059997558594, 83.11006164550781, 258.57672119140625, 132.8048095703125, 12.476608276367188, 13.055740356445312, 54.564544677734375, 82.0655746459961, 11.366910934448242, 168.0096435546875, -72.58802795410156, 46.203575134277344, 68.35138702392578, -25.54082489013672, 158.55809020996094, -32.011505126953125, -157.12176513671875, -137.4319305419922, 95.63410186767578, -22.827545166015625, -0.4861946105957031, 8.311660766601562, 37.391693115234375, 99.31013488769531, 35.13764190673828, 94.34953308105469, 55.91368103027344, -2.5446014404296875, 77.25698852539062, 7.812679290771484, -14.366491317749023, 25.848346710205078, 73.98400115966797, 44.90522766113281, -84.88761901855469, -128.93618774414062, 89.12644958496094, 28.121124267578125, -19.47771453857422, -23.091129302978516, 171.20501708984375, 182.11883544921875, 30.369552612304688, 230.1090545654297, 51.138336181640625, 116.61180114746094, 178.92579650878906, 6.455970764160156, 144.0849609375, 153.6015625, 40.4384765625, 33.47230529785156, 97.67198181152344, 228.92845153808594], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000456.npy"} +{"epoch": 0.6893424036281179, "step": 457, "batch_size": 64, "mean": 58.38153839111328, "std": 87.96611785888672, "min": -290.15087890625, "p10": -27.628947257995602, "median": 41.063228607177734, "p90": 169.89544677734375, "max": 255.7285919189453, "pos_frac": 0.8125, "sample": [53.379737854003906, -29.433517456054688, 11.25552749633789, 122.9483642578125, 72.94671630859375, -99.42341613769531, 26.77231216430664, 27.36810302734375, 38.72455596923828, 25.103851318359375, 38.71394348144531, 202.0455322265625, 111.72892761230469, 118.76610565185547, -15.659378051757812, -75.3214111328125, 187.8701934814453, 32.916015625, -0.1710796356201172, 139.49281311035156, 161.73995971679688, 35.10160827636719, 255.7285919189453, 43.40190124511719, -14.72531509399414, 21.815595626831055, -23.418283462524414, 161.1558837890625, 118.54324340820312, 22.077564239501953, -99.54084777832031, -87.4244155883789, 24.12775421142578, 77.54827880859375, 37.07881546020508, -48.18827819824219, -290.15087890625, 101.30958557128906, 90.37032318115234, 23.989105224609375, 74.60027313232422, 13.90032958984375, 177.9287109375, 108.33518981933594, -4.184520721435547, 25.48444366455078, 133.5071563720703, 33.08332824707031, 12.382835388183594, 172.8121337890625, 126.8942642211914, 76.24433135986328, 170.40951538085938, 77.59284210205078, 91.61561584472656, 182.4608612060547, 168.69595336914062, 77.15599822998047, 30.077478408813477, 161.56373596191406, 112.28433227539062, 12.527450561523438, 70.66567993164062, 29.81666374206543], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000457.npy"} +{"epoch": 0.690854119425548, "step": 458, "batch_size": 64, "mean": 78.98739624023438, "std": 111.53781127929688, "min": -242.5091552734375, "p10": -43.18722534179685, "median": 68.87090301513672, "p90": 208.28358001708986, "max": 386.8734130859375, "pos_frac": 0.796875, "sample": [112.65864562988281, 157.49948120117188, 15.496915817260742, 173.70062255859375, 76.28540802001953, 313.1656494140625, 78.38229370117188, -242.5091552734375, -2.1405715942382812, 10.537246704101562, -6.0198974609375, -102.53044128417969, -11.014083862304688, 134.4591827392578, 191.76611328125, 28.31160545349121, 71.78478240966797, 164.00196838378906, 66.84212493896484, 130.7630157470703, 45.497581481933594, 197.9579620361328, 70.8996810913086, -16.703468322753906, 217.9639434814453, 33.89231872558594, 251.85768127441406, 46.931312561035156, 42.47844696044922, 156.7669677734375, 206.38211059570312, 110.00498962402344, 61.94966506958008, -136.76882934570312, 104.80857849121094, -3.526552200317383, 16.66175079345703, 53.66419982910156, 90.90989685058594, 159.02011108398438, 130.0341033935547, 52.35238265991211, -150.96804809570312, -8.285968780517578, 216.4464111328125, 16.17751693725586, 132.6450653076172, -54.53740692138672, 185.2495880126953, 246.01597595214844, -57.33401107788086, 386.8734130859375, 110.59768676757812, 43.98058319091797, -121.96074676513672, 179.34202575683594, 33.05223846435547, 176.010498046875, 11.1129150390625, 11.561538696289062, 21.07339096069336, 44.84100341796875, 209.09849548339844, 169.725341796875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000458.npy"} +{"epoch": 0.6923658352229781, "step": 459, "batch_size": 64, "mean": 84.41233825683594, "std": 108.79188537597656, "min": -176.522705078125, "p10": -39.913134765624996, "median": 79.42035675048828, "p90": 202.40299224853516, "max": 471.1334228515625, "pos_frac": 0.828125, "sample": [43.94983673095703, 128.9438934326172, -35.748435974121094, 141.3897705078125, 14.402626037597656, -176.522705078125, 3.413301467895508, 116.77688598632812, 26.271926879882812, 145.42486572265625, 166.44772338867188, -25.231510162353516, 96.70379638671875, -67.93376922607422, 180.14381408691406, 286.93017578125, 23.701431274414062, 97.24996948242188, 52.957969665527344, 152.96701049804688, 134.43359375, 139.45237731933594, 51.168460845947266, 20.041831970214844, 19.95932960510254, 151.31381225585938, 83.2755355834961, 140.22763061523438, 37.60943603515625, 128.91436767578125, 319.3674011230469, 24.70288848876953, 3.149930953979492, 14.406927108764648, -2.3308792114257812, 161.8762969970703, 67.74622344970703, -105.52027893066406, 151.4046173095703, -44.62411117553711, 10.927640914916992, 83.06430053710938, 174.9453125, -4.013031005859375, 220.44581604003906, -90.03079223632812, 164.90573120117188, 64.11524200439453, -95.2666015625, 265.3238830566406, 7.558876037597656, 13.497337341308594, -41.69800567626953, 175.52072143554688, 102.44764709472656, 46.03739929199219, 144.29779052734375, 75.77641296386719, 202.71273803710938, 208.08795166015625, 201.6802520751953, 30.44446563720703, 471.1334228515625, 101.66068267822266], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000459.npy"} +{"epoch": 0.6938775510204082, "step": 460, "batch_size": 64, "mean": 89.6301498413086, "std": 110.56551361083984, "min": -62.48230743408203, "p10": -35.58395233154296, "median": 71.23187637329102, "p90": 215.5860092163086, "max": 527.2881469726562, "pos_frac": 0.765625, "sample": [202.6385040283203, 131.470947265625, 527.2881469726562, -48.68461608886719, 14.031120300292969, 208.52049255371094, 211.96412658691406, 58.935638427734375, 307.455322265625, 8.004596710205078, -62.48230743408203, 29.5609130859375, 60.160423278808594, 59.44915008544922, -1.30126953125, 36.04889678955078, 61.56849670410156, 150.65029907226562, -37.772186279296875, 173.5001220703125, 104.0759048461914, 260.02545166015625, 26.47785186767578, 229.67715454101562, 192.92730712890625, 71.45751190185547, 28.671688079833984, 50.54612731933594, 181.0078582763672, 71.00624084472656, 143.8729248046875, 312.62579345703125, 156.01882934570312, 110.51968383789062, -28.926298141479492, 78.89337158203125, 12.518146514892578, 12.02862548828125, 83.93673706054688, -8.749017715454102, 76.66788482666016, 171.8633575439453, -23.141395568847656, 8.778718948364258, 109.02101135253906, 184.6043243408203, -52.263427734375, -30.478073120117188, 150.540771484375, -50.36700439453125, -10.577146530151367, 38.854522705078125, 161.81358337402344, 96.26304626464844, -14.856124877929688, 217.13824462890625, 79.55260467529297, 242.257568359375, -59.32365036010742, -45.22859191894531, 129.125, 14.5521240234375, -16.79193878173828, 178.70518493652344], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000460.npy"} +{"epoch": 0.6953892668178382, "step": 461, "batch_size": 64, "mean": 85.33160400390625, "std": 95.70442199707031, "min": -70.22331237792969, "p10": -28.077251434326172, "median": 77.41772079467773, "p90": 186.85396881103517, "max": 330.0133056640625, "pos_frac": 0.765625, "sample": [178.94764709472656, 61.96021270751953, 69.70506286621094, 53.51475524902344, 181.3270263671875, 190.13526916503906, 77.38184356689453, 130.57810974121094, 113.71407318115234, -67.77116394042969, 29.050949096679688, 82.49665832519531, 79.37428283691406, 8.510713577270508, 106.94500732421875, -70.22331237792969, -50.98674011230469, 185.27218627929688, 61.513343811035156, 8.805059432983398, 116.43923950195312, 167.142333984375, 155.0045166015625, -0.954498291015625, 77.45359802246094, 135.78126525878906, -47.48731231689453, -1.3645477294921875, -21.25371551513672, 73.6675033569336, 163.26882934570312, -28.76441192626953, -2.0980167388916016, 86.63553619384766, 159.88351440429688, 285.2622375488281, 320.6466979980469, 161.50518798828125, 43.489166259765625, 311.5000305175781, 185.19175720214844, 198.10719299316406, 133.81964111328125, 39.051048278808594, 21.260587692260742, -16.108001708984375, 83.220458984375, -2.2210445404052734, 157.0581512451172, 23.01117706298828, 187.53187561035156, 84.86713409423828, 61.205467224121094, 25.436492919921875, 146.2816925048828, -22.4615478515625, 330.0133056640625, -42.45149612426758, 16.147993087768555, -43.07960510253906, 158.10018920898438, 13.964149475097656, 163.74130249023438, -26.473876953125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000461.npy"} +{"epoch": 0.6969009826152683, "step": 462, "batch_size": 64, "mean": 82.46420288085938, "std": 94.78068542480469, "min": -141.06202697753906, "p10": -22.416261291503904, "median": 70.04828643798828, "p90": 187.58633728027345, "max": 406.4537048339844, "pos_frac": 0.796875, "sample": [154.4226531982422, 200.92706298828125, 185.38766479492188, 161.11671447753906, 26.3409423828125, 39.29158020019531, 169.41238403320312, 10.309986114501953, 39.872520446777344, 9.236900329589844, -4.343620300292969, 188.52862548828125, 148.46005249023438, 84.90825653076172, 176.4661407470703, 212.26785278320312, -14.237594604492188, 192.2543182373047, 2.3190231323242188, 135.39840698242188, 148.44174194335938, 70.40296936035156, 69.693603515625, 142.4490966796875, 74.69403076171875, 6.052978515625, 127.54145812988281, 215.04940795898438, -24.768138885498047, 106.69622802734375, -32.1837158203125, -22.705490112304688, -16.180885314941406, 163.4174346923828, 184.25, -21.74139404296875, -57.923377990722656, 109.41767883300781, 40.851890563964844, 76.56967163085938, 172.32635498046875, 178.6095733642578, 49.50171661376953, 49.77717590332031, 62.94828796386719, 198.55154418945312, -16.706008911132812, 46.3120231628418, 158.49832153320312, 136.4069366455078, 54.60670471191406, -141.06202697753906, 149.4757080078125, 181.15994262695312, 112.40153503417969, -0.33933258056640625, 24.80522346496582, 406.4537048339844, 45.42053985595703, 56.65845489501953, -137.1986083984375, -29.26677703857422, 18.851003646850586, 21.152301788330078], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000462.npy"} +{"epoch": 0.6984126984126984, "step": 463, "batch_size": 64, "mean": 62.233642578125, "std": 90.15949249267578, "min": -112.36507415771484, "p10": -51.1054286956787, "median": 41.288818359375, "p90": 179.62866058349613, "max": 336.62445068359375, "pos_frac": 0.75, "sample": [5.362720489501953, 122.79902648925781, 95.69427490234375, -110.69583129882812, 92.19940185546875, 159.83392333984375, -2.3297672271728516, 182.26651000976562, 135.09075927734375, 130.86447143554688, -40.40987014770508, -84.58961486816406, -0.48592376708984375, 95.1687240600586, 189.66482543945312, -97.7381591796875, -0.19860076904296875, 23.429412841796875, 9.411056518554688, -29.316551208496094, 61.82823181152344, 20.51959228515625, -1.895416259765625, 216.0904541015625, 65.98723602294922, 20.03558349609375, 43.673606872558594, 38.904029846191406, -18.679969787597656, 21.999370574951172, 8.187850952148438, 14.256523132324219, 189.39590454101562, 168.48085021972656, 44.489444732666016, 151.48309326171875, 134.05776977539062, 24.49652862548828, 34.070472717285156, 213.6564483642578, 25.330211639404297, 16.21758270263672, 92.38897705078125, -55.689239501953125, -56.969425201416016, -61.51947021484375, 336.62445068359375, 116.30876159667969, 170.75247192382812, 165.91615295410156, 35.791622161865234, -112.36507415771484, -0.474273681640625, 65.25846862792969, 173.4736785888672, 100.74099731445312, 97.51708984375, 5.652618408203125, 112.82672882080078, 114.2525863647461, -17.309776306152344, 135.60096740722656, 12.211210250854492, 183.35745239257812], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000463.npy"} +{"epoch": 0.6999244142101285, "step": 464, "batch_size": 64, "mean": 80.25096130371094, "std": 115.26067352294922, "min": -172.9957275390625, "p10": -44.16797523498534, "median": 79.68611145019531, "p90": 225.53352813720707, "max": 408.36126708984375, "pos_frac": 0.765625, "sample": [-33.47047424316406, 48.056861877441406, 173.88287353515625, -57.099365234375, 169.6297149658203, 1.03936767578125, 12.979988098144531, 159.03436279296875, 108.09164428710938, 96.57025146484375, 163.3337860107422, -34.52916717529297, 362.4774169921875, 99.25760650634766, 66.81144714355469, -165.43496704101562, 232.98318481445312, 90.36894226074219, 82.57804870605469, 155.1244659423828, 30.74010467529297, 275.73699951171875, -25.670862197875977, 82.84748840332031, 43.90977478027344, 152.974609375, 74.71485137939453, 254.66204833984375, 125.9117431640625, 140.69093322753906, 76.79417419433594, 31.373695373535156, 149.22486877441406, 408.36126708984375, -159.5479736328125, -20.608856201171875, 178.80905151367188, 87.93573760986328, -82.42974853515625, -88.79804992675781, 147.54391479492188, 22.769264221191406, 31.640209197998047, 229.10055541992188, 14.003334045410156, 30.09905242919922, 22.28862762451172, 51.72906494140625, -10.522109985351562, 257.1163330078125, 111.36618041992188, 217.21046447753906, -172.9957275390625, -30.514163970947266, 43.14386749267578, -48.298892974853516, 2.8460159301757812, 169.94342041015625, 120.9813461303711, 203.67080688476562, -15.231971740722656, 133.4704132080078, 141.045166015625, -5.6613311767578125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000464.npy"} +{"epoch": 0.7014361300075586, "step": 465, "batch_size": 64, "mean": 66.13145446777344, "std": 106.1205062866211, "min": -121.90177917480469, "p10": -70.433642578125, "median": 53.477909088134766, "p90": 203.09040527343757, "max": 298.145751953125, "pos_frac": 0.65625, "sample": [-78.09751892089844, 229.55706787109375, 163.57940673828125, -76.80978393554688, 140.47360229492188, 160.66778564453125, -77.23881530761719, 185.06027221679688, -36.44020080566406, 101.45899200439453, 43.448333740234375, 172.1796875, 147.08477783203125, 249.5242919921875, -44.40845489501953, 158.47113037109375, -17.702186584472656, 187.04470825195312, 167.21371459960938, -3.4067916870117188, 0.3030242919921875, -67.88286590576172, 179.7133331298828, 293.345703125, 136.79115295410156, 218.56983947753906, -58.052818298339844, -81.80577087402344, -36.636085510253906, 52.30237579345703, -15.009681701660156, -71.5268325805664, 298.145751953125, 99.20645904541016, 121.892822265625, 51.264373779296875, -59.911865234375, 0.4780731201171875, -0.8733348846435547, 153.15956115722656, -91.24290466308594, -57.04814910888672, 15.542720794677734, 21.937965393066406, -12.130775451660156, -121.90177917480469, -33.49650955200195, 54.6534423828125, 63.761688232421875, 221.69918823242188, -11.200241088867188, 168.29867553710938, 27.790016174316406, 92.507568359375, 19.04717254638672, -33.29845428466797, 86.90274810791016, 3.8077335357666016, 109.129150390625, 114.3978042602539, 209.96713256835938, 122.21415710449219, 126.12210083007812, 149.81910705566406], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000465.npy"} +{"epoch": 0.7029478458049887, "step": 466, "batch_size": 64, "mean": 39.400794982910156, "std": 97.22801971435547, "min": -320.34967041015625, "p10": -73.50071411132812, "median": 28.690753936767578, "p90": 157.86607360839844, "max": 328.6960144042969, "pos_frac": 0.6875, "sample": [-1.5373916625976562, 158.9458465576172, -73.67367553710938, -82.52853393554688, 17.48716163635254, 45.5228271484375, 21.83721923828125, -6.891845703125, 0.8917770385742188, -320.34967041015625, 106.38002014160156, 72.83711242675781, 169.35968017578125, -117.66449737548828, 82.17618560791016, -22.70648193359375, 82.73513793945312, 328.6960144042969, 49.76783752441406, 212.00601196289062, 52.137332916259766, 23.585460662841797, 155.3466033935547, -1.8837242126464844, 9.2138671875, 13.980567932128906, 109.39985656738281, -85.22724914550781, 203.0948486328125, 78.34255981445312, -2.3975753784179688, 20.475936889648438, -46.75640106201172, 29.436019897460938, 145.1486053466797, 45.47135543823242, 154.2785186767578, 69.48957061767578, -54.10574722290039, 42.25035858154297, -40.932106018066406, -80.18658447265625, -34.268898010253906, -74.86036682128906, 74.26924133300781, 23.48626708984375, 20.891433715820312, 86.73748016357422, 252.32540893554688, -10.750072479248047, -73.09713745117188, 104.75216674804688, -49.56554412841797, 55.58778381347656, 18.217308044433594, -0.9518337249755859, 33.03260040283203, 10.75440788269043, 59.929779052734375, 144.5869140625, 173.2480926513672, 27.94548797607422, 47.212806701660156, 68.71480560302734], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000466.npy"} +{"epoch": 0.7044595616024187, "step": 467, "batch_size": 64, "mean": 80.3714828491211, "std": 137.0307159423828, "min": -119.84841918945312, "p10": -30.141210174560545, "median": 44.34965133666992, "p90": 185.3143447875977, "max": 667.166015625, "pos_frac": 0.78125, "sample": [-10.360824584960938, 550.98828125, 69.61642456054688, 67.9111328125, 33.231597900390625, 41.11072540283203, -0.0933990478515625, -47.34482955932617, 21.59781265258789, 39.073951721191406, 114.51473999023438, -51.495609283447266, 174.6024627685547, -26.251432418823242, 87.11344909667969, 101.4452133178711, 76.29702758789062, 34.62537384033203, 61.24629211425781, 14.952896118164062, 60.840911865234375, -89.9915771484375, -29.476036071777344, 198.74139404296875, 188.6432342529297, 35.59496307373047, 69.79691314697266, 77.87947082519531, -119.84841918945312, 172.56710815429688, -30.426284790039062, 88.1500244140625, 21.279027938842773, 45.566162109375, 5.9857330322265625, 42.10682678222656, 171.73846435546875, 177.54693603515625, 55.66398620605469, 430.9385681152344, 3.7936248779296875, 3.814970016479492, -1.8184280395507812, 36.564910888671875, 15.13746452331543, -70.98442077636719, 145.81719970703125, -1.1896286010742188, 149.2789306640625, 144.03370666503906, 667.166015625, 54.57714080810547, -14.439735412597656, 23.2220516204834, 50.060325622558594, 61.914451599121094, 43.133140563964844, 201.06097412109375, 26.701934814453125, 451.3648681640625, -63.60443115234375, 152.3268585205078, 104.2229995727539, 35.541343688964844], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000467.npy"} +{"epoch": 0.7059712773998488, "step": 468, "batch_size": 64, "mean": 84.70193481445312, "std": 103.03248596191406, "min": -196.97293090820312, "p10": -45.97263870239256, "median": 91.36135482788086, "p90": 213.09049224853516, "max": 288.723388671875, "pos_frac": 0.8125, "sample": [19.673479080200195, 31.833284378051758, 125.77696228027344, 270.65643310546875, 46.661476135253906, -23.331899642944336, 12.291845321655273, 151.24681091308594, 141.19393920898438, 74.62751770019531, 99.58399963378906, 210.907958984375, 214.02586364746094, -115.87532043457031, 15.684585571289062, 71.94964599609375, 69.93355560302734, -76.34249877929688, -58.544708251953125, 127.55181121826172, 205.21043395996094, 56.722625732421875, 125.17224884033203, 219.470458984375, 117.06702423095703, 132.85354614257812, 181.1590576171875, 16.548959732055664, -24.85449981689453, -132.330078125, 239.75973510742188, -196.97293090820312, 167.50897216796875, -14.850955963134766, -4.927986145019531, -60.711585998535156, 121.2497329711914, 159.13687133789062, -55.02326965332031, 158.7178192138672, 54.74120330810547, 58.358062744140625, 162.94552612304688, 216.2762451171875, 154.26895141601562, 0.5648231506347656, 32.301109313964844, 126.64317321777344, 18.637794494628906, 3.032217025756836, 171.14425659179688, 83.13871002197266, 108.86495971679688, 74.6519775390625, 286.6925964355469, 101.17108154296875, -10.754608154296875, 288.723388671875, 178.98480224609375, 184.25633239746094, 170.77352905273438, 114.26952362060547, 20.051345825195312, 0.7754898071289062], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000468.npy"} +{"epoch": 0.7074829931972789, "step": 469, "batch_size": 64, "mean": 69.41971588134766, "std": 88.01841735839844, "min": -89.88784790039062, "p10": -23.466023063659666, "median": 64.5510425567627, "p90": 166.70706176757815, "max": 341.41546630859375, "pos_frac": 0.78125, "sample": [9.162887573242188, 156.41954040527344, 48.74452209472656, 115.42388153076172, 31.147239685058594, 19.507129669189453, 7.4910125732421875, 137.09027099609375, 91.461181640625, -89.88784790039062, 176.49099731445312, 58.24264144897461, 91.90415954589844, -9.638175964355469, 35.49469757080078, 144.3747100830078, 141.16616821289062, 46.2984619140625, 94.20218658447266, 161.87890625, 184.4990234375, 12.298011779785156, 89.50745391845703, -2.10125732421875, 133.24362182617188, -69.67520904541016, 70.85944366455078, 18.454559326171875, -23.971811294555664, 87.22615051269531, -4.722259521484375, 2.1710033416748047, 145.19558715820312, 222.21319580078125, 162.63668823242188, -22.128036499023438, -1.9436569213867188, -22.285850524902344, 162.0146484375, 180.3858184814453, 150.08181762695312, 81.3876724243164, 13.141368865966797, 100.97600555419922, -17.01526641845703, 20.995086669921875, 92.98287963867188, 7.575996398925781, 168.45150756835938, 20.902175903320312, -72.58779907226562, -60.59464645385742, 314.9364013671875, 2.2824935913085938, 341.41546630859375, 22.417564392089844, 78.17204284667969, 80.2795639038086, 131.97019958496094, -52.885826110839844, 115.68218994140625, 13.80592155456543, -40.258453369140625, 137.89572143554688], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000469.npy"} +{"epoch": 0.708994708994709, "step": 470, "batch_size": 64, "mean": 71.13740539550781, "std": 104.9635238647461, "min": -213.9816436767578, "p10": -50.33885650634765, "median": 63.91158676147461, "p90": 201.82865905761724, "max": 398.93890380859375, "pos_frac": 0.734375, "sample": [185.71640014648438, 79.58354949951172, 292.41131591796875, -213.9816436767578, 69.36424255371094, 239.51214599609375, 61.515472412109375, 54.111427307128906, 186.05612182617188, 158.41506958007812, 143.04501342773438, -70.62770080566406, 177.819091796875, 40.04590606689453, 162.32492065429688, 12.866722106933594, 86.18370056152344, -62.539527893066406, -40.487548828125, 136.1383819580078, -53.48881912231445, 219.96978759765625, 7.545656204223633, -45.85823059082031, 173.6156005859375, 42.57981872558594, 224.61190795898438, 128.6433563232422, 18.274803161621094, -26.31004524230957, 88.91596984863281, -3.8741722106933594, 105.89558410644531, 3.8098831176757812, -52.259124755859375, 12.868400573730469, 80.72581481933594, 5.935575485229492, 83.91403198242188, -32.03717803955078, 398.93890380859375, 174.90817260742188, 212.34860229492188, 9.239921569824219, -60.07585906982422, -37.12541198730469, 55.831626892089844, 157.11383056640625, 111.13479614257812, 131.896728515625, -1.8238716125488281, 183.49371337890625, -22.175086975097656, 121.51380920410156, 23.737037658691406, 84.49066925048828, 66.30770111083984, 95.85028076171875, -0.4347686767578125, -93.07495880126953, 208.58831787109375, -0.5583286285400391, 38.59243392944336, 13.12398910522461], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000470.npy"} +{"epoch": 0.7105064247921391, "step": 471, "batch_size": 64, "mean": 82.46802520751953, "std": 104.73779296875, "min": -211.08595275878906, "p10": -31.074555969238254, "median": 67.16579055786133, "p90": 197.40470581054694, "max": 502.989990234375, "pos_frac": 0.796875, "sample": [-4.2908935546875, 231.37600708007812, 127.57044219970703, 142.17222595214844, 51.7552490234375, -62.963409423828125, 162.84280395507812, 100.16682434082031, 178.73460388183594, 213.94866943359375, 104.27617645263672, 121.37022399902344, 207.41574096679688, 39.05400085449219, -140.4453125, 0.8234500885009766, 41.70890808105469, 203.9798583984375, 146.40357971191406, -64.72404479980469, 55.42392349243164, 43.421817779541016, 143.06869506835938, 57.66289520263672, 136.41455078125, -211.08595275878906, 73.08850860595703, 210.04568481445312, 181.2559051513672, 32.32013702392578, 143.73985290527344, 82.0506591796875, 36.006771087646484, 29.04632568359375, 56.49566650390625, 96.0373764038086, 163.48565673828125, -2.22265625, 47.19286346435547, 165.76803588867188, 32.956748962402344, 81.50326538085938, 170.30032348632812, 182.06268310546875, -1.5476360321044922, -2.6735382080078125, 44.96131896972656, 130.37538146972656, -42.55326843261719, 102.81037902832031, -45.7296142578125, 179.90310668945312, 61.243072509765625, 43.00330352783203, 117.5931167602539, -2.632984161376953, 13.262725830078125, 118.45738220214844, -77.42909240722656, 502.989990234375, 36.27162170410156, 43.43019104003906, -0.584320068359375, 249.58750915527344], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000471.npy"} +{"epoch": 0.7120181405895691, "step": 472, "batch_size": 64, "mean": 63.87194061279297, "std": 114.94014739990234, "min": -218.4609832763672, "p10": -61.72214584350586, "median": 51.34267044067383, "p90": 193.11768493652343, "max": 504.242431640625, "pos_frac": 0.734375, "sample": [13.448265075683594, 31.164772033691406, 94.27725982666016, -56.99143981933594, 176.0727996826172, 158.90786743164062, -27.87469482421875, 126.52926635742188, 206.74676513671875, 202.82034301757812, 39.4903450012207, 239.61917114257812, 26.496763229370117, -38.94737243652344, -5.364263534545898, 174.02642822265625, -7.739099502563477, 67.61663055419922, 171.79476928710938, 68.75111389160156, -66.5804443359375, 12.43902587890625, -23.814289093017578, 193.65509033203125, -23.402008056640625, 145.77801513671875, 53.60212707519531, 191.86373901367188, 184.3856201171875, -40.91075897216797, 42.57438659667969, 58.784881591796875, 87.56981658935547, -121.10005187988281, -63.728759765625, 36.959503173828125, 211.0013427734375, 504.242431640625, -8.149784088134766, 2.2228164672851562, 175.6711883544922, 186.0807647705078, -218.4609832763672, 195.70684814453125, -168.0410614013672, -57.04004669189453, 152.24664306640625, 1.067148208618164, -116.43206024169922, 107.74108123779297, 15.659103393554688, 61.341033935546875, 81.26304626464844, 11.369087219238281, 26.832778930664062, 49.083213806152344, 92.74209594726562, 34.38793182373047, 168.35411071777344, 40.73997497558594, 85.67408752441406, 88.67383575439453, -111.99151611328125, 146.897216796875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000472.npy"} +{"epoch": 0.7135298563869993, "step": 473, "batch_size": 64, "mean": 88.3228759765625, "std": 130.75997924804688, "min": -159.50442504882812, "p10": -77.7364791870117, "median": 61.52157211303711, "p90": 276.3165496826172, "max": 399.08477783203125, "pos_frac": 0.765625, "sample": [241.11447143554688, 205.994873046875, 48.30342102050781, -112.35418701171875, -84.57009887695312, 173.1946258544922, 60.72425842285156, 48.944061279296875, 49.939491271972656, 280.9898681640625, 141.89309692382812, 17.61182975769043, -5.018915176391602, 150.8956756591797, 25.60230255126953, -50.74213409423828, 354.2122802734375, 265.4121398925781, -1.6474990844726562, 228.127685546875, -113.37211608886719, 3.7732772827148438, 187.459716796875, -103.60413360595703, -49.5948486328125, 153.9512939453125, 124.70420837402344, 301.9283142089844, 10.85624885559082, 49.262306213378906, 210.2418212890625, 8.515937805175781, -138.24496459960938, -23.20294952392578, -44.20696258544922, 100.9576416015625, 348.86993408203125, 252.885009765625, 40.816898345947266, 18.224695205688477, 307.47735595703125, 154.89463806152344, 399.08477783203125, 86.13487243652344, 85.88441467285156, 63.3898811340332, 56.75244903564453, 5.13433837890625, 89.33684539794922, 160.71173095703125, 116.61135864257812, 3.97503662109375, 109.31057739257812, 62.318885803222656, -14.393836975097656, 136.40675354003906, 242.21728515625, -109.89894104003906, 31.245391845703125, -61.79136657714844, 39.734413146972656, 292.04168701171875, 176.74139404296875, -159.50442504882812], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000473.npy"} +{"epoch": 0.7150415721844293, "step": 474, "batch_size": 64, "mean": 59.041351318359375, "std": 108.38822174072266, "min": -186.2510986328125, "p10": -74.50480079650877, "median": 43.46187973022461, "p90": 222.01775054931642, "max": 285.2090148925781, "pos_frac": 0.703125, "sample": [244.88003540039062, 169.659423828125, 88.5380859375, -86.53663635253906, -27.93082046508789, 20.577754974365234, 158.85781860351562, 222.09323120117188, 94.35980224609375, 7.5893096923828125, 35.83763122558594, 56.65354919433594, -186.2510986328125, 108.87258911132812, 56.7139892578125, 221.8416290283203, -79.16902923583984, -7.258518218994141, -19.86370086669922, 47.895904541015625, -28.907930374145508, -16.60297393798828, 140.5541229248047, 57.97454833984375, 94.87554931640625, 82.27537536621094, 3.4904918670654297, -81.95933532714844, 13.636802673339844, 15.652549743652344, 170.15328979492188, 90.10079956054688, -143.90965270996094, -16.065963745117188, -63.62160110473633, 167.315673828125, 43.89942932128906, 225.68045043945312, 191.14007568359375, 9.622718811035156, 32.25000762939453, 33.49098205566406, 235.51272583007812, 86.32243347167969, -29.125812530517578, 257.991943359375, 246.14950561523438, 76.995849609375, 115.61856842041016, 18.5007381439209, 169.25225830078125, 285.2090148925781, -0.5773773193359375, 43.024330139160156, 38.163429260253906, -56.85590744018555, 199.1947479248047, -131.36715698242188, -31.936534881591797, 126.28022766113281, 122.018798828125, -167.03712463378906, -4.7430572509765625, 31.648296356201172], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000474.npy"} +{"epoch": 0.7165532879818595, "step": 475, "batch_size": 64, "mean": 96.18260192871094, "std": 111.33463287353516, "min": -137.40206909179688, "p10": -21.06639060974121, "median": 82.1339340209961, "p90": 227.60757446289062, "max": 505.9415283203125, "pos_frac": 0.8125, "sample": [-19.902536392211914, 18.38779067993164, 76.98113250732422, -16.32904052734375, 145.84620666503906, 95.89899444580078, 182.9755096435547, 225.08786010742188, 15.47810173034668, -62.705238342285156, 158.17904663085938, -137.40206909179688, 201.84246826171875, 295.4812927246094, 109.60803985595703, 27.050399780273438, 156.94337463378906, 105.779541015625, 15.391365051269531, 161.6593017578125, 63.340538024902344, 8.931838989257812, -7.466455459594727, 21.833824157714844, 129.6668701171875, 8.12656021118164, 47.24071502685547, 505.9415283203125, 159.188232421875, -20.269371032714844, 257.06158447265625, 183.6824951171875, 227.35507202148438, 227.71578979492188, 44.48723602294922, -21.407970428466797, 129.11244201660156, 6.2929840087890625, 206.588134765625, 84.39480590820312, 178.14093017578125, 117.38336181640625, 27.985088348388672, 79.87306213378906, 150.25453186035156, -53.05573654174805, 17.670833587646484, 45.389495849609375, 151.49252319335938, 56.24542999267578, 216.5034637451172, 250.4783935546875, 246.44439697265625, 134.55316162109375, 148.14031982421875, -58.903526306152344, -36.95587921142578, 278.1304626464844, 66.27804565429688, 156.9971466064453, 17.022571563720703, -10.048896789550781, -73.77229309082031, 31.370861053466797], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000475.npy"} +{"epoch": 0.7180650037792895, "step": 476, "batch_size": 64, "mean": 61.05390167236328, "std": 103.30299377441406, "min": -181.97927856445312, "p10": -57.05845642089844, "median": 64.66777610778809, "p90": 177.33148345947265, "max": 372.5747375488281, "pos_frac": 0.703125, "sample": [228.56512451171875, 58.0180778503418, -23.515050888061523, 37.29991912841797, -58.53263854980469, 81.48651123046875, -28.81304931640625, 173.3743896484375, 372.5747375488281, 18.363449096679688, 211.37612915039062, -7.696987152099609, 164.33761596679688, 111.82849884033203, 102.84033203125, 85.99182891845703, 269.89324951171875, 37.608741760253906, 81.59175872802734, -2.09429931640625, -64.82496643066406, 18.579273223876953, 125.74746704101562, 176.23182678222656, 136.7473907470703, -111.04387664794922, 27.588470458984375, -31.361976623535156, -43.6044921875, 7.133579254150391, 185.68540954589844, -33.40440368652344, 105.81121826171875, -43.32625961303711, 79.10218048095703, -100.04335021972656, -64.07429504394531, -105.87945556640625, 128.09217834472656, 37.33085632324219, -52.53199768066406, 113.29954528808594, 9.939544677734375, 0.28411865234375, 95.1793212890625, 80.16264343261719, 140.81936645507812, 16.577293395996094, -43.899383544921875, 122.17850494384766, 78.23100280761719, -181.97927856445312, 177.80276489257812, 149.72293090820312, 92.79464721679688, 231.94573974609375, 16.86991310119629, 45.831939697265625, 154.77224731445312, 71.31747436523438, -40.136634826660156, 174.29022216796875, 162.61105346679688, -53.61869812011719], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000476.npy"} +{"epoch": 0.7195767195767195, "step": 477, "batch_size": 64, "mean": 67.83621215820312, "std": 120.12913513183594, "min": -244.29986572265625, "p10": -100.22731323242188, "median": 67.56830596923828, "p90": 217.07749786376957, "max": 265.7448425292969, "pos_frac": 0.734375, "sample": [-127.88243103027344, -53.514251708984375, 122.88837432861328, 47.423912048339844, 35.03936767578125, 178.5130615234375, -7.714576721191406, 31.026193618774414, 92.6575927734375, 6.9180450439453125, 198.82095336914062, 43.23387145996094, -98.36061096191406, -10.380210876464844, 20.99650001525879, 8.62905502319336, -101.02732849121094, 248.39218139648438, 91.13246154785156, -60.258514404296875, 203.80271911621094, 141.3246307373047, 2.87652587890625, 72.84117889404297, 159.62989807128906, -30.70958709716797, -110.05675506591797, 21.67552947998047, 171.97146606445312, 21.905715942382812, -112.00526428222656, 45.90575408935547, 76.98701477050781, 28.848201751708984, -199.3538818359375, -44.98820114135742, -44.36732482910156, 62.295433044433594, 120.39814758300781, -2.074127197265625, 250.04588317871094, 2.9685211181640625, 195.49656677246094, 209.09593200683594, 88.86680603027344, 197.04263305664062, -157.6114501953125, 265.7448425292969, 220.4981689453125, -5.541229248046875, 228.69415283203125, 127.92281341552734, 202.73680114746094, 182.0364227294922, 208.9890594482422, 99.63645935058594, -244.29986572265625, 261.15692138671875, 148.74554443359375, 225.12142944335938, 83.21497344970703, 165.84384155273438, 1.7006683349609375, 129.9711456298828], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000477.npy"} +{"epoch": 0.7210884353741497, "step": 478, "batch_size": 64, "mean": 58.93254470825195, "std": 107.68193054199219, "min": -156.7161865234375, "p10": -82.92779693603515, "median": 48.557228088378906, "p90": 187.58192291259766, "max": 300.566162109375, "pos_frac": 0.671875, "sample": [-38.047080993652344, 182.89720153808594, 167.48287963867188, 145.74118041992188, 113.36105346679688, -35.044944763183594, 229.36672973632812, 205.45877075195312, -1.4718875885009766, 148.7188262939453, 9.4168701171875, 15.483795166015625, 90.0845947265625, 300.566162109375, -41.780128479003906, 26.64373779296875, -52.42762756347656, 67.29983520507812, -24.443565368652344, 65.23112487792969, -54.80530548095703, -8.38853645324707, 172.8726348876953, 148.4695281982422, 176.89501953125, -120.82608795166016, 140.07864379882812, 148.01467895507812, 176.97195434570312, -156.7161865234375, 36.72349548339844, 6.557086944580078, -149.61260986328125, -141.80612182617188, 93.79325866699219, 100.99073791503906, 206.7435302734375, 62.87213134765625, 172.46243286132812, 45.37995147705078, 189.58966064453125, -9.446245193481445, -83.12384796142578, 56.45026397705078, 173.80337524414062, 36.63764953613281, -6.806571960449219, 208.3086700439453, 39.077110290527344, -26.703449249267578, 143.35562133789062, 95.3997573852539, -35.1146240234375, 146.60797119140625, 20.991561889648438, 241.95132446289062, 153.5099639892578, -82.47034454345703, -0.6744155883789062, 34.57539367675781, -100.67237854003906, -128.59942626953125, 51.73450469970703, 22.093629837036133], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000478.npy"} +{"epoch": 0.7226001511715797, "step": 479, "batch_size": 64, "mean": 56.313148498535156, "std": 116.8681869506836, "min": -361.4267272949219, "p10": -55.836172866821286, "median": 47.61836624145508, "p90": 192.57289428710945, "max": 444.60943603515625, "pos_frac": 0.734375, "sample": [7.877819061279297, 360.6802062988281, 80.43856811523438, -117.44755554199219, -22.51258659362793, 35.91808319091797, 223.2515106201172, 106.88706970214844, 100.22294616699219, -51.29676055908203, 54.721614837646484, -57.78163528442383, -1.5029296875, -11.015470504760742, 17.27552032470703, 64.69207763671875, 259.96881103515625, 134.97366333007812, -49.634681701660156, -5.229991912841797, -24.357952117919922, 33.82314682006836, 444.60943603515625, 200.6947021484375, 26.777307510375977, -118.13038635253906, 17.384628295898438, 43.91595458984375, -81.3940658569336, -63.76891326904297, 73.64109802246094, 88.45726776123047, 3.0378494262695312, 258.51629638671875, 130.63552856445312, 64.43946838378906, -123.04830932617188, 22.554580688476562, 28.745315551757812, 156.84719848632812, 206.9183349609375, -361.4267272949219, 20.14008331298828, 78.58241271972656, 173.62200927734375, 173.59506225585938, 77.55175018310547, -5.29339599609375, 2.3677749633789062, 51.320777893066406, 124.830078125, 104.11771392822266, -42.748085021972656, 82.96865844726562, -14.303850173950195, 67.68891906738281, 82.36773681640625, 30.36954689025879, 65.62320709228516, 118.92182159423828, 3.6775569915771484, 35.519195556640625, 69.24093627929688, 144.52340698242188], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000479.npy"} +{"epoch": 0.7241118669690099, "step": 480, "batch_size": 64, "mean": 65.35197448730469, "std": 101.82392120361328, "min": -198.64517211914062, "p10": -48.66094284057617, "median": 55.50465393066406, "p90": 179.729638671875, "max": 317.72674560546875, "pos_frac": 0.75, "sample": [89.6353530883789, 31.16363525390625, 169.1780548095703, -127.91940307617188, 132.5796356201172, 19.97870445251465, 114.3580093383789, -198.64517211914062, 21.884719848632812, 302.19207763671875, -10.529350280761719, 48.692012786865234, 183.4661102294922, -50.15190887451172, 86.31700134277344, -45.18202209472656, -20.812530517578125, 116.09233093261719, -19.458251953125, 22.332927703857422, -79.68733215332031, 162.77999877929688, 37.44947052001953, -50.911521911621094, 178.14447021484375, 153.76783752441406, 317.72674560546875, 58.99754333496094, 177.41348266601562, 52.01176452636719, 93.02725219726562, 2.978466033935547, 48.54474639892578, 108.70172119140625, 66.50041961669922, 112.08370208740234, 160.27716064453125, 72.28895568847656, -180.37857055664062, 116.88407897949219, 164.64723205566406, 208.99462890625, 177.65975952148438, 132.75477600097656, -62.19325256347656, 115.12417602539062, -16.0018310546875, -21.6968994140625, 34.905696868896484, 34.65520095825195, 0.8063526153564453, -37.107513427734375, 180.40899658203125, 24.521865844726562, 143.07431030273438, 115.37982940673828, 183.99302673339844, 159.4906768798828, 18.402080535888672, -43.058258056640625, 1.3777694702148438, 199.27902221679688, 4.327484130859375, -10.9912109375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000480.npy"} +{"epoch": 0.7256235827664399, "step": 481, "batch_size": 64, "mean": 67.03060150146484, "std": 107.80232238769531, "min": -177.2430419921875, "p10": -50.656002807617185, "median": 47.30038642883301, "p90": 171.54360809326172, "max": 430.3074951171875, "pos_frac": 0.78125, "sample": [110.03408813476562, -85.36957550048828, 318.59014892578125, 3.8929061889648438, -50.81092834472656, 107.3861083984375, 149.13955688476562, 48.32011795043945, -64.9698715209961, -85.389404296875, 94.82595825195312, 54.02752685546875, 84.63630676269531, 105.1042709350586, 11.586219787597656, 128.78677368164062, 156.10589599609375, 46.28065490722656, 155.7416534423828, 110.5478744506836, -177.2430419921875, 1.1265792846679688, 150.01510620117188, 114.39643859863281, -6.191898345947266, -4.042791366577148, -10.035171508789062, 159.9354248046875, 2.2186927795410156, 106.8609619140625, 6.876413345336914, 153.57192993164062, 7.1952362060546875, 22.99981689453125, 38.72291564941406, 43.22315979003906, 130.59353637695312, 11.327091217041016, 12.79758071899414, 0.7268829345703125, 69.10623168945312, 66.66136932373047, 166.7722625732422, 240.9813232421875, 9.867225646972656, 199.43197631835938, -28.716552734375, -60.43128204345703, 235.35617065429688, 173.58847045898438, 134.67767333984375, 38.59059143066406, 45.968406677246094, 25.47850799560547, -46.87013244628906, 430.3074951171875, -171.06324768066406, 93.73738861083984, 104.85606384277344, 318.7551574707031, 104.22332763671875, 45.85235595703125, -50.29450988769531, -20.418827056884766], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000481.npy"} +{"epoch": 0.72713529856387, "step": 482, "batch_size": 64, "mean": 79.36741638183594, "std": 183.64077758789062, "min": -294.35296630859375, "p10": -87.34483413696289, "median": 49.38640213012695, "p90": 201.73969421386718, "max": 964.668701171875, "pos_frac": 0.703125, "sample": [-17.594375610351562, -294.35296630859375, -21.530174255371094, 121.3615493774414, 9.666725158691406, 106.74771881103516, 29.036163330078125, -93.65380096435547, 18.843273162841797, 310.9644775390625, 122.51441955566406, 157.28536987304688, 180.16139221191406, 49.33483123779297, 142.37689208984375, 89.24088287353516, 714.4154052734375, 21.828880310058594, -182.06121826171875, 87.28052520751953, 200.17129516601562, -5.02984619140625, 427.2068786621094, -64.09571838378906, 6.310676574707031, 125.65341186523438, 161.61431884765625, 81.98550415039062, -47.701904296875, 16.94644546508789, -91.48088836669922, 115.10892486572266, -94.95962524414062, 79.13357543945312, 202.411865234375, 160.384033203125, 71.7874984741211, 27.87448501586914, -3.935108184814453, -65.625244140625, 964.668701171875, 58.423065185546875, 30.683387756347656, 36.596527099609375, 19.217098236083984, 177.71055603027344, 59.083045959472656, 0.5772800445556641, 357.6458740234375, -91.21255493164062, 95.20354461669922, 163.34095764160156, 68.47251892089844, 49.43797302246094, 159.1157989501953, -0.5796947479248047, -78.32015228271484, 266.8511962890625, -75.80034637451172, -97.0172119140625, 131.66502380371094, -39.79719543457031, -74.373046875, 42.29553985595703], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000482.npy"} +{"epoch": 0.7286470143613001, "step": 483, "batch_size": 64, "mean": 75.43028259277344, "std": 104.47210693359375, "min": -201.01881408691406, "p10": -12.03556671142578, "median": 48.131526947021484, "p90": 201.24524230957033, "max": 376.5318603515625, "pos_frac": 0.796875, "sample": [28.646568298339844, 186.4599151611328, -2.8601341247558594, 77.70771026611328, 141.087646484375, 146.8885498046875, 150.04763793945312, 7.7087249755859375, -10.777252197265625, -6.511600494384766, 66.37680053710938, 167.6726837158203, 227.62594604492188, 177.71263122558594, -51.86884689331055, 145.17291259765625, 90.71878051757812, -174.3172607421875, 5.4031219482421875, 156.15170288085938, 104.15786743164062, 188.1083526611328, 242.5235595703125, 22.972915649414062, 201.68829345703125, 62.616859436035156, 226.3035430908203, 31.639686584472656, -12.574844360351562, 43.885162353515625, 87.60511016845703, -155.69432067871094, 240.5084228515625, 11.146734237670898, 26.621307373046875, 139.18377685546875, 32.61717987060547, -201.01881408691406, 46.14411926269531, -23.853065490722656, 35.935508728027344, 31.53044891357422, 182.39794921875, 59.56427764892578, 0.5284423828125, 19.701061248779297, 376.5318603515625, 265.1554260253906, 40.626869201660156, 124.73806762695312, 31.935646057128906, 82.42408752441406, 50.118934631347656, 200.21145629882812, -3.890016555786133, -10.09514045715332, 31.684391021728516, 17.1074161529541, 196.9503173828125, 117.54903411865234, 0.4456634521484375, -15.120582580566406, -0.6142692565917969, 148.4930877685547], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000483.npy"} +{"epoch": 0.7301587301587301, "step": 484, "batch_size": 64, "mean": 57.61237335205078, "std": 84.88481903076172, "min": -126.42450714111328, "p10": -42.597100830078126, "median": 62.42337226867676, "p90": 139.3385879516602, "max": 353.373779296875, "pos_frac": 0.765625, "sample": [15.6273193359375, 34.06925964355469, 5.7266998291015625, 5.9047393798828125, 202.75485229492188, 11.268350601196289, 82.89219665527344, -26.391952514648438, 127.7449951171875, -21.050025939941406, 45.088462829589844, -126.42450714111328, 63.59294509887695, 35.37665939331055, 95.71157836914062, 112.30534362792969, -26.108020782470703, -15.724910736083984, 73.36949157714844, 82.16219329833984, 6.277761459350586, 143.3408660888672, 123.12562561035156, 38.42890930175781, 98.50423431396484, 104.37870788574219, 205.2373504638672, 17.95965576171875, 108.80567932128906, -27.738981246948242, 39.19992446899414, 49.24957275390625, -46.70506286621094, -70.1055908203125, 210.27694702148438, -10.657535552978516, 64.57684326171875, 43.228050231933594, 91.15457153320312, -0.6093597412109375, 108.44422149658203, -42.790977478027344, 257.4163513183594, -119.34829711914062, 128.4254150390625, 74.07950592041016, 127.70662689208984, -51.78495407104492, 85.01397705078125, 61.25379943847656, 353.373779296875, 42.48396301269531, 78.0450439453125, 76.35255432128906, -75.75672149658203, -42.14472198486328, 75.03504943847656, 172.83193969726562, 70.3271484375, 129.99993896484375, 84.30267333984375, 91.18667602539062, 27.551273345947266, 9.363815307617188], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000484.npy"} +{"epoch": 0.7316704459561603, "step": 485, "batch_size": 64, "mean": 67.26914978027344, "std": 102.76319122314453, "min": -183.7435302734375, "p10": -31.557588195800772, "median": 42.36069107055664, "p90": 199.92513885498047, "max": 391.5993347167969, "pos_frac": 0.8125, "sample": [22.919456481933594, 126.38146209716797, 82.25341796875, 50.09938049316406, 96.73124694824219, 19.192955017089844, 19.952104568481445, 22.715612411499023, 198.3686065673828, -59.40302276611328, -93.92753601074219, 43.644744873046875, 200.59222412109375, 150.44180297851562, 32.703369140625, 336.251708984375, 44.15134811401367, 169.7400360107422, 229.55555725097656, 31.477415084838867, 13.531408309936523, 66.15948486328125, -56.19578552246094, 285.70501708984375, -2.6751327514648438, 5.526737213134766, -183.7435302734375, 74.94398498535156, 57.224388122558594, 67.41667175292969, -23.327728271484375, 11.51425552368164, 391.5993347167969, 3.3083057403564453, 107.42527770996094, -87.76646423339844, 292.3892822265625, 200.90948486328125, -35.08467102050781, 132.3909454345703, 38.42642593383789, -16.226327896118164, 61.738311767578125, 3.4806747436523438, 41.076637268066406, 0.018114089965820312, -5.690999984741211, 161.26223754882812, 105.96875, -36.39140319824219, 36.236080169677734, 31.223840713500977, 80.33887481689453, 187.00831604003906, 45.656578063964844, 127.08892822265625, 105.1010971069336, 180.8507843017578, 8.894966125488281, -17.511526107788086, 15.529739379882812, 11.070552825927734, 84.31770324707031, 10.66387939453125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000485.npy"} +{"epoch": 0.7331821617535903, "step": 486, "batch_size": 64, "mean": 65.19187927246094, "std": 114.50569152832031, "min": -148.9646759033203, "p10": -66.9633186340332, "median": 55.501102447509766, "p90": 194.43849182128906, "max": 405.26824951171875, "pos_frac": 0.703125, "sample": [-1.6861953735351562, 1.9807891845703125, 187.9744873046875, 18.10625648498535, 20.892822265625, -7.531642913818359, 43.85887145996094, 386.2845153808594, 52.703575134277344, 13.936294555664062, -22.62479591369629, 155.24484252929688, 123.05425262451172, 66.46063995361328, 72.99443817138672, 38.420684814453125, 24.132568359375, 197.4853057861328, -3.3878707885742188, 191.86935424804688, 102.08130645751953, 54.41070556640625, -62.132450103759766, -70.89422607421875, 219.16073608398438, -109.09529113769531, 295.9275817871094, -68.69795227050781, 56.59149932861328, -145.2085418701172, 185.28204345703125, 81.62098693847656, 233.9114990234375, -113.70433044433594, 7.449167251586914, 145.06744384765625, 63.00507354736328, 135.87490844726562, -92.1189193725586, 72.05348205566406, 4.343166351318359, 163.234130859375, 97.69381713867188, 195.142578125, 192.79562377929688, -62.91584014892578, 164.85743713378906, 118.7510986328125, 57.079833984375, 174.38734436035156, 84.87918090820312, -56.96958923339844, 42.13700866699219, 135.06561279296875, -2.1117019653320312, -62.499664306640625, -29.52542495727539, 94.13856506347656, 405.26824951171875, -31.51006507873535, 58.74513244628906, -148.9646759033203, 53.25327682495117, -25.74823760986328], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000486.npy"} +{"epoch": 0.7346938775510204, "step": 487, "batch_size": 64, "mean": 68.10958862304688, "std": 107.30957794189453, "min": -163.91448974609375, "p10": -32.788143920898435, "median": 47.346744537353516, "p90": 179.49514617919922, "max": 475.8360290527344, "pos_frac": 0.75, "sample": [66.21293640136719, -65.35711669921875, 105.61551666259766, 128.78494262695312, 50.204689025878906, 31.883955001831055, -44.136810302734375, 88.55278778076172, 91.11080169677734, 145.01296997070312, 20.915924072265625, -33.044281005859375, 47.490745544433594, -30.60760498046875, -25.63475799560547, 203.8809051513672, 86.40571594238281, 19.847171783447266, 326.58154296875, 41.739959716796875, -15.038612365722656, 365.2154541015625, 30.500322341918945, 184.71177673339844, 91.14391326904297, -56.05654525756836, 35.278785705566406, -163.91448974609375, 32.24854278564453, 104.27633666992188, 147.4844207763672, 37.14453887939453, 118.60626220703125, -3.422842025756836, 176.600341796875, -29.632713317871094, 137.21507263183594, -32.19049072265625, 4.516023635864258, 111.77574157714844, 220.66604614257812, -2.4597129821777344, 39.969207763671875, 132.10684204101562, -88.83859252929688, 89.14794921875, 16.763545989990234, -5.1216278076171875, 62.796409606933594, -159.7242431640625, -19.78972053527832, 177.26580810546875, 41.33429718017578, 57.85516357421875, 47.20274353027344, 154.85464477539062, 46.02103042602539, 74.35389709472656, 34.9178466796875, 180.45057678222656, 110.57233428955078, 29.2458438873291, 475.8360290527344, 111.66576385498047], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000487.npy"} +{"epoch": 0.7362055933484505, "step": 488, "batch_size": 64, "mean": 83.77322387695312, "std": 111.55167388916016, "min": -137.8828887939453, "p10": -23.602989959716798, "median": 60.15691375732422, "p90": 202.10030212402344, "max": 466.22222900390625, "pos_frac": 0.75, "sample": [40.31278991699219, -14.896781921386719, -14.442377090454102, -20.234647750854492, 84.70647430419922, -56.71002197265625, -23.754173278808594, 172.4532012939453, 370.974853515625, 187.1805877685547, 28.37050437927246, 7.09881591796875, 192.103759765625, 7.730377197265625, -5.574958801269531, 287.5058898925781, 17.2220458984375, 174.78802490234375, 55.87792205810547, 51.20278549194336, 117.00357818603516, 64.43590545654297, 21.265724182128906, -27.411911010742188, -4.925697326660156, 65.23125457763672, -105.812255859375, 162.9609375, 42.569915771484375, 198.47840881347656, 30.491714477539062, -57.11906433105469, 4.664743423461914, 203.6525421142578, 123.79304504394531, 64.65274047851562, 168.9610137939453, -23.250228881835938, 253.614990234375, 466.22222900390625, 299.50311279296875, 156.16293334960938, 164.2741241455078, 41.303070068359375, 134.37918090820312, 82.31656646728516, -4.9712371826171875, 172.04302978515625, -49.25050354003906, 79.5465087890625, 19.916168212890625, 76.2386474609375, -137.8828887939453, 133.974365234375, -4.38336181640625, 98.71845245361328, 221.1195068359375, 83.21182250976562, 197.07859802246094, 47.77909469604492, 34.380523681640625, 19.319580078125, -6.813102722167969, 192.12789916992188], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000488.npy"} +{"epoch": 0.7377173091458806, "step": 489, "batch_size": 64, "mean": 42.79768753051758, "std": 96.23109436035156, "min": -149.71548461914062, "p10": -61.85826225280761, "median": 29.33307933807373, "p90": 173.02806701660157, "max": 280.26568603515625, "pos_frac": 0.65625, "sample": [-143.4494171142578, 87.3002700805664, 44.07957458496094, 160.2506103515625, 270.02410888671875, 147.22659301757812, -37.10646057128906, 92.1153335571289, 59.37990951538086, 280.26568603515625, 99.90306854248047, 17.187400817871094, 52.610321044921875, -64.48970031738281, -5.736347198486328, -117.47453308105469, -31.78021240234375, -9.85300064086914, 128.96554565429688, 20.658540725708008, 205.01312255859375, 88.53890991210938, 110.0587387084961, 59.57737350463867, 107.14566040039062, 138.09364318847656, -24.87249755859375, -19.71343231201172, 28.3226318359375, 35.87971115112305, -144.17648315429688, -26.79372787475586, 179.03067016601562, 25.04125213623047, 16.680648803710938, -33.71735382080078, -48.987998962402344, 30.094266891479492, 174.88003540039062, 93.20250701904297, 40.727081298828125, 51.041168212890625, 6.514472961425781, 205.15826416015625, 50.07598114013672, -62.2597770690918, 170.93838500976562, 25.968429565429688, 68.47444915771484, 165.5189666748047, -118.95882415771484, 30.53038787841797, -26.27041244506836, 12.554670333862305, -7.72064208984375, -149.71548461914062, 173.14828491210938, 28.57189178466797, -51.61821746826172, -9.984588623046875, -60.92139434814453, 172.74755859375, -25.743310928344727, 6.8995819091796875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000489.npy"} +{"epoch": 0.7392290249433107, "step": 490, "batch_size": 64, "mean": 60.69693374633789, "std": 115.67782592773438, "min": -197.5562744140625, "p10": -84.58471221923828, "median": 67.00878143310547, "p90": 208.51779632568363, "max": 331.3657531738281, "pos_frac": 0.625, "sample": [-54.78233337402344, -104.38194274902344, -197.5562744140625, 191.62948608398438, -9.696382522583008, 75.26243591308594, 87.68751525878906, 154.26622009277344, 264.3627014160156, 331.3657531738281, -166.238525390625, 212.86485290527344, -182.7652587890625, 17.184354782104492, 62.847412109375, 122.42810821533203, 198.37466430664062, 108.8305892944336, 135.43328857421875, 19.93311309814453, 10.594673156738281, 196.7022705078125, 127.12498474121094, 183.08807373046875, -82.48457336425781, -0.226043701171875, 216.63511657714844, -4.048589706420898, 82.35971069335938, 27.61688995361328, -96.84469604492188, 111.04086303710938, 52.86309814453125, 129.47955322265625, -21.933927536010742, -7.23699951171875, -20.62451934814453, 234.85916137695312, -98.87774658203125, -7.444938659667969, 110.26246643066406, -62.45849609375, 90.05280303955078, 146.85110473632812, -3.227388381958008, 124.81257629394531, -7.9461212158203125, -7.600683212280273, -76.5005874633789, -21.611328125, 276.623291015625, 135.70245361328125, -17.056636810302734, 67.70965576171875, 276.41131591796875, 70.49870300292969, 162.69903564453125, 87.49903106689453, 26.230804443359375, 172.738525390625, -85.48477172851562, -30.406505584716797, 66.30790710449219, 82.80462646484375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000490.npy"} +{"epoch": 0.7407407407407407, "step": 491, "batch_size": 64, "mean": 57.09498596191406, "std": 94.19248962402344, "min": -167.58609008789062, "p10": -36.92892723083496, "median": 25.639135360717773, "p90": 174.0160339355469, "max": 424.21490478515625, "pos_frac": 0.75, "sample": [124.31211853027344, -28.04547882080078, 134.98980712890625, 51.261474609375, 138.99957275390625, 88.48332977294922, -9.772462844848633, 85.69760131835938, 147.4918212890625, 26.023242950439453, 8.685150146484375, -102.2495346069336, 195.4284210205078, 18.672325134277344, 145.86988830566406, -61.40271759033203, 4.973651885986328, 110.04049682617188, 10.11367416381836, 75.7933349609375, 5.33331298828125, 128.32211303710938, 192.96249389648438, -0.6621589660644531, 25.042409896850586, 170.70440673828125, 13.593101501464844, -13.483657836914062, 25.255027770996094, 11.523258209228516, 185.438232421875, 73.72235107421875, -35.864013671875, -19.60863494873047, 144.9462890625, 32.516910552978516, 15.988227844238281, 424.21490478515625, 55.337799072265625, -64.46671295166016, 175.435302734375, 11.964553833007812, 9.000350952148438, -19.869340896606445, -3.0468788146972656, 79.28700256347656, 4.49847412109375, -45.232696533203125, 122.22357940673828, 158.169189453125, -4.8158111572265625, 113.30033874511719, 207.35195922851562, 149.375244140625, 35.49999237060547, 0.1041107177734375, 37.44447326660156, 14.301902770996094, 74.7354507446289, -167.58609008789062, -40.395477294921875, 4.486730575561523, -37.385318756103516, 239.05056762695312], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000491.npy"} +{"epoch": 0.7422524565381708, "step": 492, "batch_size": 64, "mean": 55.13876724243164, "std": 120.06695556640625, "min": -173.791748046875, "p10": -85.46680374145507, "median": 53.96885871887207, "p90": 206.84336853027347, "max": 383.0360107421875, "pos_frac": 0.671875, "sample": [318.6429443359375, 317.6741943359375, -87.8240966796875, 17.162944793701172, 199.63861083984375, -10.973703384399414, 114.92349243164062, 59.142215728759766, 84.20365905761719, -31.66965675354004, 143.3871307373047, 162.09664916992188, 93.21075439453125, -81.22148895263672, 35.69059753417969, 139.06759643554688, -136.7754364013672, 63.86231994628906, 48.795501708984375, 305.13507080078125, 142.24574279785156, -28.953346252441406, -18.347999572753906, -56.242897033691406, 16.70836639404297, 71.84661865234375, 93.47428131103516, 281.9093017578125, -87.28622436523438, -66.00861358642578, 173.8341064453125, 17.352724075317383, -17.355484008789062, 62.45103454589844, -30.869890213012695, 15.167556762695312, -121.24488830566406, 383.0360107421875, -166.59298706054688, -35.096275329589844, 145.33262634277344, 17.897422790527344, 71.60311126708984, 31.10379409790039, 23.465499877929688, 65.06397247314453, 30.888072967529297, 59.480934143066406, 67.92351531982422, 125.33325958251953, -173.791748046875, -41.734596252441406, -52.01934814453125, 163.36842346191406, 212.40579223632812, 61.62180709838867, 186.97935485839844, 84.68978881835938, 95.55274963378906, -70.15205383300781, 2.4127330780029297, -117.17964172363281, -55.4920654296875, 209.93112182617188], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000492.npy"} +{"epoch": 0.7437641723356009, "step": 493, "batch_size": 64, "mean": 76.14266204833984, "std": 116.06671905517578, "min": -175.80697631835938, "p10": -45.04623870849609, "median": 63.82611656188965, "p90": 205.92827758789065, "max": 625.727294921875, "pos_frac": 0.796875, "sample": [30.206146240234375, 9.184551239013672, -38.979217529296875, 141.77572631835938, 4.997077941894531, 28.909912109375, -147.8229217529297, 63.119876861572266, 15.142786026000977, -22.367820739746094, 42.2818603515625, 149.38668823242188, 173.00241088867188, 189.24459838867188, 16.533580780029297, -59.01527786254883, 223.326416015625, 0.10465621948242188, 122.24887084960938, 147.19967651367188, 9.184503555297852, 87.45336151123047, -7.760103225708008, 28.224227905273438, 56.19536590576172, -64.13250732421875, 155.6085662841797, 35.465667724609375, 208.15579223632812, 96.2756118774414, 95.89903259277344, 167.34043884277344, -43.233551025390625, 90.41664123535156, -74.15248107910156, 85.01152801513672, 193.49183654785156, -66.38168334960938, 200.73074340820312, 14.187494277954102, 239.34872436523438, 80.07456970214844, 118.29930114746094, 625.727294921875, 72.08251190185547, 221.98910522460938, -45.82310485839844, 169.0618896484375, 122.585693359375, -35.665611267089844, -17.057357788085938, 156.79876708984375, 51.808189392089844, 6.5816192626953125, 224.00927734375, 138.73464965820312, 77.7745361328125, 41.11053466796875, 86.90850830078125, 212.02542114257812, 64.53235626220703, 56.479148864746094, 25.09113311767578, -175.80697631835938], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000493.npy"} +{"epoch": 0.745275888133031, "step": 494, "batch_size": 64, "mean": 61.725894927978516, "std": 126.09285736083984, "min": -262.5610046386719, "p10": -69.10837936401366, "median": 54.84212112426758, "p90": 198.62838897705083, "max": 530.0652465820312, "pos_frac": 0.671875, "sample": [105.94932556152344, 26.21326446533203, 70.63890838623047, 37.90106201171875, 160.884033203125, -18.836593627929688, -55.870704650878906, 114.30255126953125, 112.69954681396484, -16.9456787109375, 11.384300231933594, -56.233558654785156, -7.6840972900390625, 117.15550994873047, 91.95222473144531, 54.801666259765625, 169.2321014404297, 22.07317352294922, 121.40632629394531, -116.24603271484375, 23.954113006591797, 530.0652465820312, 129.4200439453125, -5.2385406494140625, 80.56116485595703, 274.3905944824219, 155.6260528564453, -35.91084289550781, 91.18356323242188, 155.80764770507812, 122.01842498779297, -201.8702392578125, -198.17556762695312, 161.88560485839844, -15.645898818969727, -76.07716369628906, -79.48775482177734, 100.19254302978516, 33.14813995361328, 85.27133178710938, 89.15919494628906, -3.9704456329345703, -262.5610046386719, -13.229549407958984, 30.681209564208984, 54.88257598876953, 204.19883728027344, 12.303018569946289, -48.26428985595703, 118.90487670898438, 185.63067626953125, 46.928131103515625, -14.525009155273438, 285.3492736816406, 101.55269622802734, 95.16559600830078, -74.62615966796875, -27.763736724853516, 240.80056762695312, 297.4552001953125, 164.02999877929688, -39.294677734375, 0.6850471496582031, 231.0692138671875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000494.npy"} +{"epoch": 0.7467876039304611, "step": 495, "batch_size": 64, "mean": 51.61387634277344, "std": 112.35897064208984, "min": -236.1790313720703, "p10": -67.7881103515625, "median": 27.235326766967773, "p90": 175.14807739257816, "max": 494.2293701171875, "pos_frac": 0.65625, "sample": [-92.79155731201172, -87.67745208740234, -236.1790313720703, -137.1057891845703, 167.45452880859375, -26.513572692871094, -82.502197265625, 47.12092590332031, 87.07731628417969, 15.926376342773438, -6.634010314941406, -109.48664093017578, -54.98912048339844, 79.13267517089844, 91.3061752319336, 209.72100830078125, -23.700340270996094, 84.87380981445312, 93.5801010131836, -0.3113384246826172, 51.023136138916016, 118.52104187011719, 134.79725646972656, -4.261175155639648, 131.79144287109375, 178.4453125, -0.9023017883300781, 100.30957794189453, 46.184661865234375, 26.99508285522461, 494.2293701171875, 22.5914249420166, 17.973304748535156, -73.27339172363281, 48.552406311035156, 27.475570678710938, 2.2929534912109375, 120.03535461425781, 157.28152465820312, 128.32000732421875, -41.82829284667969, 34.72046661376953, 48.6985969543457, 96.86483764648438, 215.642578125, 17.28929901123047, 92.40506744384766, -0.5465354919433594, -11.419075012207031, 202.0434112548828, -13.66716194152832, 155.35792541503906, 22.832847595214844, -18.106910705566406, -34.728904724121094, 389.86517333984375, 26.428878784179688, -1.4379959106445312, -20.314414978027344, 83.84768676757812, 72.3738784790039, 25.82152557373047, 205.2971954345703, 9.163715362548828], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000495.npy"} +{"epoch": 0.7482993197278912, "step": 496, "batch_size": 64, "mean": 80.66172790527344, "std": 139.05718994140625, "min": -137.4669189453125, "p10": -52.872110748291014, "median": 66.90849304199219, "p90": 232.39197082519533, "max": 728.813232421875, "pos_frac": 0.6875, "sample": [108.1951904296875, 68.68740844726562, 234.732177734375, 147.9696807861328, -23.191261291503906, 65.12957763671875, 78.73750305175781, 320.7077331542969, 110.1199722290039, 260.9776611328125, -95.38681030273438, 24.8089599609375, 95.02420806884766, 31.235305786132812, 1.3555736541748047, 22.807411193847656, -18.950336456298828, 48.276153564453125, 118.56510925292969, 294.2850036621094, -50.541969299316406, 728.813232421875, -1.3110542297363281, 183.816162109375, 75.373291015625, -120.27291870117188, 367.1016540527344, 173.9285888671875, -9.78609848022461, 198.856689453125, -48.068885803222656, 22.797443389892578, 187.24522399902344, 226.93148803710938, 20.887344360351562, 24.29026985168457, -137.4669189453125, 15.739805221557617, 88.6285400390625, 79.0587387084961, 101.39205932617188, 85.25201416015625, 187.8472900390625, 106.16351318359375, 106.0167007446289, 58.606544494628906, -42.0908203125, 107.7653579711914, -21.430328369140625, -17.912097930908203, 338.248046875, -90.35233306884766, -53.87074279785156, 2.917593002319336, 208.532470703125, 150.12123107910156, -18.73827362060547, 133.3758544921875, -72.39010620117188, -2.350494384765625, -85.64344787597656, 103.85424041748047, -18.435256958007812, -24.63524627685547], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000496.npy"} +{"epoch": 0.7498110355253212, "step": 497, "batch_size": 64, "mean": 66.37499237060547, "std": 101.35519409179688, "min": -230.4759521484375, "p10": -32.16245422363281, "median": 61.62667465209961, "p90": 185.09416656494145, "max": 284.88751220703125, "pos_frac": 0.71875, "sample": [219.61026000976562, 62.027488708496094, 284.88751220703125, 141.55081176757812, 128.494384765625, 1.3993988037109375, -12.851150512695312, -7.6797943115234375, 34.63694763183594, 43.52732849121094, 141.8201446533203, 102.09112548828125, 109.83267974853516, -24.884727478027344, 128.27420043945312, 11.399574279785156, 133.9154815673828, -185.707275390625, 167.66656494140625, 162.92105102539062, -12.534042358398438, 53.22568130493164, -3.1540603637695312, -7.288429260253906, 49.77464294433594, -64.51658630371094, -28.448638916015625, 156.2440185546875, 176.8653106689453, 158.39938354492188, -9.87376594543457, 93.43659973144531, 111.68038940429688, 148.0636444091797, -166.4141845703125, 135.37954711914062, 194.58038330078125, 74.88420867919922, 137.61439514160156, -33.75408935546875, 174.30844116210938, 41.47222137451172, -27.187213897705078, 45.55499267578125, 83.10179138183594, -43.02134704589844, 261.2852783203125, 208.6734619140625, -0.68109130859375, 68.57078552246094, 37.44211959838867, 57.280670166015625, 86.48332214355469, 110.02728271484375, 29.745338439941406, 229.7934112548828, -230.4759521484375, 18.096023559570312, -15.707523345947266, 188.62081909179688, 61.225860595703125, 110.22940826416016, -99.73561096191406, 45.800872802734375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000497.npy"} +{"epoch": 0.7513227513227513, "step": 498, "batch_size": 64, "mean": 81.07511901855469, "std": 108.03237915039062, "min": -324.9131164550781, "p10": -24.92751998901367, "median": 53.73740005493164, "p90": 199.13794250488283, "max": 387.42901611328125, "pos_frac": 0.84375, "sample": [-43.60026550292969, 95.83910369873047, 332.3563232421875, 84.40692138671875, 52.865142822265625, -32.32465362548828, 156.63412475585938, 281.978271484375, -13.009540557861328, -23.961318969726562, 131.26361083984375, 71.24234008789062, 85.63888549804688, -25.34160614013672, 176.10104370117188, 92.17547607421875, 23.0223388671875, 1.5024051666259766, 48.03034210205078, 153.20529174804688, 322.8759765625, 46.93067169189453, 153.10479736328125, 37.526554107666016, 68.72908782958984, 119.42506408691406, -32.336631774902344, 195.74330139160156, 153.83245849609375, 32.95563507080078, 166.99365234375, 145.95387268066406, 6.795238494873047, 46.54679870605469, 45.780418395996094, -37.167694091796875, -23.34687042236328, 137.10787963867188, 4.135198593139648, 23.42832374572754, 130.7562255859375, 186.9231414794922, -324.9131164550781, 107.28680419921875, 47.109779357910156, 79.14505767822266, 19.65865707397461, 54.609657287597656, 28.967609405517578, 42.74539566040039, 192.21630859375, 204.8576202392578, 252.90972900390625, 3.9895553588867188, 200.59278869628906, 21.265718460083008, 134.3192901611328, 45.055511474609375, 26.980958938598633, 0.8013916015625, 63.5343017578125, -26.924659729003906, 46.453208923339844, 387.42901611328125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000498.npy"} +{"epoch": 0.7528344671201814, "step": 499, "batch_size": 64, "mean": 74.3385238647461, "std": 117.82959747314453, "min": -196.2066192626953, "p10": -80.91017608642578, "median": 80.32981491088867, "p90": 228.38889923095712, "max": 388.77801513671875, "pos_frac": 0.75, "sample": [94.53844451904297, 261.1094665527344, -135.98260498046875, 206.05047607421875, 28.36431884765625, 43.16952133178711, 81.68635559082031, -134.187744140625, 19.925121307373047, 91.87306213378906, 77.54560089111328, 136.89505004882812, 115.47206115722656, -81.13969421386719, 246.34405517578125, 30.41265106201172, 388.77801513671875, -101.6050033569336, 207.2212371826172, 177.12362670898438, 72.88369750976562, -78.14703369140625, -57.41725158691406, -88.2608413696289, -64.6329116821289, 145.42431640625, 185.30996704101562, 166.3657684326172, -51.98155212402344, 97.58731842041016, -20.44015884399414, 23.790813446044922, 88.9956283569336, 127.57533264160156, 102.07919311523438, -32.42913818359375, 4.5102081298828125, 137.78237915039062, -23.171350479125977, 77.37312316894531, 237.46075439453125, 33.804222106933594, 199.4964599609375, 38.76535415649414, -196.2066192626953, 121.84105682373047, 44.245567321777344, 115.7029037475586, 246.27584838867188, 297.12518310546875, 183.26475524902344, 141.02120971679688, 34.829193115234375, -80.3746337890625, -113.31930541992188, 150.71673583984375, 131.16159057617188, 78.97327423095703, -43.5631103515625, 240.62232971191406, 19.674659729003906, 182.06781005859375, 106.31969451904297, 20.968826293945312], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000499.npy"} +{"epoch": 0.7543461829176115, "step": 500, "batch_size": 64, "mean": 61.60002517700195, "std": 100.93450164794922, "min": -141.79052734375, "p10": -82.14627761840819, "median": 55.32122802734375, "p90": 195.4790222167969, "max": 296.84002685546875, "pos_frac": 0.75, "sample": [-25.505599975585938, 87.52985382080078, 25.671287536621094, -5.57908821105957, 134.12738037109375, 40.35130310058594, 80.13037109375, 178.47360229492188, 56.08392333984375, 189.8873748779297, 48.68621826171875, 180.04489135742188, 189.70468139648438, -30.61878204345703, 206.80128479003906, 26.666427612304688, 197.8754425048828, 47.16522216796875, 50.622962951660156, -51.54345703125, 203.23989868164062, 131.17428588867188, 117.49723052978516, 128.4544677734375, -9.341781616210938, 216.6988067626953, 92.26483154296875, 6.894695281982422, -46.299537658691406, -113.26786041259766, -85.56848907470703, 49.01662826538086, 23.645671844482422, 57.792877197265625, -134.2810821533203, 1.1454830169677734, 54.55853271484375, 132.02049255371094, 200.27899169921875, -8.024559020996094, 109.77812194824219, 20.13898277282715, 149.9003143310547, 42.6590461730957, -74.16111755371094, 78.28044128417969, 42.541412353515625, -113.429443359375, 154.17481994628906, -141.79052734375, 232.95970153808594, -119.95272827148438, 5.222755432128906, 89.6688003540039, 132.47682189941406, 62.80892562866211, 2.1476211547851562, 114.62055969238281, 104.74356842041016, -51.626495361328125, -122.59314727783203, 127.65303802490234, 154.8651885986328, 296.84002685546875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000500.npy"} +{"epoch": 0.7558578987150416, "step": 501, "batch_size": 64, "mean": 63.03109359741211, "std": 76.28224182128906, "min": -128.75860595703125, "p10": -12.894710159301757, "median": 55.93686103820801, "p90": 172.00767517089847, "max": 272.7264099121094, "pos_frac": 0.75, "sample": [32.477867126464844, 155.68215942382812, 160.74671936035156, 66.0614013671875, 138.16331481933594, 141.6729278564453, 41.61347961425781, 52.97252655029297, 58.90119552612305, 178.6529541015625, 176.18678283691406, -5.001131057739258, 68.0223388671875, 14.361557006835938, -10.782407760620117, 22.382020950317383, 86.7798843383789, 166.61636352539062, -16.34095001220703, -9.596208572387695, 91.72919464111328, -12.098625183105469, -2.196392059326172, 182.78733825683594, 204.20645141601562, 137.61062622070312, 108.01397705078125, 174.3182373046875, 34.29143142700195, -1.26080322265625, 36.21857452392578, -30.378509521484375, -13.235889434814453, 73.35054016113281, 99.46661376953125, 59.0987548828125, 110.48377227783203, -1.1095657348632812, 9.00771713256836, 83.57426452636719, 34.547882080078125, 135.07913208007812, 12.460018157958984, 74.56226348876953, -21.97614288330078, -128.75860595703125, 24.964981079101562, 128.5294952392578, -61.684078216552734, 44.318199157714844, 63.392364501953125, 272.7264099121094, 19.925270080566406, -11.131362915039062, 73.17869567871094, -1.3414859771728516, 3.48309326171875, 7.970888137817383, 183.1248016357422, 161.31761169433594, 77.15705871582031, 43.53606033325195, 107.39402770996094, -72.237060546875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000501.npy"} +{"epoch": 0.7573696145124716, "step": 502, "batch_size": 64, "mean": 60.97166442871094, "std": 87.10174560546875, "min": -92.0244140625, "p10": -27.846190643310546, "median": 49.39532470703125, "p90": 178.4539535522461, "max": 332.6250305175781, "pos_frac": 0.78125, "sample": [-26.87779998779297, 40.600399017333984, 1.4930877685546875, -39.06816101074219, 119.36279296875, 78.8028564453125, 29.59162139892578, 3.473419189453125, -80.94611358642578, 66.60379028320312, 33.05239486694336, 184.61167907714844, -16.76296615600586, -70.85041046142578, 332.6250305175781, 270.93084716796875, 14.969451904296875, -28.261215209960938, -7.552467346191406, 33.00315856933594, 38.23200988769531, -24.095943450927734, 70.3284683227539, 128.51878356933594, 113.03668212890625, 59.184974670410156, 110.48353576660156, -11.33438491821289, 31.649850845336914, 124.03591918945312, 34.899169921875, 8.641950607299805, 61.1888427734375, 3.3286190032958984, 55.119747161865234, 64.81248474121094, 213.18060302734375, 259.196533203125, -62.87518310546875, 1.9498939514160156, -8.207145690917969, 7.474479675292969, 81.78605651855469, 29.43475341796875, 88.11293029785156, 6.310943603515625, 47.40869140625, 127.51653289794922, 98.58110809326172, -92.0244140625, 71.60623168945312, 143.38888549804688, 177.13473510742188, 77.96885681152344, 117.80007934570312, -79.52163696289062, 51.3819580078125, 171.01376342773438, -24.24420166015625, 179.0193328857422, 17.132205963134766, 110.62230682373047, 213.56268310546875, 70.64351654052734], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000502.npy"} +{"epoch": 0.7588813303099018, "step": 503, "batch_size": 64, "mean": 62.25983428955078, "std": 93.0461654663086, "min": -160.30926513671875, "p10": -36.224616622924806, "median": 48.41883850097656, "p90": 182.17982330322266, "max": 271.73046875, "pos_frac": 0.75, "sample": [170.4072265625, 4.654003143310547, -14.116119384765625, 40.933250427246094, 105.6797103881836, 70.45845794677734, 271.73046875, -160.30926513671875, -7.221717834472656, 127.7289810180664, 139.4215087890625, -0.1654205322265625, 29.226593017578125, 178.9787139892578, -33.15814208984375, 88.23672485351562, -11.875837326049805, 40.05084228515625, 261.29559326171875, -62.457603454589844, 21.65532684326172, 98.34954833984375, 145.6900177001953, 49.160003662109375, 75.41130828857422, 46.9249267578125, 99.44859313964844, 190.22601318359375, -1.9020195007324219, 162.15237426757812, -36.365718841552734, 86.70348358154297, 189.8899383544922, 183.55172729492188, 187.05804443359375, -35.89537811279297, 111.29214477539062, -28.14525604248047, -147.0614471435547, -3.2362899780273438, 8.991985321044922, 20.7022705078125, 20.367380142211914, 87.27159118652344, 39.05080032348633, 24.00006103515625, 47.67767333984375, 200.77484130859375, 94.3448257446289, 17.530990600585938, 156.97357177734375, 137.1143035888672, -113.03877258300781, 22.069561004638672, 145.16273498535156, 135.32955932617188, 3.2676334381103516, -69.28279113769531, 72.08952331542969, 170.61277770996094, 28.37741470336914, 121.11479949951172, 106.0776138305664, -126.356201171875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000503.npy"} +{"epoch": 0.7603930461073318, "step": 504, "batch_size": 64, "mean": 83.31722259521484, "std": 91.1971435546875, "min": -128.59353637695312, "p10": -10.040137863159178, "median": 73.25373840332031, "p90": 182.4282943725586, "max": 387.1138610839844, "pos_frac": 0.84375, "sample": [-4.15252685546875, 160.15162658691406, -11.524978637695312, 168.39393615722656, 205.39552307128906, 10.699600219726562, 50.8106689453125, 44.311805725097656, 201.53665161132812, 10.298280715942383, 43.887115478515625, 96.54315185546875, -1.0930023193359375, 387.1138610839844, 19.953815460205078, 159.955810546875, 170.616943359375, -61.47528839111328, 93.06135559082031, 178.07015991210938, -15.22006607055664, 54.11349868774414, 3.1245994567871094, 179.18858337402344, 198.9794158935547, -49.38960266113281, 105.26355743408203, 153.26754760742188, 89.95465087890625, 311.26611328125, 176.1767578125, 183.81674194335938, 22.290103912353516, 72.88302612304688, 101.38568115234375, 7.34075927734375, 2.886566162109375, -128.59353637695312, 160.09393310546875, 52.94805908203125, 152.65875244140625, -11.078125, -7.618167877197266, 55.39447021484375, 47.32545471191406, 132.8823699951172, 65.640869140625, 48.32654571533203, 40.67681884765625, 84.46699523925781, 75.97758483886719, 26.580562591552734, -120.75745391845703, 143.13638305664062, 104.10647583007812, 50.999168395996094, 26.521848678588867, 163.9878692626953, 106.52764892578125, 87.04676055908203, 56.609413146972656, 221.0543212890625, 103.88046264648438, 73.62445068359375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000504.npy"} +{"epoch": 0.7619047619047619, "step": 505, "batch_size": 64, "mean": 64.82728576660156, "std": 122.79749298095703, "min": -157.85235595703125, "p10": -82.35462493896483, "median": 45.45790100097656, "p90": 202.74161376953128, "max": 418.89691162109375, "pos_frac": 0.671875, "sample": [138.29214477539062, -6.825672149658203, 53.92164611816406, 22.28548812866211, 4.443592071533203, 360.58795166015625, -69.26123046875, 329.7694091796875, 142.33164978027344, 154.43910217285156, 166.2711639404297, -38.57762145996094, 55.20636749267578, -1.3172149658203125, 49.15251159667969, 331.5277099609375, -5.3096160888671875, -14.608806610107422, 101.55022430419922, 24.601871490478516, -106.95437622070312, 154.95266723632812, 117.55681610107422, 41.35397720336914, 97.0136489868164, 262.8163146972656, 76.7530746459961, -68.59989929199219, 41.76329040527344, -3.111682891845703, -70.3201904296875, -73.58533477783203, 160.12841796875, -150.77987670898438, 206.56582641601562, 165.07241821289062, 94.06021881103516, 153.25958251953125, -40.42420959472656, -29.864044189453125, 130.73626708984375, 10.335899353027344, 29.85736083984375, -92.4218978881836, 418.89691162109375, 66.97694396972656, 104.21269989013672, -86.1128921508789, 213.01036071777344, -157.85235595703125, 3.0485610961914062, 153.776123046875, 174.22154235839844, 59.11083984375, 162.53936767578125, -43.92005920410156, 123.79962158203125, 35.62969970703125, 26.47765350341797, 193.81845092773438, 25.593900680541992, -3.2950439453125, -93.2148208618164, -132.41598510742188], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000505.npy"} +{"epoch": 0.763416477702192, "step": 506, "batch_size": 64, "mean": 59.379947662353516, "std": 122.7752456665039, "min": -180.09304809570312, "p10": -82.4573318481445, "median": 57.59806251525879, "p90": 177.5694793701172, "max": 525.3619995117188, "pos_frac": 0.703125, "sample": [39.602699279785156, 127.54624938964844, 21.37527847290039, -42.066993713378906, 1.1345977783203125, 172.8942108154297, 100.648193359375, -48.00544357299805, 123.33147430419922, -92.64230346679688, 9.739233016967773, 103.1305923461914, 75.8676986694336, 44.03777313232422, 75.64926147460938, 100.64437866210938, -151.2061767578125, 525.3619995117188, 394.10723876953125, -6.561637878417969, 180.15899658203125, -133.59988403320312, -58.69239807128906, -172.6246337890625, -128.2916259765625, -180.09304809570312, 24.8680419921875, 179.5731658935547, 225.65328979492188, 169.49072265625, 186.59146118164062, -24.19476318359375, 343.5578918457031, -9.31591796875, 160.02276611328125, 34.82799530029297, 169.95321655273438, 75.55194854736328, 123.6397476196289, -50.4666748046875, -107.98370361328125, 3.5486984252929688, 36.01420593261719, 65.73033905029297, 87.02890014648438, 59.09031295776367, -12.146541595458984, 56.105812072753906, 65.83183288574219, 140.6177978515625, 75.79144287109375, 92.75421142578125, -3.8017807006835938, 133.0745849609375, 96.52430725097656, 41.174217224121094, -12.633399963378906, 102.92671966552734, -47.02490997314453, -43.64170455932617, 43.04639434814453, 93.69735717773438, 112.58314514160156, 30.80987548828125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000506.npy"} +{"epoch": 0.764928193499622, "step": 507, "batch_size": 64, "mean": 70.04043579101562, "std": 110.87735748291016, "min": -105.1453857421875, "p10": -54.12370147705078, "median": 48.18800735473633, "p90": 219.81976013183612, "max": 442.2586669921875, "pos_frac": 0.671875, "sample": [169.07672119140625, -4.5970458984375, -22.84368133544922, 113.57220458984375, 355.86163330078125, -12.887153625488281, 253.84457397460938, -105.1453857421875, -42.739593505859375, 124.95970153808594, 75.63642883300781, 54.79241943359375, -65.923583984375, -2.65374755859375, -55.472900390625, 82.5777587890625, 41.84172058105469, -50.97557067871094, 166.20257568359375, 31.33559799194336, 40.77378845214844, 87.14907836914062, 141.6488800048828, -26.361291885375977, 171.01007080078125, 1.3001232147216797, 53.959632873535156, 271.7989501953125, 91.20458984375, 13.482391357421875, 42.122955322265625, -0.616790771484375, 442.2586669921875, -8.936698913574219, 175.19845581054688, 136.932373046875, -5.91839599609375, 122.39097595214844, -72.44754791259766, -28.627540588378906, -42.776641845703125, 42.4163818359375, -3.2188491821289062, 145.48883056640625, 131.32298278808594, 21.920822143554688, 90.86516571044922, 41.997833251953125, 305.513671875, 238.94317626953125, -55.845947265625, 77.36422729492188, -64.88257598876953, 9.676460266113281, 126.48403930664062, 104.6269302368164, 94.0045166015625, -49.21916198730469, 73.57942962646484, -97.57638549804688, 21.41265869140625, 253.69735717773438, 100.42362976074219, 161.58425903320312], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000507.npy"} +{"epoch": 0.7664399092970522, "step": 508, "batch_size": 64, "mean": 93.63215637207031, "std": 139.07028198242188, "min": -147.1406707763672, "p10": -39.40258865356444, "median": 54.070533752441406, "p90": 232.0777328491211, "max": 542.943603515625, "pos_frac": 0.75, "sample": [135.34767150878906, 19.196029663085938, 473.8837890625, 1.1316070556640625, 26.164077758789062, -42.543609619140625, 542.943603515625, -52.03266906738281, 146.9324951171875, 206.51412963867188, 425.0174560546875, 159.58949279785156, 13.806381225585938, 38.41966247558594, 125.05789184570312, 158.67242431640625, -147.1406707763672, 49.54071044921875, 54.68699645996094, -76.22996520996094, 42.48451232910156, 40.740631103515625, 123.91899108886719, -32.07353973388672, -19.28626251220703, -146.0283660888672, 84.97932434082031, -17.244253158569336, 427.413330078125, 75.06317138671875, -26.499053955078125, 233.6193389892578, 66.55433654785156, 24.653038024902344, 206.69149780273438, 236.90542602539062, 73.94532775878906, 227.9635009765625, 197.09970092773438, 2.1942367553710938, -25.82439422607422, -18.53821563720703, 178.89065551757812, 164.58201599121094, -18.862205505371094, -59.67976379394531, 120.06610870361328, 228.48065185546875, 263.8235168457031, 35.23515319824219, 180.43582153320312, 9.858951568603516, 52.38528060913086, -21.71132469177246, 26.2955322265625, -144.67974853515625, -18.043014526367188, 181.7388916015625, 135.1856689453125, 216.2724609375, 126.82130432128906, 53.454071044921875, 48.185455322265625, 196.0325927734375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000508.npy"} +{"epoch": 0.7679516250944822, "step": 509, "batch_size": 64, "mean": 60.950782775878906, "std": 98.13147735595703, "min": -175.93472290039062, "p10": -79.29530181884765, "median": 79.95530700683594, "p90": 166.6368865966797, "max": 241.7838134765625, "pos_frac": 0.734375, "sample": [36.89680480957031, -77.61027526855469, 80.98124694824219, 130.20590209960938, 70.88899230957031, -4.910093307495117, 57.61408996582031, -18.327730178833008, -96.33673858642578, 82.18717956542969, 108.9027328491211, 111.74655151367188, -175.93472290039062, 53.86369705200195, 152.29293823242188, -164.45289611816406, 173.083740234375, 110.44086456298828, 12.957321166992188, 35.4097900390625, 151.3947296142578, -44.82032012939453, 122.88226318359375, -55.778892517089844, 202.57037353515625, 68.31315612792969, -18.150367736816406, -8.940189361572266, 177.30709838867188, 17.952789306640625, 134.9265594482422, 40.67935562133789, 156.7889862060547, 124.65208435058594, -12.940673828125, -2.135244369506836, 157.42132568359375, 106.4710693359375, 107.85989379882812, 162.89183044433594, 241.7838134765625, 24.792770385742188, 164.567626953125, -80.0174560546875, 119.21939086914062, 149.15711975097656, 52.21629333496094, 81.37464904785156, 208.64041137695312, 167.52371215820312, 168.64163208007812, 35.075035095214844, -41.06908416748047, -155.12469482421875, 13.985641479492188, 132.97042846679688, 106.89043426513672, -127.52781677246094, -141.29061889648438, 90.80821990966797, 47.29878616333008, 130.82589721679688, 161.9330596923828, 78.92936706542969], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000509.npy"} +{"epoch": 0.7694633408919124, "step": 510, "batch_size": 64, "mean": 74.16134643554688, "std": 122.12361145019531, "min": -233.91860961914062, "p10": -75.05089035034179, "median": 59.18365669250488, "p90": 246.4293151855469, "max": 345.7673645019531, "pos_frac": 0.765625, "sample": [36.89360046386719, 208.8944854736328, 152.39581298828125, 69.38046264648438, -9.1007080078125, 9.713287353515625, 53.88230514526367, 227.30908203125, 101.09883880615234, 20.011993408203125, -24.519187927246094, 31.54296112060547, 26.628032684326172, 53.73334503173828, -127.67710876464844, -24.55810546875, 4.459503173828125, 50.468658447265625, -233.91860961914062, 35.42781066894531, 65.29498291015625, 144.07327270507812, 213.95130920410156, -118.63519287109375, 13.17828369140625, 194.54391479492188, 58.33324432373047, -36.87522888183594, -0.8020591735839844, 150.71621704101562, 105.96087646484375, 109.01486206054688, 292.31134033203125, 88.26068878173828, 60.0340690612793, 137.06912231445312, 102.5652847290039, 291.8874206542969, 42.221099853515625, -43.895599365234375, 15.49873161315918, -81.24633026123047, 137.97665405273438, 61.026119232177734, 299.3701171875, 57.99872589111328, 113.0694351196289, 248.9661865234375, 311.0964050292969, -60.59486389160156, 31.58641815185547, 77.05085754394531, 186.38772583007812, 135.84996032714844, -125.38809204101562, 28.473987579345703, 345.7673645019531, 240.50994873046875, -14.47369384765625, 125.76453399658203, 289.9312438964844, 74.76542663574219, -163.9208526611328, -120.41417694091797], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000510.npy"} +{"epoch": 0.7709750566893424, "step": 511, "batch_size": 64, "mean": 74.47544860839844, "std": 98.37300872802734, "min": -116.96864318847656, "p10": -58.128530883789054, "median": 69.7714729309082, "p90": 205.5734756469727, "max": 274.34454345703125, "pos_frac": 0.75, "sample": [274.34454345703125, 196.67726135253906, 93.9460678100586, 186.33920288085938, 33.64377212524414, 0.8002166748046875, 35.2499885559082, 2.4575576782226562, 120.94706726074219, 164.14810180664062, 118.63055419921875, -106.41553497314453, 110.52519226074219, -17.053672790527344, 156.07504272460938, 223.4720001220703, 123.14785766601562, -15.856964111328125, -62.413116455078125, 131.71461486816406, 193.4031982421875, -33.930755615234375, 47.452125549316406, -85.63292694091797, -116.96864318847656, -48.13116455078125, 233.76402282714844, 120.4571533203125, 76.71985626220703, 25.376220703125, 34.248451232910156, 121.15750122070312, 38.10833740234375, 62.823089599609375, 155.0660400390625, 89.03373718261719, 141.15939331054688, 35.85140609741211, -45.996376037597656, 41.86393737792969, 151.9345245361328, 58.68540573120117, 147.4080352783203, 238.80718994140625, 209.38613891601562, 146.86569213867188, -110.01080322265625, 43.5810546875, -14.765657424926758, 196.2158660888672, 35.153602600097656, -1.2124824523925781, 107.79391479492188, 104.32099151611328, 6.356201171875, 50.96147918701172, -64.53680419921875, 248.5084228515625, 141.0923614501953, 124.3798828125, 213.87985229492188, -2.7864723205566406, -89.36801147460938, -32.42635726928711], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000511.npy"} +{"epoch": 0.7724867724867724, "step": 512, "batch_size": 64, "mean": 53.0557861328125, "std": 97.06890106201172, "min": -220.52700805664062, "p10": -72.43657455444335, "median": 45.014644622802734, "p90": 180.73920135498048, "max": 252.9346466064453, "pos_frac": 0.75, "sample": [141.56881713867188, 76.35777282714844, 67.46832275390625, -111.82406616210938, -66.79525756835938, 182.53939819335938, 60.02335739135742, -20.65464210510254, 70.67516326904297, 84.40422058105469, 145.5602569580078, 243.5621795654297, -220.52700805664062, -72.71748352050781, 93.64128875732422, 2.7044219970703125, 33.61554718017578, 6.644481658935547, 157.5782012939453, 5.862878799438477, 192.23291015625, 135.82460021972656, 6.443550109863281, 112.56946563720703, -42.26570129394531, 138.11416625976562, -134.3427734375, 142.82180786132812, 3.5482406616210938, 176.5387420654297, 23.009662628173828, 218.61154174804688, 162.55775451660156, -5.052865982055664, 76.55851745605469, 57.29463195800781, 8.528587341308594, 105.65293884277344, -3.9671993255615234, 187.27752685546875, -34.68180847167969, -93.1809310913086, -16.781587600708008, 38.09474182128906, 42.05560302734375, 252.9346466064453, 57.42206573486328, 15.48055648803711, 37.358062744140625, 31.49313735961914, 143.0860137939453, -14.883243560791016, 19.949371337890625, -117.49057006835938, 34.13554382324219, 120.17132568359375, 13.78118896484375, 104.38995361328125, 187.6985626220703, 145.32998657226562, 47.97368621826172, -71.78112030029297, 104.40864562988281, -93.03717803955078], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000512.npy"} +{"epoch": 0.7739984882842026, "step": 513, "batch_size": 64, "mean": 66.19425964355469, "std": 89.2889404296875, "min": -122.91561889648438, "p10": -25.98064079284668, "median": 52.15108680725098, "p90": 190.94650573730473, "max": 331.7816467285156, "pos_frac": 0.796875, "sample": [280.71240234375, 161.64532470703125, 182.2841339111328, 95.11228942871094, 85.58208465576172, 27.17636489868164, 109.67452239990234, 13.647266387939453, 12.20974349975586, -23.230369567871094, 49.981712341308594, -119.11984252929688, 166.19369506835938, 74.67697143554688, 102.20866394042969, 117.8689193725586, -27.15932846069336, 6.857017517089844, 194.45401000976562, 140.23480224609375, 14.694564819335938, 208.83717346191406, 99.29134368896484, 52.89686965942383, 227.44564819335938, 32.065277099609375, -11.717239379882812, 87.07319641113281, 20.850969314575195, 69.44025421142578, 34.196189880371094, 87.21739196777344, 101.21248626708984, -122.91561889648438, 72.85259246826172, 51.405303955078125, -52.28510284423828, 57.967803955078125, 331.7816467285156, 43.5143928527832, 6.354808807373047, -43.79370880126953, 233.02188110351562, -3.253936767578125, 138.44528198242188, 220.7974090576172, 182.7623291015625, -20.58385467529297, 101.9580078125, 80.31666564941406, 53.37969207763672, 2.5151214599609375, 70.18704986572266, -1.3100128173828125, 16.8858642578125, -57.23069763183594, 50.71900177001953, 37.19472122192383, 4.653560638427734, 37.73503875732422, 105.9119873046875, -57.55878448486328, -15.266769409179688, 35.75676345825195], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000513.npy"} +{"epoch": 0.7755102040816326, "step": 514, "batch_size": 64, "mean": 72.47398376464844, "std": 105.85395812988281, "min": -183.8817138671875, "p10": -42.823921203613274, "median": 77.40316772460938, "p90": 178.51605377197268, "max": 453.29534912109375, "pos_frac": 0.75, "sample": [18.036848068237305, 79.45819091796875, 101.35194396972656, 97.14082336425781, 74.4171142578125, 67.18395233154297, 146.260986328125, 40.13810729980469, 92.28926086425781, -24.44711685180664, 49.41000747680664, 158.48703002929688, 100.15377807617188, 102.05184936523438, 3.1882381439208984, 81.91255950927734, -152.83535766601562, 151.65850830078125, 18.699974060058594, 70.83391571044922, -183.8817138671875, -13.833503723144531, 152.1845703125, 131.59523010253906, 341.7384948730469, -24.441818237304688, 55.545684814453125, 125.060546875, 110.90132141113281, 196.342041015625, -32.7674560546875, -71.56648254394531, 139.6337890625, 136.82028198242188, 50.21727752685547, 80.8020248413086, 125.12921142578125, 159.52163696289062, -5.846282958984375, 61.14528274536133, 18.745323181152344, -64.85757446289062, -91.11386108398438, 13.045265197753906, 125.41047668457031, 88.59815216064453, 75.34814453125, -28.532241821289062, 72.75028228759766, 100.62976837158203, -20.417808532714844, 21.323284149169922, -1.6084518432617188, 226.63809204101562, -114.72802734375, 123.1754379272461, 171.99913024902344, 133.15609741210938, 236.63165283203125, -31.03129768371582, 453.29534912109375, 186.01174926757812, -47.13383483886719, 181.30902099609375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000514.npy"} +{"epoch": 0.7770219198790628, "step": 515, "batch_size": 64, "mean": 67.06163024902344, "std": 104.43751525878906, "min": -155.24285888671875, "p10": -48.000845718383786, "median": 56.44480514526367, "p90": 197.1020248413086, "max": 386.1338806152344, "pos_frac": 0.703125, "sample": [-2.9352760314941406, 70.16371154785156, 42.21825408935547, 52.604042053222656, 252.36216735839844, 142.77488708496094, 24.97921371459961, 154.34901428222656, 31.988204956054688, 194.79864501953125, 174.90689086914062, 70.61234283447266, 94.44799041748047, -42.285682678222656, 89.66600036621094, 21.78460693359375, -66.72030639648438, 194.1359100341797, -122.23882293701172, -44.8880615234375, 80.16117095947266, 21.586105346679688, -0.417022705078125, 15.583450317382812, -155.24285888671875, 154.03158569335938, 198.0891876220703, 60.28556823730469, -35.00102996826172, -41.80852508544922, 42.00951385498047, 129.9293670654297, 34.05210494995117, 18.5115909576416, 101.59037017822266, 161.83782958984375, -76.91954040527344, 163.90188598632812, -28.0550594329834, 163.06199645996094, -49.334896087646484, 77.93353271484375, 242.23081970214844, -96.31848907470703, 67.28581237792969, 306.55621337890625, 13.379365921020508, -13.738973617553711, 85.2966079711914, 141.31536865234375, 24.12792205810547, 51.67900848388672, 94.62664794921875, 227.65968322753906, 211.17359924316406, -31.456958770751953, 129.55715942382812, -3.4682579040527344, 386.1338806152344, -71.02941131591797, 80.02701568603516, 88.23977661132812, -2.3944568634033203, -7.447853088378906], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000515.npy"} +{"epoch": 0.7785336356764928, "step": 516, "batch_size": 64, "mean": 63.619972229003906, "std": 106.1317138671875, "min": -210.59628295898438, "p10": -40.442189025878896, "median": 61.634178161621094, "p90": 178.92009277343752, "max": 431.1020202636719, "pos_frac": 0.734375, "sample": [171.25888061523438, 218.63194274902344, 79.46797180175781, -44.60975646972656, -88.76358032226562, 431.1020202636719, -6.552894592285156, 87.33323669433594, 157.541748046875, 99.06770324707031, 54.44512176513672, 78.35371398925781, 166.8831787109375, 240.1492462158203, 26.021053314208984, 9.073709487915039, 88.59774780273438, 172.75967407226562, 212.5849151611328, 82.55043029785156, -26.116188049316406, -164.76266479492188, 64.17729187011719, 93.99480438232422, 137.29876708984375, 90.0903091430664, 172.4187774658203, 16.564544677734375, 138.38461303710938, 153.52777099609375, -210.59628295898438, -30.717864990234375, 0.4088916778564453, -23.598533630371094, -4.040046691894531, 168.394775390625, 146.63140869140625, -3.1502208709716797, 39.72066879272461, -14.274913787841797, 102.80429077148438, -152.69027709960938, 110.7137222290039, 181.56027221679688, 51.371337890625, 19.043838500976562, 132.7499542236328, 18.875545501708984, 50.09886932373047, 59.091064453125, 13.047447204589844, 82.02103424072266, 31.663818359375, 4.7494049072265625, -80.19735717773438, 114.42872619628906, -15.917415618896484, 88.30567932128906, -14.125816345214844, -5.823947906494141, 200.4214630126953, 1.3243370056152344, -106.93721771240234, 204.84754943847656], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000516.npy"} +{"epoch": 0.780045351473923, "step": 517, "batch_size": 64, "mean": 51.72804641723633, "std": 105.50260162353516, "min": -129.02362060546875, "p10": -63.38309783935546, "median": 21.607465744018555, "p90": 216.97792205810558, "max": 366.40008544921875, "pos_frac": 0.671875, "sample": [122.2252197265625, -33.3309326171875, -12.395328521728516, 57.781829833984375, -68.76239776611328, 25.897754669189453, -40.01219177246094, 91.75401306152344, 229.13027954101562, 90.7105712890625, 5.2863311767578125, -39.311500549316406, -83.08206176757812, 12.944267272949219, 18.134033203125, -50.831398010253906, 2.5032482147216797, 4.227149963378906, 63.264007568359375, 12.206474304199219, 71.76435852050781, -97.19346618652344, 272.9995422363281, 289.9981384277344, -80.02291870117188, 50.675811767578125, -12.863555908203125, 62.41033172607422, 16.038543701171875, 125.00091552734375, 33.51384735107422, 261.675537109375, 30.724245071411133, 60.188453674316406, -26.411224365234375, -74.84650421142578, 20.601638793945312, 366.40008544921875, -36.95896911621094, -19.188560485839844, 48.46228790283203, 188.62242126464844, 267.8846435546875, 19.533580780029297, 102.84471130371094, 165.1812286376953, 22.613292694091797, 88.17918395996094, -129.02362060546875, -24.02716827392578, -69.26363372802734, 102.01396179199219, -17.637130737304688, 174.63003540039062, 244.68890380859375, 168.143798828125, 40.01170349121094, -35.40239334106445, 18.12604522705078, 10.830387115478516, 167.68112182617188, -50.685882568359375, 105.60174560546875, -21.259796142578125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000517.npy"} +{"epoch": 0.781557067271353, "step": 518, "batch_size": 64, "mean": 62.14179229736328, "std": 98.20014953613281, "min": -107.57855224609375, "p10": -54.574828338623036, "median": 33.11054801940918, "p90": 190.03481903076172, "max": 332.30426025390625, "pos_frac": 0.703125, "sample": [110.73286437988281, -9.635650634765625, 5.288871765136719, 188.33120727539062, 12.0220947265625, 14.176925659179688, -2.0087356567382812, 3.044971466064453, -95.5401611328125, 69.44446563720703, 55.54110336303711, -17.308624267578125, 54.676551818847656, 301.3184814453125, 131.68380737304688, -65.34822082519531, 163.3827362060547, 145.9410858154297, -98.24530792236328, 129.91091918945312, 196.52880859375, -107.57855224609375, -15.430961608886719, 76.01144409179688, 114.10899353027344, 120.95228576660156, -65.37921142578125, 126.23773193359375, -68.90824890136719, 184.73907470703125, 36.96621322631836, -17.3957576751709, -0.5939273834228516, 116.22509765625, 112.77604675292969, 37.64775085449219, 20.071460723876953, -18.715606689453125, 62.36054992675781, 266.06402587890625, 11.685134887695312, 22.599769592285156, 6.599573135375977, 216.53717041015625, 93.44792175292969, 145.35675048828125, -32.12846374511719, 190.7649383544922, 203.2530517578125, -60.331565856933594, -41.14244079589844, -4.077005386352539, 139.18789672851562, -2.047475814819336, 20.72998809814453, 180.1092529296875, 154.9736328125, 29.2548828125, 7.23261833190918, -33.87367248535156, 18.435558319091797, 1.4213314056396484, 332.30426025390625, 102.68510437011719], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000518.npy"} +{"epoch": 0.783068783068783, "step": 519, "batch_size": 64, "mean": 37.70978546142578, "std": 119.4354019165039, "min": -248.5994873046875, "p10": -103.7517723083496, "median": 31.746665954589844, "p90": 174.14893188476563, "max": 335.63177490234375, "pos_frac": 0.65625, "sample": [-151.36639404296875, 158.79745483398438, 56.464874267578125, -182.31884765625, -64.01435852050781, 159.48629760742188, 1.7110710144042969, 231.55892944335938, 4.6404876708984375, 97.69915771484375, 4.307373046875, 8.401224136352539, 150.00741577148438, -49.196800231933594, 132.41470336914062, 72.04481506347656, 151.9767608642578, 20.84977149963379, -164.94122314453125, -17.2752685546875, 168.52420043945312, 335.63177490234375, -145.28858947753906, 125.62814331054688, 155.7973175048828, 170.39266967773438, -108.9488525390625, 37.47465515136719, 67.39146423339844, 146.32992553710938, -48.963531494140625, 180.69677734375, 31.788497924804688, 13.899063110351562, 68.16043090820312, -63.57159423828125, -20.611251831054688, 15.846446990966797, 244.31361389160156, -88.92054748535156, 82.16384887695312, 10.167442321777344, 175.75875854492188, 62.536705017089844, 49.048072814941406, -248.5994873046875, 31.34320068359375, -52.26380920410156, 31.704833984375, -91.62525177001953, 159.1642303466797, -84.40798950195312, -88.19100952148438, 210.6348876953125, 33.98085021972656, 55.55579376220703, 169.35792541503906, 128.97076416015625, -55.310604095458984, -138.23239135742188, -33.063987731933594, -58.63813018798828, 239.72213745117188, -83.16862487792969], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000519.npy"} +{"epoch": 0.7845804988662132, "step": 520, "batch_size": 64, "mean": 59.25237274169922, "std": 105.83686828613281, "min": -117.2446517944336, "p10": -53.557209777832014, "median": 35.81147003173828, "p90": 173.79326171875007, "max": 502.9090881347656, "pos_frac": 0.75, "sample": [145.25860595703125, 4.228811264038086, -8.972042083740234, -0.6507606506347656, 132.0780487060547, 502.9090881347656, 70.87120056152344, 17.578208923339844, 0.6928367614746094, 389.5809326171875, 122.60087585449219, 20.05545997619629, 7.2510223388671875, -33.0953369140625, 71.843017578125, 64.6970443725586, 34.813995361328125, -62.32658386230469, 10.13692855834961, 194.9761962890625, 157.27789306640625, 52.30473709106445, 31.96840476989746, 198.72608947753906, 63.23712158203125, 65.9731216430664, 128.9886016845703, -77.38188171386719, -117.2446517944336, 102.01426696777344, 104.90298461914062, -4.2691802978515625, -32.69489288330078, 3.8715744018554688, 36.80894470214844, -0.5852775573730469, 180.87127685546875, 86.6454086303711, -16.362869262695312, -76.38239288330078, -112.75297546386719, 28.002273559570312, 7.1501312255859375, -94.59584045410156, 15.158782958984375, 54.00830078125, 20.466781616210938, 99.58808898925781, 99.10415649414062, 69.01315307617188, 27.250625610351562, 198.6293487548828, 108.12821960449219, 68.64546966552734, 254.9033966064453, 149.16644287109375, 62.386688232421875, 7.3612213134765625, -15.00350570678711, -6.452384948730469, 0.22843170166015625, 132.9713897705078, 131.30821228027344, -85.71125793457031], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000520.npy"} +{"epoch": 0.7860922146636432, "step": 521, "batch_size": 64, "mean": 79.99005126953125, "std": 93.42823791503906, "min": -160.2666015625, "p10": -17.659706878662107, "median": 74.5955924987793, "p90": 184.07873535156253, "max": 365.8417053222656, "pos_frac": 0.8125, "sample": [112.56676483154297, 59.272216796875, 186.2620849609375, 23.17425537109375, 168.80239868164062, 42.82231903076172, 160.9371337890625, -7.371131896972656, -1.6273651123046875, 78.16427612304688, 44.583526611328125, -13.20870590209961, 105.27366638183594, 44.352455139160156, 155.59654235839844, 88.33451080322266, 46.14845275878906, 56.97478485107422, -28.197372436523438, 169.8223876953125, 8.941581726074219, 174.38726806640625, 39.98876953125, 62.70867919921875, -16.55497932434082, 102.04793548583984, -114.38121032714844, 162.29934692382812, 57.997650146484375, 119.57208251953125, 226.0735626220703, 365.8417053222656, -76.80725860595703, 32.20803451538086, 32.62462615966797, 19.18498992919922, 27.452064514160156, 73.27188110351562, 5.0111846923828125, 329.83465576171875, 204.3144073486328, 7.531810760498047, -8.354499816894531, 108.4100570678711, -18.133161544799805, 80.04914093017578, 151.4525909423828, 115.53968811035156, 61.69648361206055, 110.87739562988281, 111.70683288574219, 93.32012176513672, -160.2666015625, 258.7587890625, 226.44775390625, 84.242431640625, 66.24256134033203, -35.193450927734375, 113.48731994628906, -21.776565551757812, 107.44607543945312, 75.91930389404297, 82.27482604980469, 178.9842529296875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000521.npy"} +{"epoch": 0.7876039304610734, "step": 522, "batch_size": 64, "mean": 51.697486877441406, "std": 116.70186614990234, "min": -266.6222229003906, "p10": -90.35977706909179, "median": 36.834983825683594, "p90": 188.44877624511722, "max": 402.55718994140625, "pos_frac": 0.703125, "sample": [-56.16229248046875, 95.74590301513672, 2.3383655548095703, 17.21923065185547, 67.16753387451172, 183.2496337890625, 37.724220275878906, 190.0889892578125, 182.64044189453125, 27.505773544311523, -152.1504364013672, -130.83731079101562, 48.55607604980469, 210.70718383789062, 35.28776550292969, 122.08809661865234, -68.16393280029297, 92.03846740722656, 148.00680541992188, 24.564878463745117, 150.86863708496094, 24.853504180908203, 33.284385681152344, -266.6222229003906, 184.62161254882812, 16.42303466796875, 48.3878059387207, -83.04366302490234, -93.511962890625, 42.48783874511719, -8.773460388183594, 190.42156982421875, -40.921630859375, 168.29478454589844, 155.00357055664062, 24.953781127929688, -24.822662353515625, 69.88871765136719, -18.691158294677734, 35.94574737548828, 74.3793716430664, -1.5906219482421875, 19.53430938720703, -60.452919006347656, 105.69276428222656, 0.7480010986328125, 159.75962829589844, -172.3870086669922, 210.58355712890625, -43.12994384765625, 221.61517333984375, -150.3615264892578, 135.87457275390625, 402.55718994140625, -3.152557373046875, 40.13298797607422, -93.49525451660156, 245.1667938232422, 127.539794921875, 157.41384887695312, 24.72666358947754, -65.80232238769531, 114.16693115234375, 172.4561767578125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000522.npy"} +{"epoch": 0.7891156462585034, "step": 523, "batch_size": 64, "mean": 65.05988311767578, "std": 108.41600036621094, "min": -199.02276611328125, "p10": -62.685369110107416, "median": 62.47275924682617, "p90": 209.75678100585944, "max": 375.0145263671875, "pos_frac": 0.734375, "sample": [17.076541900634766, 241.30911254882812, -9.82033920288086, 54.18279266357422, 117.86565399169922, 103.44335174560547, 57.10072326660156, 256.0911865234375, 155.52328491210938, 42.388031005859375, 1.214345932006836, 79.73155212402344, 185.44354248046875, -119.824462890625, 375.0145263671875, 60.95222473144531, 92.96099853515625, -12.132570266723633, 77.50735473632812, -119.40775299072266, 22.504669189453125, 144.99981689453125, -25.713653564453125, 53.855552673339844, -107.62628173828125, 126.40992736816406, -9.096351623535156, 22.05910873413086, 81.61890411376953, 85.5830078125, 56.649681091308594, 124.95597839355469, 247.83377075195312, 134.62730407714844, 132.32017517089844, 152.22158813476562, 197.6244659423828, 42.11985778808594, -23.70728302001953, -199.02276611328125, 72.05569458007812, -81.93827056884766, 240.68795776367188, 102.6547622680664, 113.00700378417969, -48.017982482910156, -12.57357406616211, 58.49591064453125, 105.27973937988281, -66.61554718017578, 214.9563446044922, 40.91646194458008, 64.98590087890625, -52.848350524902344, 235.51904296875, 106.5761947631836, 63.99329376220703, 173.270751953125, 45.66761016845703, -182.48048400878906, -52.87775421142578, 46.71424865722656, -53.51495361328125, 113.08070373535156], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000523.npy"} +{"epoch": 0.7906273620559335, "step": 524, "batch_size": 64, "mean": 54.18507385253906, "std": 90.21692657470703, "min": -165.85153198242188, "p10": -36.77284927368163, "median": 44.700531005859375, "p90": 153.34962310791016, "max": 458.5036926269531, "pos_frac": 0.828125, "sample": [114.03427124023438, 49.74744415283203, 458.5036926269531, 38.2037467956543, 11.475067138671875, -17.94646453857422, -41.28398895263672, 11.7073974609375, 83.48094177246094, 32.35440444946289, 154.02426147460938, 6.267601013183594, -99.49031829833984, 76.78890991210938, 3.01336669921875, 175.98965454101562, -142.4237060546875, -60.102783203125, 41.67592239379883, 151.7754669189453, 104.91334533691406, -165.85153198242188, 134.09278869628906, 4.086299896240234, 15.331878662109375, 42.63481140136719, 141.25494384765625, 51.98542022705078, 107.58578491210938, 7.414093017578125, -17.277061462402344, 80.01306915283203, 25.7139892578125, 56.39398193359375, 121.03768920898438, 65.17835998535156, 29.40863609313965, 82.15531921386719, 65.89338684082031, -26.246856689453125, -75.8087158203125, -19.168304443359375, 24.539451599121094, 201.1629638671875, 29.703142166137695, 41.66217803955078, 170.7308807373047, 12.19924545288086, 83.21115112304688, 11.276580810546875, 66.3094482421875, 46.76625061035156, 70.97797393798828, 170.19241333007812, 74.99687194824219, 121.6607894897461, 136.06756591796875, 76.17107391357422, -71.44033813476562, 20.40087127685547, 175.22012329101562, 10.936973571777344, 94.15928649902344, 22.40349578857422], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000524.npy"} +{"epoch": 0.7921390778533636, "step": 525, "batch_size": 64, "mean": 46.000911712646484, "std": 89.1210708618164, "min": -125.61289978027344, "p10": -58.8330810546875, "median": 34.97364044189453, "p90": 172.6895233154297, "max": 303.82830810546875, "pos_frac": 0.6875, "sample": [34.481597900390625, -114.71347045898438, -78.70681762695312, -51.34713363647461, -59.94599151611328, 35.22743225097656, 63.488075256347656, 22.806854248046875, 222.59957885742188, -35.753440856933594, 97.24333190917969, 16.446929931640625, 102.84797668457031, 21.636871337890625, 34.7198486328125, 98.49697875976562, 101.51887512207031, -56.236289978027344, 159.22886657714844, -40.7152099609375, 30.48773956298828, -15.425678253173828, 183.97586059570312, 159.44467163085938, -12.874397277832031, 80.12821960449219, -15.9915771484375, 79.35012817382812, 46.97003936767578, 93.50311279296875, 0.1149749755859375, 179.50494384765625, 44.818450927734375, 115.20079040527344, 99.70542907714844, 17.784469604492188, 15.878459930419922, 2.4104461669921875, 303.82830810546875, 36.019744873046875, -125.61289978027344, 55.188201904296875, 48.20804977416992, -115.17827606201172, -29.720352172851562, 201.41246032714844, 13.720226287841797, -18.71074676513672, 117.09695434570312, 153.30545043945312, -18.377098083496094, 176.3031005859375, 33.87152862548828, -16.392303466796875, 82.37821197509766, 164.25784301757812, -78.92718505859375, 80.04969787597656, 77.63046264648438, 91.13639831542969, -24.96778106689453, 181.07009887695312, -16.055089950561523, -105.78764343261719], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000525.npy"} +{"epoch": 0.7936507936507936, "step": 526, "batch_size": 64, "mean": 47.694358825683594, "std": 90.37165832519531, "min": -150.8576202392578, "p10": -61.53966178894043, "median": 44.06809616088867, "p90": 162.1349502563477, "max": 297.1781921386719, "pos_frac": 0.75, "sample": [200.56809997558594, 15.507061004638672, -18.644723892211914, 28.56348991394043, -41.87488555908203, 20.378433227539062, 28.26297378540039, 152.4277801513672, -150.8576202392578, -65.10343933105469, 28.67742919921875, 74.10124206542969, -109.57331085205078, 25.062835693359375, 107.72625732421875, -16.75670623779297, 68.10807800292969, 202.46084594726562, 29.91912078857422, 114.55176544189453, 46.78424072265625, 178.1653289794922, 78.37504577636719, 134.92123413085938, 137.68283081054688, 1.7227439880371094, 127.43924713134766, 53.479530334472656, -47.403053283691406, 297.1781921386719, -12.556398391723633, 61.86389923095703, 41.351951599121094, -71.2342300415039, 166.295166015625, -3.348146438598633, 75.0198745727539, 3.2264480590820312, 71.12664794921875, 5.930427551269531, 151.07968139648438, 60.427181243896484, 15.398391723632812, 51.20270538330078, 102.46563720703125, -144.89413452148438, -13.410991668701172, 32.78253936767578, 110.96649169921875, -55.679351806640625, 93.22802734375, 20.560962677001953, 267.4762268066406, 72.34100341796875, 184.49737548828125, 15.455001831054688, -62.69246292114258, 52.919010162353516, 91.77754974365234, 53.06837463378906, 30.377487182617188, -58.84979248046875, 83.49566650390625, -141.0794677734375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000526.npy"} +{"epoch": 0.7951625094482238, "step": 527, "batch_size": 64, "mean": 37.480682373046875, "std": 93.43035125732422, "min": -157.48812866210938, "p10": -65.02089309692383, "median": 23.713302612304688, "p90": 177.8575668334961, "max": 270.7969970703125, "pos_frac": 0.59375, "sample": [-10.834360122680664, 13.831253051757812, 47.42351531982422, 57.326171875, 21.047531127929688, -3.710540771484375, 178.18154907226562, 54.83726119995117, 165.30453491210938, 210.97882080078125, 164.59954833984375, -63.3489990234375, 189.385009765625, 29.10437774658203, 96.10910034179688, -74.50420379638672, 255.41470336914062, -10.589944839477539, 83.40444946289062, -62.033233642578125, -65.73741912841797, -82.94889068603516, -62.42649841308594, -36.15834045410156, 78.03421020507812, 24.388153076171875, 270.7969970703125, 51.69670104980469, 10.087352752685547, 23.0384521484375, 90.97317504882812, 6.697021484375, -38.813262939453125, 83.5806884765625, 43.0469856262207, 201.98898315429688, 64.17705535888672, -24.94866943359375, -14.919416427612305, 121.54415893554688, 96.5897216796875, -8.152202606201172, 124.77493286132812, -21.946826934814453, -26.504150390625, -41.88902282714844, -118.98019409179688, 177.1016082763672, -52.06111145019531, -33.503021240234375, -78.48974609375, 15.428319931030273, 102.43045043945312, 102.03903198242188, -62.51045227050781, -21.03815460205078, 89.81594848632812, 29.134010314941406, 83.1898193359375, 180.873779296875, -157.48812866210938, -98.60841369628906, 68.59481811523438, -36.061317443847656], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000527.npy"} +{"epoch": 0.7966742252456538, "step": 528, "batch_size": 64, "mean": 42.81523513793945, "std": 107.91165924072266, "min": -319.13525390625, "p10": -39.06600570678711, "median": 39.567949295043945, "p90": 167.63772583007815, "max": 288.2633361816406, "pos_frac": 0.6875, "sample": [47.61854553222656, -319.13525390625, 198.79531860351562, -16.114885330200195, 72.13814544677734, 122.20858001708984, 86.61775207519531, 4.21435546875, 30.55170440673828, 15.369552612304688, -6.669158935546875, -16.881912231445312, 115.70021057128906, -162.2061767578125, -17.60542869567871, -36.16017150878906, 161.5534210205078, 11.118062973022461, -3.2796802520751953, -28.209930419921875, -30.16259002685547, 283.1120300292969, 35.63903045654297, -182.93624877929688, -13.682853698730469, 68.9771728515625, -18.78626251220703, -40.311363220214844, 25.585905075073242, 116.10015106201172, 67.36717224121094, 32.753623962402344, 75.40293884277344, 288.2633361816406, 84.58888244628906, -31.25181770324707, 249.13572692871094, 85.8712387084961, 170.2452850341797, 60.817474365234375, 145.42617797851562, 150.6318817138672, 36.82716369628906, 46.925323486328125, -138.46925354003906, 109.03471374511719, 7.427692413330078, 22.6309814453125, 106.64232635498047, 6.569328308105469, 66.51850891113281, -157.36297607421875, -149.19407653808594, 132.78839111328125, 205.98660278320312, 42.30873489379883, 2.647113800048828, -21.763206481933594, 156.17294311523438, -29.33547019958496, 99.60037994384766, 72.93721771240234, 176.23348999023438, 62.63911437988281], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000528.npy"} +{"epoch": 0.7981859410430839, "step": 529, "batch_size": 64, "mean": 91.49447631835938, "std": 92.07616424560547, "min": -109.0980453491211, "p10": -16.332379150390615, "median": 101.81355667114258, "p90": 197.9495590209961, "max": 349.2822570800781, "pos_frac": 0.828125, "sample": [119.84063720703125, -54.572566986083984, 41.99900817871094, 15.534996032714844, 57.11221694946289, -4.9008331298828125, 285.587158203125, 194.77523803710938, 25.164142608642578, 23.485885620117188, -34.678932189941406, -0.04325294494628906, 7.100982666015625, 92.10277557373047, 133.11912536621094, 115.20245361328125, -109.0980453491211, 133.48648071289062, -4.313385009765625, 154.5126953125, 198.72171020507812, 7.2049713134765625, -23.945953369140625, 121.56896209716797, 102.85698699951172, 175.72755432128906, 23.16063690185547, 12.660469055175781, 155.26611328125, 32.21357727050781, 349.2822570800781, 142.42579650878906, 114.7601318359375, 176.8248748779297, 127.79790496826172, 179.84402465820312, 131.5106201171875, 200.68870544433594, 16.75307846069336, 174.34976196289062, 111.71762084960938, 157.71490478515625, 228.66273498535156, 65.98130798339844, 152.385009765625, 162.904052734375, 55.99919128417969, -4.2129669189453125, 67.1701431274414, 45.739288330078125, 70.59980010986328, 106.5084457397461, 24.039926528930664, 196.1478729248047, -72.02618408203125, 211.81591796875, 99.33040618896484, 155.4207305908203, 105.19511413574219, 100.77012634277344, -100.62289428710938, 68.93326568603516, -21.231613159179688, 255.61544799804688], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000529.npy"} +{"epoch": 0.799697656840514, "step": 530, "batch_size": 64, "mean": 53.997432708740234, "std": 94.67193603515625, "min": -173.6663055419922, "p10": -42.930221939086906, "median": 41.88303565979004, "p90": 166.1064682006836, "max": 392.58099365234375, "pos_frac": 0.671875, "sample": [392.58099365234375, 58.253787994384766, 74.06605529785156, -45.53876876831055, -9.528221130371094, -6.7583465576171875, 239.99447631835938, -4.188972473144531, 110.80781555175781, 123.67434692382812, 23.514453887939453, 229.51947021484375, -21.8939151763916, 19.02313232421875, 85.88632202148438, 63.91065216064453, 41.914337158203125, 45.03010559082031, 161.10296630859375, -12.670562744140625, -8.682422637939453, 2.560699462890625, -79.19047546386719, 155.74026489257812, 166.10983276367188, 3.524972915649414, 113.00483703613281, 80.42227172851562, 49.60810089111328, 188.50701904296875, -12.263923645019531, -2.4464187622070312, 194.2645263671875, 188.71524047851562, 71.93336486816406, 8.909490585327148, 41.85173416137695, 137.79800415039062, -97.56546020507812, 38.024173736572266, -2.5449371337890625, 91.86341857910156, -92.04497528076172, 74.22357177734375, -48.95110321044922, -48.799644470214844, 148.41416931152344, -36.84361267089844, -173.6663055419922, 6.7567138671875, -9.464471817016602, -35.395912170410156, 138.6193389892578, -31.982311248779297, 147.4726104736328, 66.84849548339844, 155.14584350585938, -35.02854919433594, 47.12945556640625, 35.18626403808594, 166.09861755371094, 27.748825073242188, 2.9620933532714844, 52.56231689453125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000530.npy"} +{"epoch": 0.8012093726379441, "step": 531, "batch_size": 64, "mean": 53.90513610839844, "std": 98.0972671508789, "min": -190.5341339111328, "p10": -54.4348258972168, "median": 38.375633239746094, "p90": 184.04981079101563, "max": 333.7026062011719, "pos_frac": 0.703125, "sample": [-55.79689025878906, 132.10809326171875, 144.57830810546875, 131.0752410888672, 123.23041534423828, 38.1954345703125, -20.55646514892578, 333.7026062011719, 47.82630920410156, -9.98910140991211, 59.48515319824219, 3.533445358276367, 185.38580322265625, -37.29963684082031, 154.96127319335938, 161.28396606445312, -100.60921478271484, -51.256675720214844, 6.075431823730469, 186.68443298339844, 69.23225402832031, 196.1349334716797, -99.0860824584961, -64.09300994873047, -26.88861083984375, 38.55583190917969, 62.18248748779297, 45.463104248046875, 51.94584655761719, 2.9193344116210938, 31.95423126220703, -14.240365982055664, -0.22777366638183594, 21.121322631835938, -190.5341339111328, -13.326366424560547, 108.0738525390625, 101.0960922241211, 48.103782653808594, -41.11543273925781, 17.0594482421875, 64.53856658935547, 180.9324951171875, 157.3367919921875, -88.76280212402344, -123.68189239501953, 147.7863311767578, 101.09808349609375, 82.38931274414062, 30.139068603515625, 26.05408477783203, -9.31503677368164, 33.8563232421875, 160.65098571777344, 103.06924438476562, 9.09438705444336, 112.55032348632812, -40.974361419677734, 221.5608367919922, 264.8255920410156, 5.729667663574219, -6.478265762329102, 213.63592529296875, 26.944351196289062], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000531.npy"} +{"epoch": 0.8027210884353742, "step": 532, "batch_size": 64, "mean": 62.78184127807617, "std": 102.39450073242188, "min": -128.78958129882812, "p10": -69.84070892333983, "median": 59.98585319519043, "p90": 194.87121887207033, "max": 370.8572692871094, "pos_frac": 0.734375, "sample": [92.5277099609375, 106.91667175292969, 278.3044128417969, 370.8572692871094, -20.627838134765625, 1.3420944213867188, 75.6543197631836, 191.4373779296875, 15.803508758544922, 171.10121154785156, -61.976165771484375, 96.46331787109375, 134.9950408935547, 10.9644775390625, 71.40898132324219, -1.049041748046875, 42.459110260009766, 99.84375, 24.338788986206055, -125.26271057128906, -57.57710266113281, 44.51885223388672, 138.13507080078125, 54.07959747314453, 52.171966552734375, 59.52995300292969, 69.88101196289062, 218.2876739501953, 127.54966735839844, 196.34286499023438, 247.1692657470703, 11.39162826538086, -28.764923095703125, 227.93328857421875, 35.271514892578125, 93.78031921386719, 98.29818725585938, -87.67491912841797, 34.32307434082031, 9.275285720825195, -79.34767150878906, -73.21122741699219, -1.8925209045410156, -30.449275970458984, 189.967529296875, 80.13741302490234, 175.4381103515625, -23.784523010253906, 51.13996887207031, 151.43154907226562, 60.44175338745117, 66.96390533447266, -10.031166076660156, 204.25982666015625, -126.15113067626953, 104.11344909667969, -117.18877410888672, 88.3983154296875, 153.8197021484375, -128.78958129882812, 2.4418506622314453, -21.203393936157227, 80.67076110839844, 101.43838500976562], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000532.npy"} +{"epoch": 0.8042328042328042, "step": 533, "batch_size": 64, "mean": 70.71826934814453, "std": 95.48406982421875, "min": -166.00291442871094, "p10": -27.360737228393553, "median": 62.20289611816406, "p90": 188.08537292480472, "max": 349.16058349609375, "pos_frac": 0.703125, "sample": [222.43978881835938, 156.79917907714844, 56.700843811035156, 83.42548370361328, 146.74874877929688, 76.20040130615234, -2.976959228515625, -2.5536231994628906, 148.24432373046875, 137.31857299804688, -44.7119140625, 19.758319854736328, -6.424777984619141, -8.895788192749023, 126.2349853515625, 349.16058349609375, 56.76341247558594, 131.64535522460938, -31.621063232421875, -0.25606536865234375, 7.625495910644531, 31.76127052307129, 129.38954162597656, -4.7369842529296875, -49.169105529785156, 91.4184341430664, 108.08110046386719, 63.1368408203125, 33.784461975097656, -151.3226776123047, -6.364715576171875, 26.113658905029297, 152.89535522460938, 107.6603012084961, -16.519014358520508, 194.62367248535156, -17.0794677734375, 286.6251525878906, 51.399192810058594, -28.45370864868164, 61.268951416015625, 98.25725555419922, 122.24172973632812, 117.5968246459961, 219.33114624023438, 190.7498779296875, 17.816055297851562, -51.58903884887695, 29.318588256835938, -7.083106994628906, -1.544595718383789, 156.26226806640625, 148.73101806640625, -24.810470581054688, 0.9561176300048828, 14.508979797363281, 164.2326202392578, 102.08062744140625, 69.17584991455078, 181.86819458007812, 144.20791625976562, 229.19032287597656, -166.00291442871094, 84.33634948730469], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000533.npy"} +{"epoch": 0.8057445200302343, "step": 534, "batch_size": 64, "mean": 62.22846221923828, "std": 98.24252319335938, "min": -196.8139190673828, "p10": -58.40932502746582, "median": 69.07070922851562, "p90": 181.91366424560547, "max": 319.49932861328125, "pos_frac": 0.6875, "sample": [40.91332244873047, -14.588541030883789, 195.002197265625, 82.25863647460938, 12.05009651184082, 90.27519989013672, -1.5257415771484375, -196.8139190673828, 108.40988159179688, -86.12528991699219, -9.019868850708008, 7.35748291015625, 167.7683563232422, 154.34548950195312, -59.468162536621094, 319.49932861328125, 99.23815155029297, 56.43336868286133, 103.19963073730469, -44.748382568359375, 141.75738525390625, 212.93214416503906, 112.98747253417969, 69.88726806640625, -33.119789123535156, -71.73590087890625, -148.3170166015625, -15.753255844116211, 210.61566162109375, 48.02227783203125, 70.36509704589844, -72.12258911132812, 178.1388702392578, 205.2951202392578, 154.10975646972656, 71.22726440429688, 34.577877044677734, 44.813270568847656, 279.8526611328125, 168.2786102294922, 110.82637023925781, 143.83273315429688, -14.698661804199219, 27.090118408203125, 128.53013610839844, 68.254150390625, -9.748886108398438, 98.71049499511719, 46.39112091064453, -9.660385131835938, 15.7266845703125, 183.53143310546875, 97.208984375, -25.938732147216797, -28.364959716796875, 36.83103942871094, 102.70787811279297, -58.88026428222656, 91.18048095703125, -19.718826293945312, 101.24022674560547, 98.34331512451172, 170.26406860351562, -57.31046676635742], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000534.npy"} +{"epoch": 0.8072562358276644, "step": 535, "batch_size": 64, "mean": 58.443016052246094, "std": 105.11847686767578, "min": -202.44822692871094, "p10": -51.73934478759765, "median": 43.55280685424805, "p90": 205.65453948974616, "max": 368.217041015625, "pos_frac": 0.6875, "sample": [27.150474548339844, -39.66545486450195, 142.08753967285156, 101.42448425292969, 1.0020980834960938, 67.32806396484375, -124.340087890625, -43.857242584228516, -104.34406280517578, 45.96473693847656, 257.48443603515625, -14.964399337768555, 41.14087677001953, 132.8035888671875, 3.298553466796875, -10.819572448730469, -44.39764404296875, 67.33612060546875, -0.9876308441162109, -42.10704040527344, 69.37709045410156, 68.94956970214844, 137.677490234375, -43.35711669921875, 128.33056640625, 32.845191955566406, 229.2454833984375, -110.93190002441406, -83.13947296142578, 39.63147735595703, 191.1486053466797, 222.4747314453125, 109.84321594238281, -5.583610534667969, 89.75515747070312, 127.85909271240234, 33.79456329345703, 368.217041015625, -18.5103759765625, 89.71595001220703, 20.91077423095703, -57.20701599121094, 272.05126953125, 109.04731750488281, 2.0542449951171875, -202.44822692871094, 8.372802734375, 130.03192138671875, -19.696109771728516, 102.77417755126953, 94.66698455810547, -54.88578796386719, 34.22372817993164, 211.87136840820312, 161.2601776123047, 76.17857360839844, 39.415863037109375, 183.85577392578125, 109.902099609375, -22.66925048828125, 52.47501754760742, -30.05046844482422, 228.11370849609375, 151.22366333007812], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000535.npy"} +{"epoch": 0.8087679516250945, "step": 536, "batch_size": 64, "mean": 81.815673828125, "std": 83.45316314697266, "min": -66.76708984375, "p10": -10.921430587768555, "median": 65.26588439941406, "p90": 193.7341247558594, "max": 278.00408935546875, "pos_frac": 0.78125, "sample": [74.12730407714844, 15.897865295410156, -29.736854553222656, 171.2118377685547, 153.81484985351562, 53.47474670410156, 23.568603515625, -10.856048583984375, 88.86666870117188, 43.457275390625, -66.76708984375, 74.34074401855469, 155.05267333984375, 184.39846801757812, 131.14990234375, 123.39163970947266, 128.68161010742188, 187.173583984375, 278.00408935546875, 82.08568572998047, 58.714324951171875, -3.959318161010742, 212.85467529296875, 270.76318359375, 41.44187927246094, 218.41845703125, 117.25849914550781, -4.745038986206055, 122.6569595336914, 98.48458862304688, 194.8277587890625, -10.949451446533203, 8.602153778076172, 55.86320877075195, -19.426513671875, 7.82012939453125, 183.08677673339844, 5.7048797607421875, 65.691650390625, 37.11944580078125, 173.0889892578125, 157.4798126220703, 87.4586181640625, -12.654436111450195, 28.311145782470703, 40.43757629394531, 9.910881042480469, 126.9220962524414, 64.84011840820312, 237.59988403320312, -21.693510055541992, 145.4126739501953, 34.34477233886719, 191.18231201171875, 91.20100402832031, 51.35736083984375, -9.673530578613281, 226.2532501220703, -9.739591598510742, -5.9632415771484375, 47.38939666748047, 88.54901123046875, -20.903717041015625, -6.4737091064453125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000536.npy"} +{"epoch": 0.8102796674225246, "step": 537, "batch_size": 64, "mean": 60.93144226074219, "std": 97.22217559814453, "min": -145.29306030273438, "p10": -70.37139892578124, "median": 66.09057235717773, "p90": 184.94546203613282, "max": 257.75823974609375, "pos_frac": 0.6875, "sample": [113.51588439941406, 66.0987319946289, 19.055419921875, 81.505615234375, 170.51904296875, 185.42727661132812, 105.42048645019531, -0.1794872283935547, 110.67694091796875, 89.54177856445312, 60.081932067871094, 29.677316665649414, 191.4937286376953, 149.16851806640625, 109.248779296875, 156.22854614257812, 180.08938598632812, 26.65290641784668, -17.880172729492188, -47.28875732421875, -5.423770904541016, 75.37322998046875, 56.77159118652344, 25.272850036621094, -5.77049446105957, 164.92337036132812, 47.16583251953125, 42.957176208496094, -145.29306030273438, -4.337507247924805, 109.7801284790039, 6.144443511962891, 71.87096405029297, -44.09867858886719, -75.87350463867188, 21.748210906982422, 250.19390869140625, 109.87371826171875, 6.76715087890625, -26.791667938232422, 198.48358154296875, 183.82122802734375, -38.06019592285156, 87.42329406738281, -26.26057243347168, -45.53388214111328, 217.0667724609375, -61.11815643310547, 66.08241271972656, 74.68138122558594, -25.702880859375, 84.5479736328125, -74.33707427978516, 152.62181091308594, 171.0745849609375, 257.75823974609375, -130.40496826171875, 215.13063049316406, -84.4052963256836, 101.97706604003906, 152.99282836914062, 154.96249389648438, -95.59053039550781, -97.90636444091797], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000537.npy"} +{"epoch": 0.8117913832199547, "step": 538, "batch_size": 64, "mean": 78.09992980957031, "std": 110.72981262207031, "min": -210.95953369140625, "p10": -36.58970069885253, "median": 68.55127716064453, "p90": 201.60550689697268, "max": 476.7793884277344, "pos_frac": 0.796875, "sample": [-44.53978729248047, 111.5009765625, 18.77129364013672, 11.124202728271484, 129.94993591308594, -173.91519165039062, 70.23893737792969, -32.9345703125, 80.58000183105469, 105.72239685058594, 70.46135711669922, 175.84817504882812, -2.9027557373046875, 133.46604919433594, 59.990928649902344, 34.050262451171875, 158.8289031982422, 171.33990478515625, 48.372398376464844, 73.12051391601562, 56.50658416748047, -2.3337249755859375, 196.4498748779297, 195.45999145507812, 26.797019958496094, -38.156185150146484, 150.9964141845703, 29.546531677246094, 203.8150634765625, 100.2848129272461, 3.561676025390625, 99.0832748413086, -42.422637939453125, -31.576751708984375, 16.53113555908203, 47.47752380371094, 24.472808837890625, 20.455015182495117, -12.114517211914062, 54.18113708496094, 270.2631530761719, 245.6973876953125, 204.5526123046875, 63.509796142578125, -61.484130859375, 87.614501953125, -55.60985565185547, 128.71498107910156, 127.60240173339844, 387.9849853515625, 148.70835876464844, 102.46757507324219, 268.20965576171875, -210.95953369140625, 66.86361694335938, 54.97489929199219, 476.7793884277344, -2.523265838623047, 91.53288269042969, 16.53173828125, 40.99897003173828, 84.56533813476562, 77.7275619506836, 85.58372497558594], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000538.npy"} +{"epoch": 0.8133030990173847, "step": 539, "batch_size": 64, "mean": 53.63603973388672, "std": 86.77368927001953, "min": -110.36911010742188, "p10": -31.62803268432617, "median": 36.51308822631836, "p90": 198.6311004638673, "max": 265.0032043457031, "pos_frac": 0.71875, "sample": [93.6955795288086, 64.15817260742188, 25.34819793701172, 211.1773681640625, 36.127845764160156, -27.121681213378906, 169.35647583007812, 92.55926513671875, 102.75238037109375, 46.67371368408203, 89.84523010253906, -11.543563842773438, 42.872283935546875, 12.3531494140625, -23.511749267578125, -0.2126007080078125, 73.49452209472656, 1.0282516479492188, 211.42164611816406, 105.75218200683594, 1.1815204620361328, -63.7483024597168, 54.248382568359375, -33.559326171875, 11.256599426269531, -110.36911010742188, -5.979619979858398, 44.037628173828125, 260.8325500488281, 71.91299438476562, 9.121360778808594, -7.0013427734375, -35.36354064941406, -22.488502502441406, 247.94931030273438, 57.23509979248047, 33.61011505126953, 16.7652587890625, 123.82402801513672, -0.6427898406982422, -13.029373168945312, 36.89833068847656, 74.07560729980469, 22.469505310058594, 64.13681030273438, 37.08625793457031, -89.15953826904297, 118.05742645263672, 21.17401885986328, -8.08984375, 153.99801635742188, 32.05901336669922, 53.155025482177734, -27.074485778808594, 129.0381622314453, 265.0032043457031, -33.71238708496094, 68.2344970703125, -90.58349609375, 129.28660583496094, 11.847200393676758, 257.47039794921875, 238.56622314453125, 12.750457763671875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000539.npy"} +{"epoch": 0.8148148148148148, "step": 540, "batch_size": 64, "mean": 82.4444351196289, "std": 107.09252166748047, "min": -128.80740356445312, "p10": -29.947732543945307, "median": 65.8939323425293, "p90": 219.67147674560547, "max": 474.3529357910156, "pos_frac": 0.796875, "sample": [-25.0438232421875, 13.666763305664062, 474.3529357910156, 154.69053649902344, 14.373502731323242, -128.80740356445312, 138.68429565429688, 90.06565856933594, 67.68233489990234, 5.338428497314453, 64.10552978515625, 189.0067138671875, 168.17506408691406, 28.991844177246094, -78.0123519897461, 63.28196716308594, 159.2293701171875, 108.8044204711914, 19.934886932373047, 79.07603454589844, 70.36015319824219, -96.07713317871094, 125.52304077148438, 58.237884521484375, 252.50912475585938, 155.12066650390625, -20.040197372436523, 126.03938293457031, -94.48342895507812, 19.000717163085938, -32.049407958984375, 177.96934509277344, 132.716796875, 62.92781066894531, 183.32485961914062, -4.913902282714844, 20.93743133544922, 140.52105712890625, -44.117889404296875, 6.415754318237305, -5.585216522216797, 262.96575927734375, 143.31533813476562, 215.10467529296875, 221.62867736816406, 60.62226867675781, 203.32093811035156, 46.40415573120117, 82.2396240234375, -82.56729888916016, 60.297637939453125, 312.4011535644531, 54.66009521484375, -4.2126007080078125, 20.03368377685547, 79.87454223632812, -20.98786163330078, 222.0273895263672, 3.9434661865234375, 100.35430908203125, 129.67898559570312, 20.94830894470215, 76.66386413574219, 225.79306030273438], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000540.npy"} +{"epoch": 0.8163265306122449, "step": 541, "batch_size": 64, "mean": 68.72541809082031, "std": 87.73442840576172, "min": -169.03158569335938, "p10": -53.300658416748014, "median": 84.16107559204102, "p90": 176.9457214355469, "max": 200.33450317382812, "pos_frac": 0.734375, "sample": [177.94900512695312, 85.26995849609375, 123.36373901367188, 176.00936889648438, -10.534317016601562, 135.93951416015625, 6.1463775634765625, 131.36306762695312, 170.22372436523438, 4.036590576171875, 74.20048522949219, -169.03158569335938, 35.79523468017578, 91.03266906738281, -3.0344772338867188, -122.59552001953125, 119.62577819824219, 161.63894653320312, 31.613754272460938, 184.22552490234375, -76.79106903076172, 65.10311889648438, 190.3301544189453, 177.34701538085938, 171.50799560546875, 115.52703857421875, 162.04371643066406, -14.306900024414062, -84.72982788085938, -5.165714263916016, 97.7253189086914, 51.570655822753906, 187.94529724121094, 200.33450317382812, 9.571220397949219, 129.6808624267578, 142.6149444580078, -84.25762939453125, 112.827880859375, -11.460275650024414, 35.64202117919922, -67.08224487304688, 134.35940551757812, 151.49502563476562, 83.05219268798828, 87.28126525878906, 142.82351684570312, 107.57514190673828, -69.07069396972656, -1.34442138671875, 9.525976181030273, 147.89613342285156, 116.80846405029297, 57.04249572753906, -14.534439086914062, -14.64263916015625, 65.03741455078125, 66.6551513671875, -17.473154067993164, 39.17980194091797, 123.49766540527344, -21.143625259399414, 195.66293334960938, 99.52738189697266], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000541.npy"} +{"epoch": 0.817838246409675, "step": 542, "batch_size": 64, "mean": 77.48338317871094, "std": 98.92201232910156, "min": -176.9029998779297, "p10": -52.989075469970686, "median": 66.61274719238281, "p90": 199.6385223388672, "max": 268.3974304199219, "pos_frac": 0.828125, "sample": [130.13519287109375, 174.88836669921875, 31.26049041748047, 212.80389404296875, 44.816200256347656, 88.04794311523438, 27.8387451171875, 53.52734375, 74.48281860351562, 55.28913116455078, 67.12347412109375, 1.7102813720703125, -74.2736587524414, 182.35272216796875, 42.42718505859375, 142.815185546875, 62.567405700683594, 268.3974304199219, -102.522705078125, 176.1268310546875, 172.8380126953125, 225.4727783203125, 122.01567840576172, 2.0545196533203125, -150.4715118408203, -176.9029998779297, 94.45111083984375, 57.25146484375, 106.12572479248047, -58.772674560546875, 155.73817443847656, 191.62339782714844, 42.8466796875, 196.03591918945312, -39.49401092529297, -10.663105010986328, 12.324981689453125, 103.00028991699219, 10.791900634765625, 139.82379150390625, 7.93414306640625, 20.585594177246094, 204.77163696289062, -62.07980728149414, 101.33828735351562, 12.218971252441406, 139.47691345214844, 39.116905212402344, 25.37157440185547, 261.81402587890625, 146.053955078125, 201.1824951171875, -84.60176086425781, -25.681129455566406, -1.9693546295166016, 189.6573028564453, 7.5102996826171875, 173.33895874023438, 149.0218505859375, 66.10202026367188, 37.03779602050781, 225.52493286132812, 88.12348175048828, 181.1832275390625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000542.npy"} +{"epoch": 0.8193499622071051, "step": 543, "batch_size": 64, "mean": 73.95252990722656, "std": 101.69051361083984, "min": -227.01519775390625, "p10": -33.56692352294922, "median": 68.55611038208008, "p90": 196.92164916992195, "max": 384.8651123046875, "pos_frac": 0.796875, "sample": [7.1926116943359375, 39.9311637878418, 75.34133911132812, -5.24034309387207, 91.50346374511719, 39.72348403930664, 144.1273193359375, 163.84664916992188, -174.2416229248047, 218.74468994140625, 73.83775329589844, 155.15380859375, 116.94175720214844, 129.7570343017578, 203.9675750732422, 70.37239074707031, 180.4811553955078, -72.656005859375, 84.53437805175781, 171.79360961914062, 11.000753402709961, 17.33788299560547, 42.167476654052734, 58.160736083984375, 15.958263397216797, 175.68997192382812, -227.01519775390625, -0.332061767578125, -75.22636413574219, -51.782867431640625, 25.231277465820312, 157.76925659179688, 247.08050537109375, 47.2591438293457, 171.3744659423828, 128.20545959472656, 208.08822631835938, -6.377038955688477, -61.00261688232422, -12.283439636230469, 85.42515563964844, 62.93891143798828, 151.61160278320312, 31.33935546875, 163.22100830078125, 384.8651123046875, 119.30529022216797, 222.52154541015625, 30.298030853271484, 62.32410430908203, -8.519481658935547, 68.66146087646484, 78.5057373046875, 4.128166198730469, 142.70542907714844, 58.112274169921875, 75.7017822265625, 17.302610397338867, 0.1605224609375, -31.23592758178711, 143.16526794433594, 250.12286376953125, -34.565921783447266, 68.45075988769531], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000543.npy"} +{"epoch": 0.8208616780045351, "step": 544, "batch_size": 64, "mean": 87.98986053466797, "std": 127.41131591796875, "min": -151.15951538085938, "p10": -40.13933563232422, "median": 79.49538803100586, "p90": 210.16072387695314, "max": 631.8416748046875, "pos_frac": 0.75, "sample": [98.16360473632812, -93.70231628417969, 73.75765991210938, 19.876375198364258, 140.8201904296875, -9.832977294921875, -151.15951538085938, 53.369056701660156, 100.8232421875, 57.176353454589844, -36.13844299316406, 59.52851486206055, -5.880096435546875, 205.51663208007812, 103.22138214111328, 35.308998107910156, -140.73216247558594, 85.23311614990234, 125.88712310791016, 120.29025268554688, 57.800933837890625, 103.62189483642578, 168.59768676757812, 161.7524871826172, 191.96908569335938, 32.88697052001953, 45.484283447265625, -32.79578399658203, -31.75821876525879, -118.44314575195312, 284.3431701660156, -101.64892578125, 165.8054962158203, -8.877418518066406, 180.75494384765625, 148.2891387939453, 631.8416748046875, -40.49394226074219, 63.872283935546875, 171.97116088867188, 88.77996826171875, 51.965293884277344, 263.4736328125, 54.97679901123047, 50.881614685058594, 70.84471130371094, 121.47691345214844, 119.42145538330078, 206.50930786132812, -39.311920166015625, 20.990259170532227, 195.66912841796875, 308.26959228515625, 271.646484375, -29.226722717285156, -95.64579772949219, 211.72561645507812, 4.5975341796875, 325.2951354980469, 140.41323852539062, -13.460784912109375, 111.739013671875, 143.58673095703125, 130.23333740234375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000544.npy"} +{"epoch": 0.8223733938019653, "step": 545, "batch_size": 64, "mean": 58.89155960083008, "std": 110.53691101074219, "min": -199.94247436523438, "p10": -54.33967285156248, "median": 42.32789993286133, "p90": 196.39603118896488, "max": 371.09539794921875, "pos_frac": 0.65625, "sample": [92.8951187133789, 89.20443725585938, -63.1649169921875, 204.4390106201172, -19.70987319946289, 9.190101623535156, -31.18311309814453, -68.80249786376953, -13.854242324829102, 29.486812591552734, 107.45065307617188, -20.851806640625, 208.5833282470703, 25.286226272583008, 73.30219268798828, 61.51911163330078, 176.74424743652344, 160.3699188232422, -20.301668167114258, 81.84100341796875, -12.600624084472656, 149.93194580078125, -8.762496948242188, 150.5604248046875, 364.7438049316406, 29.01490020751953, 40.67658233642578, 32.497169494628906, 68.00712585449219, -1.5505084991455078, -0.2721061706542969, -33.7474365234375, -188.26873779296875, 97.83428955078125, -63.507476806640625, -22.686704635620117, 184.90696716308594, 201.31991577148438, 154.26918029785156, -13.439414978027344, 18.576946258544922, 288.35235595703125, 115.03041076660156, 112.36763763427734, 20.259872436523438, 110.17777252197266, -28.034889221191406, 98.72711944580078, 77.45632934570312, -199.94247436523438, 11.970489501953125, 88.53598022460938, 43.979217529296875, 371.09539794921875, 106.6947021484375, -141.14608764648438, -6.786510467529297, 121.23700714111328, -139.33502197265625, -1.5292797088623047, 112.80204010009766, 153.49966430664062, 201.74636840820312, 21.95388412475586], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000545.npy"} +{"epoch": 0.8238851095993953, "step": 546, "batch_size": 64, "mean": 60.669288635253906, "std": 96.26319122314453, "min": -171.3084716796875, "p10": -77.29852752685545, "median": 74.6286392211914, "p90": 169.81378173828125, "max": 244.6695556640625, "pos_frac": 0.796875, "sample": [190.97079467773438, -40.8756103515625, 170.2882537841797, 81.59688568115234, 70.08674621582031, 123.68132781982422, 22.616363525390625, -62.88331604003906, 88.92526245117188, -81.47529602050781, -162.4410400390625, 106.69233703613281, 20.491207122802734, 64.12252807617188, -124.91398620605469, 155.87161254882812, 29.351186752319336, 37.656707763671875, -65.19244384765625, 20.875450134277344, -95.98736572265625, 76.58552551269531, 59.5384521484375, 95.664306640625, 54.777679443359375, 100.56024169921875, 153.2129669189453, 26.986328125, 162.47837829589844, 244.6695556640625, 104.65716552734375, -13.276893615722656, 159.95701599121094, 156.3029327392578, 91.0947265625, -67.552734375, 78.92664337158203, 139.28277587890625, 105.98641967773438, 72.6717529296875, 24.467243194580078, 37.450721740722656, -122.88522338867188, 113.46845245361328, 6.651422500610352, 213.20230102539062, -57.855506896972656, 55.636837005615234, 108.52816009521484, 104.08340454101562, 112.3614501953125, 79.32182312011719, 222.92376708984375, -171.3084716796875, 13.755424499511719, 65.36044311523438, 156.29812622070312, 82.37284851074219, 195.03245544433594, 66.3199691772461, 182.82171630859375, -128.25599670410156, 168.70668029785156, 2.3955211639404297], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000546.npy"} +{"epoch": 0.8253968253968254, "step": 547, "batch_size": 64, "mean": 72.64120483398438, "std": 94.58318328857422, "min": -221.10098266601562, "p10": -13.208116912841794, "median": 71.34172058105469, "p90": 175.12394866943362, "max": 362.3791809082031, "pos_frac": 0.859375, "sample": [9.841178894042969, -2.2983245849609375, 32.38188934326172, 39.02601623535156, 167.90390014648438, 50.335411071777344, -64.01415252685547, 362.3791809082031, 66.29122924804688, 147.8892822265625, 119.5931396484375, 98.95258331298828, 28.95808982849121, 73.0855941772461, 21.783069610595703, -10.486083984375, 84.97618103027344, 196.8720703125, 9.05135726928711, 82.81431579589844, 160.7813262939453, 172.65122985839844, 189.74710083007812, 42.95220947265625, 163.43954467773438, 86.80248260498047, 19.5032958984375, 125.62882232666016, 78.00518798828125, 69.59784698486328, 86.03672790527344, 3.297513961791992, -208.039794921875, 100.47882080078125, -15.24310302734375, 74.73420715332031, 165.27944946289062, 52.70864486694336, 176.18368530273438, 95.5164794921875, 120.3787612915039, 222.0474090576172, 132.81507873535156, 137.03211975097656, -96.61231994628906, 85.31700134277344, 34.00391387939453, -221.10098266601562, 0.17950439453125, 30.743698120117188, 190.24444580078125, 152.46734619140625, 213.3655548095703, -14.374702453613281, 51.61748504638672, -57.115814208984375, 38.61897659301758, 170.90322875976562, 24.637956619262695, 38.126304626464844, 60.74677276611328, 17.836776733398438, 154.76602172851562, 6.994720458984375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000547.npy"} +{"epoch": 0.8269085411942555, "step": 548, "batch_size": 64, "mean": 49.93299102783203, "std": 99.35533142089844, "min": -152.20352172851562, "p10": -83.19016876220702, "median": 39.81950378417969, "p90": 187.45974426269532, "max": 270.6487121582031, "pos_frac": 0.703125, "sample": [76.07862854003906, 127.17839813232422, -55.842742919921875, -79.09269714355469, -90.29420471191406, 134.78001403808594, -52.54600524902344, 224.67593383789062, 182.95684814453125, 98.00255584716797, -27.619155883789062, 189.38955688476562, 3.663105010986328, 12.371147155761719, 39.912017822265625, 19.981828689575195, -94.90547180175781, 22.87700653076172, -5.292236328125, 63.797943115234375, 150.22940063476562, 56.54676818847656, -97.30375671386719, 38.20232391357422, 116.17886352539062, 33.08291244506836, 248.037841796875, 162.71510314941406, 65.31863403320312, 102.42230224609375, -59.32266616821289, 129.37193298339844, 41.906829833984375, 171.00025939941406, 45.14503479003906, 26.701278686523438, 204.1728973388672, 70.9010009765625, -48.28192138671875, 24.31238555908203, 70.97032928466797, -29.56670570373535, 51.96814727783203, -152.20352172851562, 249.08761596679688, 8.951416015625, -1.2749252319335938, -2.1034698486328125, -102.68181610107422, 56.41980743408203, 11.123199462890625, -123.04389953613281, 39.72698974609375, 114.8502197265625, 223.97665405273438, 45.34595489501953, 151.45387268066406, 4.582733154296875, 37.613739013671875, -71.49632263183594, 270.6487121582031, 165.29620361328125, -84.94622802734375, -10.3970947265625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000548.npy"} +{"epoch": 0.8284202569916855, "step": 549, "batch_size": 64, "mean": 49.09757995605469, "std": 102.73126983642578, "min": -356.00469970703125, "p10": -62.00262260437012, "median": 51.3264102935791, "p90": 178.61504669189455, "max": 219.61474609375, "pos_frac": 0.703125, "sample": [-22.413848876953125, 64.4220962524414, -25.18799591064453, 9.638084411621094, 18.563453674316406, -356.00469970703125, 120.6672134399414, 5.257287979125977, 57.629661560058594, 206.54901123046875, 179.2190704345703, 44.84138488769531, 75.40174102783203, 191.56430053710938, 19.363067626953125, -26.58789825439453, -78.08035278320312, 69.59419250488281, 142.75039672851562, 136.13429260253906, 125.2749252319336, 121.31587219238281, 190.96817016601562, -60.15687942504883, 54.618743896484375, -16.3880615234375, 17.432979583740234, 219.61474609375, 127.77925109863281, -40.468101501464844, 177.20565795898438, 139.60519409179688, 175.202880859375, -18.485612869262695, 100.95252990722656, 201.1925048828125, -6.618452072143555, -48.507476806640625, -62.79365539550781, 134.09201049804688, -113.24166870117188, 42.91310119628906, -32.37251281738281, 48.03407669067383, 62.249629974365234, 132.42942810058594, -84.55133056640625, 110.81938171386719, 146.48046875, 64.34480285644531, -21.96874237060547, 128.1552276611328, 96.01956176757812, 127.95512390136719, 27.976882934570312, 30.695266723632812, 37.55719757080078, 11.308147430419922, -20.611164093017578, 74.1760482788086, 179.39048767089844, -192.11859130859375, -82.51748657226562, 3.9641876220703125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000549.npy"} +{"epoch": 0.8299319727891157, "step": 550, "batch_size": 64, "mean": 44.13203811645508, "std": 95.35833740234375, "min": -155.8983154296875, "p10": -65.13648071289063, "median": 34.402512550354004, "p90": 194.1682678222657, "max": 311.0271301269531, "pos_frac": 0.703125, "sample": [44.057159423828125, 212.123779296875, 177.4170379638672, -2.4716033935546875, -103.00825500488281, 11.244575500488281, -0.41048240661621094, -39.13084411621094, -5.166206359863281, 19.071640014648438, 157.98919677734375, 49.255088806152344, 78.2354965209961, 0.19501495361328125, 8.161157608032227, 213.43106079101562, 210.13229370117188, 94.8663101196289, 37.5147705078125, 26.846431732177734, 119.39055633544922, -32.260433197021484, 141.23043823242188, 100.06874084472656, 201.3473663330078, -87.65882873535156, 219.40408325195312, -8.920047760009766, 15.588882446289062, -82.53358459472656, -146.62881469726562, 42.05610656738281, 107.70431518554688, 74.79069519042969, 0.3072509765625, 83.34500122070312, 42.351966857910156, -48.244903564453125, -13.521400451660156, 10.048187255859375, -65.23884582519531, 67.56376647949219, 14.947681427001953, 44.01511001586914, 71.57756042480469, 69.43177795410156, 7.76123046875, 31.290254592895508, 27.713775634765625, 139.4224395751953, -57.564727783203125, 93.96617126464844, 311.0271301269531, 51.088462829589844, 123.8543701171875, 58.10737609863281, -124.96086120605469, 2.6692638397216797, 217.61949157714844, -155.8983154296875, 114.85794067382812, -33.37010192871094, -48.75199508666992, -64.89762878417969], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000550.npy"} +{"epoch": 0.8314436885865457, "step": 551, "batch_size": 64, "mean": 61.839515686035156, "std": 100.36233520507812, "min": -90.88763427734375, "p10": -20.991113471984864, "median": 32.70697784423828, "p90": 178.83133392333988, "max": 501.02716064453125, "pos_frac": 0.71875, "sample": [35.958518981933594, 71.65057373046875, 86.81471252441406, -12.926315307617188, 70.4342041015625, 57.64458465576172, 501.02716064453125, -4.908176422119141, 37.35136413574219, 383.0492248535156, -16.238636016845703, 29.45543670654297, 116.8296890258789, 7.753076553344727, 88.34915161132812, 46.30527877807617, 145.00875854492188, 108.8700180053711, -90.88763427734375, -22.122940063476562, 6.114652633666992, 24.028343200683594, 109.90605926513672, 82.9444580078125, 50.6842041015625, 184.27774047851562, -0.7646636962890625, 73.59290313720703, 54.39097595214844, 156.95742797851562, 13.205133438110352, -20.38019561767578, 8.888946533203125, -38.7513427734375, 49.769378662109375, 279.852294921875, -8.020095825195312, 200.34149169921875, -21.2529354095459, -17.468002319335938, 7.01959228515625, 56.245155334472656, -27.25452423095703, -9.112300872802734, 139.8611297607422, 183.49591064453125, 155.74192810058594, 14.82889175415039, 23.688377380371094, 3.8225059509277344, 48.840065002441406, 128.22979736328125, 3.71649169921875, -68.69784545898438, -4.4180755615234375, 7.9673004150390625, -11.93813705444336, 136.59800720214844, -7.4471435546875, 14.318981170654297, -28.725051879882812, 10.157421112060547, 170.76878356933594, 182.28671264648438], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000551.npy"} +{"epoch": 0.8329554043839759, "step": 552, "batch_size": 64, "mean": 62.83147430419922, "std": 87.28657531738281, "min": -186.0271453857422, "p10": -33.17834167480467, "median": 71.4557113647461, "p90": 138.2446304321289, "max": 280.84417724609375, "pos_frac": 0.796875, "sample": [17.96080207824707, 20.393112182617188, 10.656959533691406, 9.145362854003906, 129.38473510742188, 61.20478439331055, -1.1516132354736328, 61.46174621582031, 44.23350524902344, 134.40060424804688, 158.30972290039062, 280.84417724609375, 106.80160522460938, 86.03874206542969, -18.252655029296875, -66.17923736572266, -1.2425079345703125, 114.90567016601562, 82.97785186767578, -18.990737915039062, 131.50714111328125, 106.12471008300781, 84.48974609375, 105.14069366455078, 118.77944946289062, 20.49407196044922, 138.53610229492188, 134.7225799560547, 120.11418151855469, -176.0758056640625, 59.55696105957031, 39.372283935546875, 104.01040649414062, -120.89794921875, 92.38524627685547, -39.25874328613281, 269.0611267089844, 48.14085388183594, -7.787925720214844, -11.288253784179688, 151.07101440429688, 45.293853759765625, -112.32538604736328, 197.63951110839844, 38.91473388671875, 56.89764404296875, 100.71465301513672, 19.586158752441406, -186.0271453857422, 116.91725158691406, 210.04110717773438, 90.45873260498047, 78.03231048583984, -81.2027359008789, 101.34249877929688, 54.667869567871094, 44.264991760253906, 107.75287628173828, 108.18609619140625, 83.68220520019531, 98.25992584228516, 64.57312774658203, 137.5645294189453, 64.87911224365234], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000552.npy"} +{"epoch": 0.8344671201814059, "step": 553, "batch_size": 64, "mean": 68.6915283203125, "std": 129.03562927246094, "min": -211.35980224609375, "p10": -39.30155448913574, "median": 37.326377868652344, "p90": 253.6719177246094, "max": 519.5927124023438, "pos_frac": 0.671875, "sample": [276.33197021484375, -21.682769775390625, -27.235549926757812, 108.86216735839844, 3.0584335327148438, -15.067239761352539, 152.02545166015625, 191.0115966796875, -32.182167053222656, -14.119285583496094, 32.28083801269531, 519.5927124023438, 240.1288604736328, 190.33323669433594, -32.33411407470703, 252.31387329101562, 27.342851638793945, 49.03577423095703, 54.410064697265625, -211.35980224609375, -19.165441513061523, 199.3152313232422, 12.176445007324219, 119.77825927734375, 141.7142333984375, 26.65215301513672, 14.674613952636719, 36.00224304199219, -18.296966552734375, -71.23934936523438, 318.20733642578125, 160.62486267089844, 75.69731903076172, 336.99298095703125, 33.925537109375, 85.86746978759766, 161.77719116210938, 112.10600280761719, 259.1768493652344, 79.3874740600586, -0.8562393188476562, 60.05021286010742, 38.6505126953125, -20.391826629638672, -68.01478576660156, 15.064140319824219, -22.897762298583984, 281.6835021972656, -189.35418701171875, -129.15664672851562, 50.048248291015625, 126.07313537597656, 115.82412719726562, -40.91449737548828, -30.79853057861328, 42.2103271484375, -33.73307418823242, -95.08952331542969, 117.36204528808594, 135.19625854492188, 7.117820739746094, -35.538021087646484, 254.25393676757812, 11.347335815429688], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000553.npy"} +{"epoch": 0.8359788359788359, "step": 554, "batch_size": 64, "mean": 70.23422241210938, "std": 101.14897918701172, "min": -218.43531799316406, "p10": -34.93683433532715, "median": 63.72294616699219, "p90": 196.33211212158204, "max": 308.1139221191406, "pos_frac": 0.828125, "sample": [63.16801452636719, 203.736572265625, 308.1139221191406, 198.5399169921875, 14.914573669433594, 79.57850646972656, 46.43566131591797, 40.54855728149414, 17.220169067382812, 168.00885009765625, 8.625053405761719, 14.423698425292969, -153.6088104248047, 134.11383056640625, -179.69342041015625, 28.63287353515625, 77.12567138671875, -46.46833801269531, 87.7793960571289, 22.728757858276367, -10.482139587402344, 134.24151611328125, 40.466400146484375, -36.090850830078125, 144.33189392089844, 131.9425048828125, 197.79104614257812, 35.53071975708008, -28.298473358154297, 47.950889587402344, 264.6095275878906, -15.387100219726562, 161.64071655273438, 20.91464614868164, 0.177520751953125, -63.18284606933594, 128.50564575195312, 13.732866287231445, 204.69061279296875, 52.977996826171875, -218.43531799316406, 173.5900115966797, 68.90150451660156, 81.30897521972656, 126.03465270996094, 108.06259155273438, 31.2880916595459, 100.92601013183594, 64.27787780761719, 2.19329833984375, 180.45513916015625, 5.664421081542969, -32.2441291809082, 178.8099365234375, 192.9279327392578, 32.93531036376953, 103.05592346191406, 96.60580444335938, 80.30701446533203, 178.8922576904297, 248.98582458496094, -82.17523956298828, 175.96697998046875, 36.6685791015625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000554.npy"} +{"epoch": 0.8374905517762661, "step": 555, "batch_size": 64, "mean": 63.25345993041992, "std": 108.17216491699219, "min": -237.12164306640625, "p10": -85.48149185180664, "median": 68.64990997314453, "p90": 192.8528259277344, "max": 276.05853271484375, "pos_frac": 0.78125, "sample": [16.98125648498535, -237.12164306640625, 26.54490089416504, 43.74066162109375, 199.8070068359375, 27.172386169433594, -158.0204315185547, -4.192291259765625, -86.40299987792969, 93.37413024902344, 201.47024536132812, -103.09761047363281, 167.0069580078125, 47.30513381958008, 106.13233947753906, 212.28109741210938, 56.452064514160156, 16.424232482910156, 11.250350952148438, 163.26364135742188, 103.4769058227539, 10.067794799804688, 276.05853271484375, 54.704071044921875, 172.43214416503906, 110.15425109863281, 159.71835327148438, 192.73411560058594, 192.90370178222656, 209.37881469726562, 152.02688598632812, 156.23104858398438, 111.13922119140625, 169.7882080078125, -83.33130645751953, 158.6940155029297, -206.23062133789062, 103.5984115600586, -30.646705627441406, 56.25058364868164, 96.50515747070312, 96.77778625488281, 1.3762893676757812, 46.039283752441406, -95.62686157226562, 31.755680084228516, 45.075286865234375, 101.9572525024414, 150.17759704589844, -47.129295349121094, 93.59201049804688, 41.16901397705078, -182.68861389160156, -12.100624084472656, 164.515625, 67.10310363769531, 70.19671630859375, 59.912506103515625, 85.93075561523438, -78.83269500732422, 213.17713928222656, 73.32129669189453, 160.10528564453125, -3.608011245727539], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000555.npy"} +{"epoch": 0.8390022675736961, "step": 556, "batch_size": 64, "mean": 53.99998474121094, "std": 105.59336853027344, "min": -156.40341186523438, "p10": -59.20853042602539, "median": 34.71976089477539, "p90": 177.51180572509767, "max": 374.2872314453125, "pos_frac": 0.6875, "sample": [128.160400390625, 28.426513671875, 79.90556335449219, 22.489925384521484, -37.17588806152344, 141.35879516601562, 210.1763916015625, 82.96078491210938, -57.14613342285156, -31.47930908203125, -3.1434154510498047, -2.9210662841796875, 87.52887725830078, 158.9725341796875, 22.53976058959961, -47.98175048828125, 95.92425537109375, 35.82324981689453, 74.5125732421875, 121.34746551513672, -37.058685302734375, 127.47889709472656, 167.09976196289062, 13.829582214355469, -60.09241485595703, 20.29712677001953, -149.16744995117188, 64.0721435546875, 104.4080581665039, -81.61981201171875, 7.504755020141602, -156.40341186523438, 139.81002807617188, 64.74022674560547, 79.74742126464844, -1.2605915069580078, 98.39122009277344, 131.27528381347656, 24.782752990722656, -16.151124954223633, 150.9578857421875, 336.16845703125, 18.655941009521484, 33.61627197265625, 219.29258728027344, 180.93826293945312, -43.71160888671875, 11.851165771484375, -144.46499633789062, 6.50274658203125, 217.07957458496094, -50.17237091064453, -29.71277618408203, 166.29779052734375, 15.404651641845703, -33.204559326171875, 70.67408752441406, -115.829833984375, 75.56747436523438, 173.37904357910156, 179.28298950195312, 374.2872314453125, 65.47652435302734, -74.30088806152344], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000556.npy"} +{"epoch": 0.8405139833711263, "step": 557, "batch_size": 64, "mean": 75.6582260131836, "std": 104.66444396972656, "min": -127.68206787109375, "p10": -42.12977905273437, "median": 61.961931228637695, "p90": 209.44791259765626, "max": 416.5633544921875, "pos_frac": 0.703125, "sample": [210.30398559570312, 99.80314636230469, 79.60316467285156, 1.7237548828125, -13.197505950927734, -32.15742492675781, 211.7220916748047, 34.430152893066406, 164.08212280273438, 76.33183288574219, -34.99769592285156, -46.533538818359375, -11.572395324707031, 100.64972686767578, 19.85617446899414, 61.197322845458984, 80.47085571289062, 28.822479248046875, 173.24191284179688, 88.8843002319336, 30.426311492919922, -8.418670654296875, 180.9053955078125, -59.66107177734375, -47.08941650390625, -127.68206787109375, 185.93630981445312, 236.5685577392578, 7.181896209716797, -57.750221252441406, 207.45040893554688, 84.10198974609375, 121.30872344970703, -19.864360809326172, -21.923599243164062, 186.16452026367188, 144.01922607421875, 134.70553588867188, 37.08659362792969, 384.8127746582031, -25.952957153320312, 51.426025390625, -3.3370590209960938, -25.899078369140625, -9.211610794067383, -24.397064208984375, 416.5633544921875, 160.08746337890625, 133.31874084472656, 62.726539611816406, 28.814498901367188, 78.5298843383789, 56.38594055175781, 116.0302734375, 129.9967041015625, 94.96258544921875, 252.554931640625, 34.29250717163086, 50.90839767456055, 123.96626281738281, -50.375152587890625, -45.18638610839844, 213.74403381347656, 131.23411560058594], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000557.npy"} +{"epoch": 0.8420256991685563, "step": 558, "batch_size": 64, "mean": 84.29093933105469, "std": 100.38368225097656, "min": -151.16830444335938, "p10": -23.2610595703125, "median": 66.9315071105957, "p90": 214.71462249755862, "max": 395.4012451171875, "pos_frac": 0.796875, "sample": [134.97943115234375, 210.42384338378906, 217.56768798828125, 1.7682266235351562, 153.91848754882812, 20.73149871826172, 179.56149291992188, -7.403858184814453, 12.663177490234375, 189.5131378173828, -23.198158264160156, 146.10614013671875, 42.41511535644531, 29.465721130371094, 2.411865234375, 117.76271057128906, 150.71783447265625, -10.47052001953125, 73.74497985839844, 77.46745300292969, 265.5142517089844, 184.7403106689453, 47.42551803588867, 147.3376007080078, 121.60958862304688, -33.61578369140625, 216.55352783203125, 33.17345428466797, 130.13265991210938, 60.11803436279297, 163.68936157226562, 41.43074035644531, 86.0677490234375, 238.6414031982422, 27.616310119628906, 16.592458724975586, 91.7257080078125, 29.030166625976562, 39.88652038574219, 395.4012451171875, 133.80613708496094, -29.821746826171875, 354.009521484375, -5.2744293212890625, 177.826171875, 160.44775390625, -23.28801727294922, -32.15221405029297, 227.7197723388672, 32.93145751953125, 85.61552429199219, -151.16830444335938, 96.79732513427734, -63.08294677734375, 17.05841064453125, -13.113227844238281, 112.16777801513672, 33.785987854003906, -45.30128479003906, 5.757232666015625, 138.5977020263672, 44.443199157714844, 115.99401092529297, -2.35284423828125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000558.npy"} +{"epoch": 0.8435374149659864, "step": 559, "batch_size": 64, "mean": 78.87471771240234, "std": 120.8677749633789, "min": -162.3714141845703, "p10": -47.860939025878906, "median": 57.6204833984375, "p90": 251.63559570312506, "max": 521.9260864257812, "pos_frac": 0.703125, "sample": [-0.5092926025390625, -46.034141540527344, 32.07419967651367, -9.655738830566406, 132.71444702148438, 521.9260864257812, 210.48678588867188, 277.1685791015625, 56.679954528808594, -7.654964447021484, -124.56393432617188, 257.6988220214844, 59.134971618652344, 119.12974548339844, 29.421812057495117, 145.91897583007812, 145.51174926757812, 58.561012268066406, 138.90972900390625, 70.38504028320312, 179.1142578125, 50.78216552734375, -30.143600463867188, 139.090087890625, -18.817359924316406, 26.27678680419922, -78.75448608398438, 152.70877075195312, 27.778045654296875, 237.48806762695312, 62.95196533203125, 70.24501037597656, 12.727104187011719, 259.34246826171875, 189.9210968017578, -40.628257751464844, 92.2715835571289, -6.411592483520508, -32.192787170410156, -48.64385223388672, -6.211372375488281, -8.561988830566406, -52.91921615600586, 15.572723388671875, 108.22531127929688, 210.66433715820312, 104.93099212646484, -74.81729888916016, 143.2959747314453, 163.3585662841797, 38.86212158203125, 271.9190673828125, -122.96212768554688, 49.11140441894531, 130.537353515625, 258.25213623046875, 21.804759979248047, 326.28778076171875, 25.13678741455078, 24.49357795715332, 122.90931701660156, 162.9293670654297, -14.875633239746094, -162.3714141845703], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000559.npy"} +{"epoch": 0.8450491307634165, "step": 560, "batch_size": 64, "mean": 62.700660705566406, "std": 89.89407348632812, "min": -109.00618743896484, "p10": -55.508252716064455, "median": 49.52420234680176, "p90": 180.43162384033204, "max": 301.4762878417969, "pos_frac": 0.703125, "sample": [113.91020965576172, 60.89117431640625, 170.53013610839844, 23.54521942138672, -52.20628356933594, -104.53179168701172, 177.4957275390625, 29.455825805664062, 301.4762878417969, 16.22080421447754, 113.85536193847656, -10.594169616699219, 62.733642578125, 50.52531433105469, 35.32470703125, -62.689788818359375, 15.351913452148438, -0.6905345916748047, 36.87105178833008, 97.73995971679688, -55.6895751953125, 140.13302612304688, 79.82756805419922, 7.242668151855469, 181.6898651123047, 116.75105285644531, -18.41016387939453, 154.3241424560547, 199.1128387451172, 36.40552520751953, 53.57891845703125, -10.708290100097656, -55.085166931152344, 211.39605712890625, 129.16134643554688, -68.43145751953125, 47.460899353027344, -62.35400390625, 196.1279754638672, -40.05730438232422, 209.498046875, 156.19500732421875, 101.03453826904297, 104.91984558105469, -16.26105499267578, 102.16780853271484, -4.043327331542969, 168.1772003173828, 47.555049896240234, -21.692840576171875, -29.588211059570312, 126.22589111328125, -109.00618743896484, -4.6591033935546875, 117.45196533203125, 140.237548828125, -80.99583435058594, 36.286685943603516, 128.72206115722656, 48.52309036254883, 202.59109497070312, 105.55453491210938, 38.57674026489258, 127.68110656738281], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000560.npy"} +{"epoch": 0.8465608465608465, "step": 561, "batch_size": 64, "mean": 64.77287292480469, "std": 101.8272705078125, "min": -121.27651977539062, "p10": -53.62154235839843, "median": 47.084896087646484, "p90": 199.90057983398438, "max": 354.6990661621094, "pos_frac": 0.78125, "sample": [8.508766174316406, 23.574851989746094, 6.22271728515625, 32.09016418457031, 56.60650634765625, 149.466552734375, 58.86215591430664, -14.079216003417969, 161.4749755859375, 49.28355407714844, 39.32037353515625, 125.35124206542969, 16.156726837158203, 96.96086120605469, 28.446136474609375, -61.063411712646484, 15.600387573242188, 144.49032592773438, 216.0694580078125, 15.067474365234375, -47.92622375488281, 151.830078125, 90.6951904296875, -28.705699920654297, 90.84376525878906, -121.27651977539062, 145.14828491210938, -39.070743560791016, -92.20770263671875, 305.37042236328125, -39.04011154174805, 35.335479736328125, 199.95977783203125, 199.762451171875, 50.29566955566406, 10.523147583007812, 67.23828887939453, 75.72108459472656, 4.618505477905273, 354.6990661621094, 273.726318359375, 264.55706787109375, 115.90289306640625, 208.1720428466797, 116.128173828125, -117.18846893310547, -88.96829986572266, 44.88623809814453, 151.1059112548828, 9.732404708862305, -26.932830810546875, 61.400535583496094, 27.94470977783203, -9.307317733764648, 117.28341674804688, 182.89744567871094, 6.513891220092773, 108.22639465332031, 79.43231201171875, -56.06239318847656, 1.9163990020751953, 12.852968215942383, 164.58554077148438, -85.5662841796875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000561.npy"} +{"epoch": 0.8480725623582767, "step": 562, "batch_size": 64, "mean": 82.03276062011719, "std": 105.94670104980469, "min": -141.70755004882812, "p10": -34.940704345703125, "median": 61.80521011352539, "p90": 189.64243164062503, "max": 451.28289794921875, "pos_frac": 0.78125, "sample": [19.269920349121094, 193.5102996826172, -80.79484558105469, 147.90982055664062, 158.54969787597656, 139.40414428710938, 31.430076599121094, 367.6588134765625, 178.07467651367188, -46.89105987548828, 108.13478088378906, 81.29058837890625, -47.43040466308594, 1.7153587341308594, 145.33929443359375, 2.0290565490722656, 37.77046203613281, 64.05291748046875, 124.90855407714844, -35.775665283203125, 141.410888671875, 149.13275146484375, -19.179550170898438, 8.986534118652344, -9.504997253417969, 175.32913208007812, -16.484426498413086, 92.75045013427734, 47.105804443359375, 42.2651481628418, 157.25228881835938, -63.1463623046875, 225.900146484375, 184.21072387695312, 46.11206817626953, 261.54443359375, -10.747089385986328, -2.0184783935546875, 7.154514312744141, 227.67919921875, 32.24031066894531, 15.603446960449219, 53.87062072753906, 148.34193420410156, 132.15167236328125, 191.97030639648438, 171.8736114501953, 151.32275390625, 131.85174560546875, -32.992462158203125, 133.17510986328125, 167.88070678710938, -8.827241897583008, 32.75208282470703, 119.51493835449219, 91.44915771484375, 163.42430114746094, 451.28289794921875, 59.55750274658203, -77.96699523925781, 10.592254638671875, -141.70755004882812, 4.835548400878906, 11.990020751953125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000562.npy"} +{"epoch": 0.8495842781557067, "step": 563, "batch_size": 64, "mean": 64.97576141357422, "std": 103.94428253173828, "min": -183.98745727539062, "p10": -67.64158172607422, "median": 65.6597900390625, "p90": 190.0098114013672, "max": 305.31549072265625, "pos_frac": 0.78125, "sample": [49.048851013183594, 102.95646667480469, 46.78107833862305, -1.0753898620605469, 206.29991149902344, 152.37826538085938, -183.98745727539062, 143.98812866210938, 106.05250549316406, 23.87908172607422, 23.461856842041016, 98.27571105957031, 16.06194305419922, 1.267608642578125, 161.70303344726562, 15.503721237182617, -101.69461059570312, 143.96633911132812, -67.79722595214844, 200.76707458496094, 21.034271240234375, 305.31549072265625, -48.513092041015625, 164.88754272460938, 165.69378662109375, 83.48773956298828, 113.77983093261719, 15.344703674316406, 83.97720336914062, -104.21215057373047, -26.832138061523438, -57.1629524230957, 10.531242370605469, 26.265792846679688, -22.923954010009766, 170.1243133544922, 62.18631362915039, -106.13369750976562, 133.19290161132812, 38.6536979675293, 189.75933837890625, 199.7640380859375, 142.5480499267578, 4.1005859375, -162.08377075195312, 66.39479064941406, 64.92478942871094, -51.18592071533203, 190.11715698242188, 83.65315246582031, -123.64169311523438, 88.43038940429688, 36.48276901245117, 146.01708984375, 93.22357177734375, 20.259462356567383, 280.20721435546875, 222.5064697265625, 152.6887664794922, 134.91090393066406, 138.5772705078125, -67.27841186523438, 138.14938354492188, 3.3895111083984375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000563.npy"} +{"epoch": 0.8510959939531368, "step": 564, "batch_size": 64, "mean": 78.40560913085938, "std": 103.0383529663086, "min": -270.8210144042969, "p10": -48.37673530578612, "median": 76.7674331665039, "p90": 224.42993774414066, "max": 252.08131408691406, "pos_frac": 0.84375, "sample": [176.51112365722656, 45.87134552001953, 215.72509765625, 155.096435546875, 99.5699462890625, 228.16058349609375, 104.67311096191406, -102.64851379394531, 4.0092010498046875, 14.139190673828125, 210.41738891601562, 151.89691162109375, 86.35850524902344, -75.5595932006836, 18.359207153320312, 14.447616577148438, -32.39023971557617, 103.880126953125, 159.93008422851562, 74.05597686767578, 66.58287048339844, 12.555475234985352, 97.60121154785156, 78.936767578125, 128.04603576660156, 160.24200439453125, 239.01068115234375, -32.90928649902344, 3.4664859771728516, 69.7147216796875, 184.02679443359375, 252.08131408691406, 232.18182373046875, 82.1524658203125, 34.776275634765625, -270.8210144042969, 163.6383514404297, 50.3285026550293, 35.20307922363281, 23.885032653808594, 171.83006286621094, 90.30241394042969, 72.83345031738281, -26.052995681762695, -55.00564193725586, 14.514190673828125, 42.439308166503906, 229.3961181640625, 59.900062561035156, -67.26469421386719, 74.59809875488281, 91.13561248779297, 237.7349853515625, 1.3734683990478516, 160.04953002929688, 42.28876495361328, 141.35971069335938, 231.98379516601562, -148.6054229736328, 181.4417724609375, -62.344512939453125, 30.153583526611328, 85.85162353515625, 154.8426971435547], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000564.npy"} +{"epoch": 0.8526077097505669, "step": 565, "batch_size": 64, "mean": 71.27435302734375, "std": 96.14348602294922, "min": -147.99520874023438, "p10": -39.84469451904296, "median": 72.04281997680664, "p90": 177.51363830566407, "max": 361.615478515625, "pos_frac": 0.796875, "sample": [34.737396240234375, 31.52695083618164, 5.150444030761719, 73.91021728515625, 135.74954223632812, 10.618408203125, 145.482421875, 67.43135833740234, 6.784027099609375, 45.29645538330078, 137.3358154296875, 104.92430114746094, 280.4549865722656, -147.99520874023438, -0.752655029296875, 115.46003723144531, 93.34140014648438, 138.7285919189453, 361.615478515625, 198.48019409179688, 248.46905517578125, -81.42237854003906, 11.360458374023438, 23.21587371826172, 76.02493286132812, 130.697998046875, 34.35859680175781, 87.70169830322266, 74.4827651977539, 101.05265045166016, 177.25863647460938, 29.2568302154541, -26.945022583007812, -68.34773254394531, 135.54623413085938, -30.862003326416016, 36.21940612792969, 143.5965576171875, -133.72100830078125, 84.88752746582031, 70.54214477539062, 31.077754974365234, -88.49827575683594, 150.948486328125, -42.942413330078125, 143.48870849609375, 6.823486328125, 132.43463134765625, 229.17469787597656, 19.16217803955078, 137.54275512695312, -32.61668395996094, -85.74600219726562, -7.018779754638672, 72.75297546386719, 38.92660903930664, -11.714279174804688, 177.6229248046875, 71.3326644897461, 68.20872497558594, 166.78956604003906, 138.28341674804688, 78.97652435302734, 204.89529418945312], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000565.npy"} +{"epoch": 0.854119425547997, "step": 566, "batch_size": 64, "mean": 71.08990478515625, "std": 127.0308609008789, "min": -203.67518615722656, "p10": -67.99123916625976, "median": 59.36836242675781, "p90": 182.76105651855468, "max": 583.0045776367188, "pos_frac": 0.703125, "sample": [56.022125244140625, 40.26061248779297, -30.412704467773438, 115.6600341796875, 32.579620361328125, 182.66152954101562, 182.8037109375, 172.4622802734375, 14.440984725952148, 145.2593994140625, 64.43083190917969, 178.1515655517578, 77.79833984375, 65.72068786621094, 32.139671325683594, 262.827880859375, -203.67518615722656, 95.49373626708984, 158.29409790039062, 128.3888397216797, 201.6006622314453, -40.52076721191406, -61.17863082885742, 127.91516876220703, 3.0302295684814453, -76.34172058105469, 123.81036376953125, 152.22251892089844, 327.6040954589844, -3.2131195068359375, 168.73739624023438, 157.58168029785156, 152.63946533203125, 34.415771484375, -1.023773193359375, -182.72772216796875, -52.662025451660156, 30.256633758544922, 323.488525390625, -44.377845764160156, 59.81671142578125, -87.1343994140625, 583.0045776367188, -61.41224670410156, 166.4126434326172, 22.40660858154297, -68.43037414550781, -82.39595794677734, 86.43115997314453, 8.739234924316406, -29.78292465209961, 21.076499938964844, -15.516921997070312, 158.1558380126953, -16.342098236083984, 27.49191665649414, -66.96659088134766, 177.25192260742188, 70.62126922607422, -78.36383056640625, 149.1144256591797, 109.60640716552734, 58.920013427734375, 244.4849853515625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000566.npy"} +{"epoch": 0.8556311413454271, "step": 567, "batch_size": 64, "mean": 81.26472473144531, "std": 136.307861328125, "min": -202.66275024414062, "p10": -66.79768867492676, "median": 80.65040588378906, "p90": 206.25452575683596, "max": 844.57470703125, "pos_frac": 0.796875, "sample": [104.36558532714844, 5.757320404052734, -14.104209899902344, 161.02047729492188, 35.550445556640625, 136.426025390625, 194.31939697265625, 134.4626922607422, 175.10183715820312, 28.074790954589844, 152.91835021972656, 59.82301330566406, 71.66472625732422, -79.62095642089844, 108.05521392822266, -4.902717590332031, -22.602882385253906, 35.126251220703125, 122.70597076416016, 61.727420806884766, 140.97161865234375, 14.076942443847656, -2.9020862579345703, 172.433349609375, -202.66275024414062, 6.809514999389648, -82.07654571533203, -88.50528717041016, 124.20343780517578, -113.25765228271484, 215.96295166015625, 93.58114624023438, 264.47796630859375, 844.57470703125, 198.708740234375, 0.6359672546386719, 113.09156036376953, 8.68438720703125, 161.14210510253906, 247.3091278076172, -68.34622192382812, 94.07332611083984, 210.53073120117188, 101.08235168457031, 155.3995361328125, 100.79872131347656, 5.301856994628906, 2.7247467041015625, 134.00448608398438, 85.10751342773438, 98.14163208007812, 5.268886566162109, 209.48843383789062, 47.4820556640625, 5.372894287109375, -94.59346771240234, -63.184444427490234, 56.511444091796875, 227.74441528320312, -43.524871826171875, 104.29801940917969, 64.06394958496094, 76.19329833984375, 103.87523651123047], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000567.npy"} +{"epoch": 0.8571428571428571, "step": 568, "batch_size": 64, "mean": 89.83714294433594, "std": 124.42958068847656, "min": -103.74221801757812, "p10": -42.530870819091795, "median": 68.69835662841797, "p90": 238.8748107910157, "max": 631.14208984375, "pos_frac": 0.765625, "sample": [120.56964874267578, 52.481658935546875, 65.57179260253906, 29.188758850097656, 90.84027099609375, 121.55862426757812, 110.10501098632812, -103.74221801757812, 45.03187942504883, -8.244293212890625, -41.010841369628906, 136.48809814453125, 62.696006774902344, 144.3514404296875, -23.36345672607422, -91.93821716308594, 13.795448303222656, 10.136543273925781, 79.63338470458984, 82.02140808105469, -65.95689392089844, 27.938873291015625, 46.74799728393555, 162.9432373046875, 171.87974548339844, 250.25323486328125, 161.3739013671875, 292.01129150390625, -68.60124206542969, 9.42755126953125, 106.84407043457031, 53.47837829589844, 82.83737182617188, -15.380165100097656, 119.90863037109375, 29.208337783813477, -1.5229339599609375, 248.5057373046875, -5.355646133422852, 137.27114868164062, 44.90032196044922, 202.96163940429688, 76.48068237304688, 93.308837890625, 144.94873046875, 45.153541564941406, 61.70417785644531, -1.467926025390625, -43.18231201171875, 41.24212646484375, 137.06509399414062, 121.86056518554688, -46.29339599609375, 97.8849868774414, 270.4276428222656, 71.82492065429688, -1.78741455078125, 631.14208984375, 420.96319580078125, 179.66172790527344, -61.80348205566406, 33.791038513183594, 216.40264892578125, 372.404296875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000568.npy"} +{"epoch": 0.8586545729402872, "step": 569, "batch_size": 64, "mean": 51.020259857177734, "std": 115.341796875, "min": -200.38311767578125, "p10": -56.13357391357422, "median": 36.021263122558594, "p90": 177.06183471679688, "max": 620.2975463867188, "pos_frac": 0.71875, "sample": [151.27230834960938, -56.03330993652344, 40.14582061767578, 145.05206298828125, 63.81678771972656, 176.2127685546875, -3.7628631591796875, 620.2975463867188, 46.667724609375, 177.42572021484375, 67.31423950195312, 36.802955627441406, 4.006130218505859, 76.11823272705078, -200.38311767578125, 31.572158813476562, 25.244060516357422, 220.0935821533203, 5.128908157348633, 57.54219055175781, 180.50108337402344, 166.17706298828125, 46.52274703979492, 231.26156616210938, -48.944091796875, 135.28810119628906, 105.46397399902344, 20.943157196044922, 15.6766357421875, -116.78925323486328, -107.3545150756836, 38.45937728881836, -16.549705505371094, 16.900447845458984, -51.531227111816406, -18.754135131835938, 185.89273071289062, 10.89097785949707, 21.10034942626953, 35.23957061767578, 24.798673629760742, -56.176544189453125, -136.3598175048828, -20.566383361816406, 62.77189636230469, -103.6939926147461, 26.406494140625, -45.47267150878906, 25.544872283935547, 102.86096954345703, 95.03971862792969, 89.49214172363281, 184.89166259765625, -41.614585876464844, -41.45026397705078, 39.45630645751953, 115.41459655761719, 125.59654235839844, 149.84725952148438, -28.20098876953125, -73.19637298583984, 45.880531311035156, 166.02191162109375, 23.075828552246094], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000569.npy"} +{"epoch": 0.8601662887377173, "step": 570, "batch_size": 64, "mean": 91.1358413696289, "std": 126.53754425048828, "min": -110.85084533691406, "p10": -42.88658828735351, "median": 87.32366943359375, "p90": 212.6078796386719, "max": 773.3682861328125, "pos_frac": 0.765625, "sample": [155.14215087890625, -99.70374298095703, 103.09183502197266, 245.30624389648438, 114.70612335205078, 78.5907974243164, -29.14483642578125, -29.652755737304688, 50.67730712890625, -36.02925109863281, 78.32111358642578, 56.870819091796875, 40.51408386230469, 111.43292236328125, -7.993003845214844, 123.95736694335938, -1.0380229949951172, -3.6106224060058594, 73.09010314941406, 93.67747497558594, 96.16500854492188, -76.41377258300781, 14.667015075683594, 135.13922119140625, 234.29800415039062, 2.6288280487060547, 108.68812561035156, 75.353759765625, 215.8773193359375, 23.392677307128906, 36.38239288330078, 150.66458129882812, 156.24661254882812, -101.51759338378906, 42.94350814819336, 80.36292266845703, 164.01361083984375, 80.96986389160156, 168.15235900878906, 94.84217071533203, 228.6552734375, 168.34231567382812, 251.3983154296875, -76.04972076416016, -3.9249954223632812, 95.7069091796875, 204.97918701171875, 186.86241149902344, 124.459228515625, -110.85084533691406, 139.15771484375, -45.82544708251953, 39.230003356933594, 173.74391174316406, 277.15087890625, 60.25935363769531, 111.68609619140625, -11.15876579284668, 102.103271484375, 169.60287475585938, 773.3682861328125, 20.956058502197266, 181.15310668945312, -49.374053955078125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000570.npy"} +{"epoch": 0.8616780045351474, "step": 571, "batch_size": 64, "mean": 50.90162658691406, "std": 103.60317993164062, "min": -180.33444213867188, "p10": -68.15439834594726, "median": 51.3266716003418, "p90": 150.70408782958984, "max": 457.4656982421875, "pos_frac": 0.734375, "sample": [103.63279724121094, 75.20789337158203, 65.18745422363281, 38.24291229248047, 9.770828247070312, 299.5623474121094, 123.56466674804688, 34.372474670410156, 118.3866195678711, 457.4656982421875, 46.527008056640625, 72.59986114501953, -61.144737243652344, 30.673465728759766, -8.354499816894531, 100.87742614746094, 34.63554382324219, 149.7634735107422, 11.034698486328125, 110.32457733154297, 171.20559692382812, 0.46009063720703125, 126.30122375488281, 36.47642517089844, 130.13082885742188, 69.74922180175781, 84.75405883789062, 142.59727478027344, -180.33444213867188, 11.118064880371094, -0.8925380706787109, -51.50987243652344, -1.1762313842773438, -88.9109878540039, 53.81666564941406, -7.802206039428711, -40.94358825683594, 2.7275543212890625, -34.27430725097656, 8.360260009765625, 78.7666244506836, 57.3583984375, 111.54200744628906, 102.2059555053711, 151.99072265625, 151.10720825195312, 178.26815795898438, -167.13571166992188, 29.21399688720703, -9.739822387695312, -91.31642150878906, -137.4499969482422, 96.31790924072266, 74.17418670654297, 102.18060302734375, -38.466880798339844, 114.951904296875, 140.32594299316406, 48.83667755126953, 71.69410705566406, -71.15853881835938, 35.355979919433594, -178.0978240966797, 162.5953826904297], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000571.npy"} +{"epoch": 0.8631897203325775, "step": 572, "batch_size": 64, "mean": 58.15858840942383, "std": 83.94105529785156, "min": -121.933349609375, "p10": -46.52722778320312, "median": 49.63996124267578, "p90": 162.94185943603517, "max": 231.6959228515625, "pos_frac": 0.75, "sample": [7.873664855957031, -86.21266174316406, 6.615882873535156, 107.34661102294922, 227.43606567382812, 64.65449523925781, 154.07839965820312, 73.81871795654297, -20.808269500732422, 221.80548095703125, -23.560226440429688, 95.7519760131836, 231.19180297851562, 157.81082153320312, -34.479312896728516, 157.8011474609375, -74.64767456054688, 5.336427688598633, 41.946380615234375, 117.70005798339844, 97.28338623046875, 33.8311767578125, 62.37944030761719, 146.42881774902344, 146.1077880859375, 42.43583679199219, -17.49390411376953, 75.56580352783203, -19.12950897216797, 9.023773193359375, 178.11422729492188, 159.98680114746094, 173.08126831054688, -56.897979736328125, 112.07730102539062, 31.78934097290039, 59.583770751953125, -50.49055480957031, 17.733936309814453, 78.43550109863281, 87.01634979248047, 22.489084243774414, 126.55168914794922, 152.38841247558594, 41.297393798828125, 113.29215240478516, 15.960922241210938, -51.43568420410156, 231.6959228515625, 48.1103515625, 40.32456970214844, 95.65715026855469, -9.433731079101562, 51.184120178222656, -97.41934204101562, -37.27946472167969, 54.560035705566406, 48.888427734375, -26.086380004882812, -121.933349609375, 164.20831298828125, -9.90995979309082, 20.325109481811523, 50.39149475097656], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000572.npy"} +{"epoch": 0.8647014361300076, "step": 573, "batch_size": 64, "mean": 67.5362548828125, "std": 126.11917877197266, "min": -190.61282348632812, "p10": -77.20642166137695, "median": 59.728023529052734, "p90": 228.5395263671875, "max": 430.55859375, "pos_frac": 0.75, "sample": [75.59175109863281, 70.3228759765625, -154.8346710205078, 172.96998596191406, -71.44365692138672, 49.863677978515625, -4.522554397583008, 90.76387786865234, 72.09088134765625, -40.36248779296875, -145.91619873046875, -167.43313598632812, 0.1482105255126953, 48.27531433105469, -26.186553955078125, 102.94293212890625, -44.27876281738281, 46.00080871582031, 191.33645629882812, -190.61282348632812, 127.32587432861328, 74.50132751464844, 224.3131103515625, 71.91645050048828, -79.67617797851562, 12.266712188720703, 430.55859375, 24.621585845947266, 137.47348022460938, 35.398643493652344, 44.86883544921875, 38.45989990234375, 73.65708923339844, 12.341903686523438, 226.7652587890625, 7.043895721435547, 106.37826538085938, 47.00407028198242, 51.673072814941406, 357.3475341796875, 110.91072845458984, 125.29396057128906, 121.09246826171875, -22.607057571411133, -57.79240417480469, 47.724021911621094, 259.6240234375, 79.62196350097656, 24.726463317871094, 81.96024322509766, 229.2999267578125, 310.73333740234375, 63.38433837890625, 157.81903076171875, -15.641738891601562, -93.36328125, 348.14556884765625, -116.50496673583984, 64.27488708496094, 88.3855209350586, 106.49246215820312, 56.07170867919922, -70.772216796875, 324.4861755371094], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000573.npy"} +{"epoch": 0.8662131519274376, "step": 574, "batch_size": 64, "mean": 88.89559173583984, "std": 103.9065933227539, "min": -92.59261322021484, "p10": -23.777073669433587, "median": 82.98687744140625, "p90": 207.49983215332034, "max": 478.9583435058594, "pos_frac": 0.75, "sample": [65.19014739990234, -36.834251403808594, -26.69977569580078, -92.59261322021484, 103.32879638671875, 112.66888427734375, 18.504913330078125, -3.2330169677734375, 22.961130142211914, -30.587203979492188, -8.145309448242188, 102.58540344238281, -27.589614868164062, 11.141489028930664, -3.7510147094726562, -5.6273956298828125, -5.512378692626953, 111.28355407714844, 36.66352081298828, 67.23979187011719, 138.0896453857422, 60.966148376464844, 31.836883544921875, 133.0222930908203, -16.957435607910156, 211.66502380371094, 379.4512634277344, 301.42315673828125, 5.463701248168945, 191.55203247070312, 126.76577758789062, -3.8196945190429688, 74.13789367675781, 56.91154479980469, 167.0928955078125, 98.59031677246094, -44.496788024902344, 112.02885437011719, 225.31890869140625, 14.485048294067383, 91.83586120605469, 237.27011108398438, 173.09161376953125, 124.42817687988281, 101.36277770996094, 192.64068603515625, 99.44334411621094, 170.97708129882812, 92.54121398925781, 41.98967742919922, -31.768447875976562, 197.7810516357422, 42.240203857421875, 124.50369262695312, 153.58456420898438, 34.45275115966797, 59.645931243896484, 162.41482543945312, 145.6136474609375, 242.86778259277344, -16.437232971191406, 107.69999694824219, 478.9583435058594, -12.342254638671875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000574.npy"} +{"epoch": 0.8677248677248677, "step": 575, "batch_size": 64, "mean": 52.085784912109375, "std": 111.57093048095703, "min": -210.17364501953125, "p10": -84.45034561157226, "median": 37.747779846191406, "p90": 190.3778991699219, "max": 353.4322204589844, "pos_frac": 0.6875, "sample": [36.52660369873047, -71.5045166015625, -189.06993103027344, -29.55417823791504, 301.998046875, 119.0941162109375, -8.75347900390625, 105.93912506103516, -3.0884246826171875, -34.99087905883789, 100.87188720703125, -89.9985580444336, 233.06996154785156, -57.62449264526367, -54.10040283203125, 81.99845886230469, -95.06893157958984, 92.79972839355469, 1.3202133178710938, 74.40771484375, 47.96314239501953, 192.60235595703125, 114.69879913330078, 38.968955993652344, 4.884391784667969, 1.3022308349609375, -98.54339599609375, 111.7482681274414, 165.71566772460938, 18.78131866455078, 5.91859245300293, 45.146080017089844, 177.92991638183594, 278.40594482421875, 154.5511474609375, -101.09754180908203, 247.95440673828125, -18.9744873046875, 253.4185791015625, 155.2354736328125, 6.3288116455078125, 29.29559326171875, 73.6440658569336, 62.22979736328125, 44.48667907714844, -23.75397491455078, 20.743207931518555, -210.17364501953125, 22.575485229492188, 353.4322204589844, 142.61305236816406, 42.16291809082031, 32.66178894042969, 76.14926147460938, -96.54556274414062, 25.196189880371094, 105.68901062011719, -37.36835861206055, -0.8767452239990234, 107.64340209960938, -56.12830352783203, 185.1875, 132.01280212402344, -14.596759796142578], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000575.npy"} +{"epoch": 0.8692365835222978, "step": 576, "batch_size": 64, "mean": 90.44554138183594, "std": 102.37847137451172, "min": -164.15232849121094, "p10": -20.21576862335205, "median": 89.63336563110352, "p90": 225.43653564453126, "max": 347.5877380371094, "pos_frac": 0.765625, "sample": [34.950714111328125, 145.7264404296875, -21.239479064941406, 109.28658294677734, 184.06707763671875, 35.92680358886719, 219.53697204589844, 46.29136657714844, 347.5877380371094, 97.55325317382812, -20.30080223083496, -36.10649490356445, 194.20050048828125, -16.920501708984375, 103.27715301513672, -82.56365966796875, -20.017356872558594, 147.7967071533203, 261.47406005859375, 145.48660278320312, 167.50062561035156, 274.90618896484375, 63.01738357543945, -1.2766056060791016, 131.88946533203125, 88.69144439697266, 56.429534912109375, -50.48912048339844, 251.54852294921875, -10.732902526855469, 61.97340393066406, 36.642738342285156, 35.67713928222656, 174.19970703125, 90.57528686523438, 52.22764587402344, 298.41778564453125, -36.42973327636719, 0.447540283203125, 30.82928466796875, 118.23001098632812, -11.837932586669922, 141.68939208984375, 15.407133102416992, 227.9649200439453, -6.480224609375, 188.66098022460938, 190.09410095214844, -8.927297592163086, 185.05007934570312, 190.3704071044922, -164.15232849121094, 234.62989807128906, 213.9047393798828, 109.1211929321289, 143.2564239501953, -18.206750869750977, 2.5452728271484375, 57.619178771972656, 112.83914947509766, 11.134023666381836, 30.72088050842285, 124.9139633178711, 107.90867614746094], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000576.npy"} +{"epoch": 0.8707482993197279, "step": 577, "batch_size": 64, "mean": 55.397239685058594, "std": 97.27368927001953, "min": -164.99258422851562, "p10": -69.8232192993164, "median": 53.593238830566406, "p90": 191.4932495117188, "max": 241.01885986328125, "pos_frac": 0.734375, "sample": [58.081932067871094, 4.9479522705078125, 27.83136749267578, 69.6290512084961, 2.0645904541015625, 60.181396484375, -1.7577323913574219, -34.4263916015625, 181.64791870117188, 3.5211563110351562, -14.282997131347656, 171.48965454101562, 140.2666778564453, -52.25364685058594, -72.20672607421875, 201.1641845703125, 216.14105224609375, 168.34051513671875, -148.24417114257812, 137.06033325195312, 195.71267700195312, 96.22220611572266, 27.52188491821289, 130.47268676757812, 18.753448486328125, -96.82413482666016, -97.84140014648438, 121.77876281738281, 122.3575210571289, 124.10406494140625, 35.41114807128906, 135.2160186767578, -81.02053833007812, 29.48710823059082, -3.621978759765625, -15.487586975097656, 62.452510833740234, 75.82794189453125, 4.698402404785156, -31.01034164428711, 85.67766571044922, 50.01446533203125, 226.79505920410156, 22.80304527282715, 208.12420654296875, 241.01885986328125, -160.37725830078125, 72.96107482910156, 100.54978942871094, 107.36643981933594, -64.26170349121094, 91.52853393554688, 240.97836303710938, 162.79042053222656, 13.947219848632812, 36.06131362915039, -164.99258422851562, -2.561155319213867, -26.694782257080078, 125.83411407470703, 37.885780334472656, 104.69638061523438, 57.17201232910156, 4.6996307373046875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000577.npy"} +{"epoch": 0.872260015117158, "step": 578, "batch_size": 64, "mean": 53.927913665771484, "std": 100.46681213378906, "min": -187.76573181152344, "p10": -63.4786407470703, "median": 36.39390563964844, "p90": 183.59716491699226, "max": 324.47137451171875, "pos_frac": 0.671875, "sample": [56.56236267089844, -72.21556091308594, 90.26361083984375, 55.97332763671875, 204.76742553710938, -21.15599250793457, 100.26268005371094, 26.413284301757812, 127.09060668945312, 5.716218948364258, -3.4481201171875, 323.09112548828125, -10.611312866210938, 99.01055908203125, 132.1667938232422, -187.76573181152344, 144.7599334716797, 161.3336639404297, 123.49656677246094, 132.3533477783203, 133.34222412109375, 68.38639831542969, 22.92307472229004, 6.753063201904297, 2.958890914916992, 38.41497039794922, 149.69427490234375, -5.085899353027344, 36.631248474121094, -5.049896240234375, 324.47137451171875, 147.82180786132812, 196.5741729736328, -92.51687622070312, -44.79907989501953, 21.898635864257812, 21.374223709106445, 98.02378845214844, 69.63457489013672, 73.634033203125, 165.83094787597656, -111.66448974609375, 68.16189575195312, -32.222816467285156, 15.593185424804688, -31.691390991210938, -29.86676788330078, 144.2953338623047, -68.90444946289062, 191.2112579345703, 97.2544937133789, -77.3741683959961, 256.32696533203125, -0.4796772003173828, -43.2928466796875, -50.81842041015625, 36.15656280517578, 49.54253387451172, 7.420783996582031, -4.734895706176758, 7.898990631103516, -2.6552352905273438, -94.05438995361328, 206.30323791503906], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000578.npy"} +{"epoch": 0.873771730914588, "step": 579, "batch_size": 64, "mean": 81.40538024902344, "std": 91.115966796875, "min": -132.52232360839844, "p10": -34.57773361206054, "median": 76.15679168701172, "p90": 190.00438385009767, "max": 295.14996337890625, "pos_frac": 0.78125, "sample": [93.80591583251953, 49.39900207519531, 74.1414566040039, 38.97919464111328, 43.871604919433594, -125.0760498046875, 191.3759307861328, 73.48880004882812, 81.66595458984375, 295.14996337890625, 107.46330261230469, 162.15480041503906, 96.69172668457031, 217.71286010742188, 90.97639465332031, 11.720191955566406, 167.42263793945312, -16.232147216796875, -11.910179138183594, 266.109375, 247.45486450195312, -56.931941986083984, 176.1699981689453, -9.60093879699707, 47.95549011230469, 94.04188537597656, 214.36619567871094, 55.53851318359375, 181.65542602539062, 127.72709655761719, -58.728294372558594, 66.58973693847656, 143.78781127929688, 63.615234375, 133.65878295898438, 130.16949462890625, 64.26348876953125, -10.48359489440918, -25.416000366210938, 171.5702667236328, 1.893157958984375, 65.83360290527344, -2.0340499877929688, -35.600196838378906, 74.1649169921875, -32.191986083984375, -37.662315368652344, -132.52232360839844, 198.173828125, 155.94863891601562, 132.9189453125, 113.99440002441406, 126.4208984375, 156.5753936767578, 92.22265625, 43.591468811035156, 140.92298889160156, 172.6449432373047, 15.082555770874023, 78.14866638183594, -59.19715881347656, 33.27457046508789, 186.80410766601562, 54.22233963012695], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000579.npy"} +{"epoch": 0.8752834467120182, "step": 580, "batch_size": 64, "mean": 70.501708984375, "std": 121.80265808105469, "min": -170.00669860839844, "p10": -51.43808135986327, "median": 64.9278450012207, "p90": 238.04224243164074, "max": 543.0382690429688, "pos_frac": 0.703125, "sample": [-112.20753479003906, -56.06471252441406, -12.467529296875, 18.669340133666992, -40.642608642578125, 88.69762420654297, 185.5581817626953, 96.68296813964844, 88.19880676269531, 35.329261779785156, 175.96119689941406, 73.06192779541016, 8.585830688476562, 4.974414825439453, -76.22490692138672, 63.63233184814453, 125.82037353515625, 112.90386199951172, 1.1784687042236328, 22.638626098632812, -12.196247100830078, 126.89745330810547, 251.04803466796875, 17.34038543701172, 69.0346450805664, -12.658302307128906, 543.0382690429688, -17.31103515625, -17.160184860229492, 250.88412475585938, 208.07785034179688, 189.5775909423828, 8.514640808105469, 164.09768676757812, -66.97537231445312, 181.38726806640625, -37.18366622924805, -166.6909637451172, 35.3491096496582, 263.49078369140625, -170.00669860839844, 34.061492919921875, -30.089691162109375, 116.162353515625, 268.34783935546875, -17.945039749145508, 81.82628631591797, 53.869956970214844, 264.4556579589844, 116.56548309326172, 122.80274963378906, 133.84683227539062, -15.024345397949219, -115.63362121582031, -25.565044403076172, -6.628747940063477, 74.20820617675781, 83.25837707519531, 108.14898681640625, 335.1349792480469, 91.4368896484375, 16.172653198242188, 66.22335815429688, 143.63262939453125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000580.npy"} +{"epoch": 0.8767951625094482, "step": 581, "batch_size": 64, "mean": 62.34282684326172, "std": 101.57078552246094, "min": -173.41183471679688, "p10": -52.05611038208007, "median": 48.85294532775879, "p90": 192.3453063964844, "max": 323.6719970703125, "pos_frac": 0.734375, "sample": [124.19975280761719, 194.0111083984375, 14.764793395996094, 26.688743591308594, 67.08885192871094, 21.600814819335938, 30.178192138671875, -48.25225830078125, 116.51075744628906, 323.6719970703125, -65.15042114257812, 39.7072639465332, 21.06623649597168, 45.98332214355469, 118.24701690673828, -47.95220947265625, 126.3741455078125, 53.91651153564453, 137.75543212890625, 19.860929489135742, 129.3324432373047, 24.8560791015625, 180.68113708496094, -57.7532958984375, 220.31944274902344, 220.94302368164062, 163.5045623779297, -34.654563903808594, 99.18274688720703, 82.66593933105469, -45.061798095703125, 94.60445404052734, 34.98696517944336, 160.88034057617188, 116.16838073730469, 125.46792602539062, 64.24992370605469, -166.6894989013672, 294.760986328125, 234.38348388671875, 28.289443969726562, -173.41183471679688, 88.20263671875, 44.9677734375, 85.61270141601562, -85.66325378417969, 222.61184692382812, -12.753585815429688, 7.342365264892578, -115.06622314453125, 120.64693450927734, -53.68633270263672, 91.59683227539062, 99.53581237792969, 47.49782180786133, -5.0938873291015625, -0.3148651123046875, -30.77548599243164, -46.27803039550781, 44.8172492980957, 50.20806884765625, 168.10916137695312, 188.45843505859375, -38.01231384277344], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000581.npy"} +{"epoch": 0.8783068783068783, "step": 582, "batch_size": 64, "mean": 54.11070251464844, "std": 94.94625091552734, "min": -125.29977416992188, "p10": -61.676992797851554, "median": 33.7164306640625, "p90": 168.3150375366211, "max": 370.9622802734375, "pos_frac": 0.71875, "sample": [110.21314239501953, 232.58969116210938, 21.272926330566406, 93.91093444824219, -29.499465942382812, 23.712398529052734, 181.8330535888672, 51.22304153442383, 168.91383361816406, 154.23822021484375, 33.604156494140625, 70.91402435302734, 192.35951232910156, 370.9622802734375, -8.490203857421875, 167.07315063476562, -104.46563720703125, 8.414154052734375, 86.63526916503906, 208.86990356445312, 10.65689468383789, 114.18216705322266, 11.715045928955078, 168.84727478027344, 144.69223022460938, 111.91249084472656, 24.29471778869629, 4.548379898071289, -67.91466522216797, -109.82820129394531, -5.054372787475586, -5.184967041015625, 110.63336944580078, -65.93690490722656, -40.00782012939453, 124.91606140136719, 129.83544921875, 56.3629264831543, 160.10556030273438, 16.687206268310547, 31.732330322265625, 84.52435302734375, 49.68890380859375, 5.122896194458008, -36.774017333984375, -17.979782104492188, -49.080543518066406, 115.15177154541016, 35.772300720214844, -125.29977416992188, 1.9958553314208984, 33.828704833984375, -4.866474151611328, -2.1430740356445312, -75.35631561279297, 71.19219970703125, 160.09921264648438, -120.83927154541016, 164.6963653564453, -51.73719787597656, 18.75762939453125, 6.57110595703125, 104.03864288330078, 134.24203491210938], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000582.npy"} +{"epoch": 0.8798185941043084, "step": 583, "batch_size": 64, "mean": 74.08435821533203, "std": 116.12728881835938, "min": -177.9230194091797, "p10": -67.05338439941406, "median": 63.990196228027344, "p90": 197.87516937255862, "max": 367.3018493652344, "pos_frac": 0.71875, "sample": [70.85275268554688, -35.48828887939453, 4.724357604980469, 2.5105323791503906, 183.5950927734375, 81.19471740722656, -84.90294647216797, -59.136802673339844, -115.14605712890625, 25.067718505859375, 195.3661346435547, -22.826351165771484, -21.32320785522461, 124.21147155761719, 44.75723648071289, -70.44620513916016, 109.89659118652344, -14.618902206420898, 194.17117309570312, 171.1483917236328, 27.120969772338867, -161.33831787109375, 155.47267150878906, 8.812919616699219, 258.01177978515625, 105.75544738769531, 367.3018493652344, -143.94537353515625, 26.706134796142578, 178.92391967773438, 91.01460266113281, 23.426023483276367, -5.9656524658203125, 57.12763977050781, 315.1885681152344, 218.24400329589844, -18.891494750976562, -34.143394470214844, 155.000732421875, 192.56356811523438, 46.139225006103516, 266.0989685058594, -177.9230194091797, 198.95046997070312, 5.028711318969727, 149.48257446289062, 130.14581298828125, 38.86511993408203, 121.20880889892578, -35.38658905029297, 161.80520629882812, 150.5205078125, 154.05572509765625, 7.077547073364258, 137.270751953125, 159.87451171875, -101.60811614990234, 112.42359161376953, -5.018501281738281, 154.3223114013672, 244.84634399414062, 192.63568115234375, -8.803634643554688, 39.39282989501953], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000583.npy"} +{"epoch": 0.8813303099017384, "step": 584, "batch_size": 64, "mean": 65.4421615600586, "std": 112.57149505615234, "min": -181.99928283691406, "p10": -62.66874313354492, "median": 57.51152229309082, "p90": 218.52759094238286, "max": 327.35308837890625, "pos_frac": 0.671875, "sample": [222.59906005859375, 150.16336059570312, 86.782470703125, -62.68083953857422, 43.827972412109375, -17.08782386779785, 232.7821044921875, 209.02749633789062, 103.16266632080078, 95.19514465332031, 80.88955688476562, -33.94884490966797, -21.432130813598633, -37.83812713623047, -100.06797790527344, 153.884521484375, 136.7528076171875, 186.93846130371094, 139.31558227539062, -35.6098518371582, 137.70953369140625, 231.15846252441406, -14.335029602050781, 152.08396911621094, 168.43722534179688, 167.447021484375, 60.13054656982422, 118.54670715332031, 35.855499267578125, 6.96923828125, 184.6363525390625, -73.80113220214844, 137.01998901367188, 32.263458251953125, -126.29399108886719, 284.76953125, -13.5379638671875, -52.30535888671875, -10.162895202636719, -14.928144454956055, 44.3464469909668, 250.36666870117188, -62.64051818847656, 45.46510314941406, 86.80834197998047, 178.03195190429688, 11.870744705200195, 53.10078430175781, 87.20377349853516, -56.025230407714844, -153.17166137695312, 57.28622817993164, -21.80883026123047, 167.237548828125, -5.90869140625, 277.9314880371094, 327.35308837890625, 3.967508316040039, 57.73681640625, 60.86814880371094, 37.621620178222656, 108.20498657226562, -129.86770629882812, -181.99928283691406], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000584.npy"} +{"epoch": 0.8828420256991686, "step": 585, "batch_size": 64, "mean": 46.786407470703125, "std": 88.73844909667969, "min": -260.47882080078125, "p10": -35.15698089599609, "median": 39.52016639709473, "p90": 170.42938079833985, "max": 229.39236450195312, "pos_frac": 0.765625, "sample": [20.322711944580078, 121.12098693847656, 15.8948974609375, -9.7178955078125, 53.140541076660156, 73.33610534667969, 43.93000030517578, 225.23995971679688, -27.828155517578125, 35.48686218261719, -172.63278198242188, 24.15542221069336, 168.98681640625, -1.7421722412109375, 4.217548370361328, 229.39236450195312, -97.89983367919922, 14.347846984863281, 80.03233337402344, 112.85615539550781, 119.085205078125, 2.942716598510742, -4.009275436401367, -89.26448059082031, 35.53117370605469, 45.125823974609375, 27.162437438964844, 51.71826934814453, 44.125579833984375, 83.30792999267578, -5.592586517333984, -10.424163818359375, 29.561817169189453, -3.0403480529785156, -105.26658630371094, 17.039291381835938, 9.542343139648438, 135.50433349609375, 176.56161499023438, 93.45906066894531, 43.509159088134766, 117.01361846923828, 75.41873168945312, -37.141212463378906, -260.47882080078125, 196.49526977539062, 212.3718719482422, 171.04762268066406, -30.52710723876953, -44.242897033691406, 88.28353881835938, 15.203971862792969, 19.592529296875, 56.80766296386719, 5.641883850097656, 60.552696228027344, 113.07711791992188, 163.4147186279297, 198.62416076660156, 55.27349853515625, 34.155311584472656, 25.14453125, 79.17716979980469, 70.20513153076172], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000585.npy"} +{"epoch": 0.8843537414965986, "step": 586, "batch_size": 64, "mean": 66.50492858886719, "std": 79.159423828125, "min": -94.34697723388672, "p10": -9.476177406311034, "median": 58.175899505615234, "p90": 170.05075836181643, "max": 312.53564453125, "pos_frac": 0.859375, "sample": [80.69985961914062, 32.582618713378906, 134.20822143554688, 94.9880599975586, 40.815521240234375, 141.25209045410156, 67.69764709472656, 7.0336151123046875, 7.1691436767578125, 9.106767654418945, 99.13677215576172, -36.07592010498047, 0.1810760498046875, 212.04159545898438, 245.0326385498047, -37.198486328125, 86.68067169189453, 81.26286315917969, 31.506322860717773, 105.55286407470703, 7.860256195068359, 61.657379150390625, 68.05696105957031, 172.99964904785156, 60.86939239501953, -8.334905624389648, 5.523445129394531, 53.02032470703125, 128.2128143310547, 92.47735595703125, 131.69827270507812, 110.79666137695312, 60.266197204589844, 194.6554412841797, 118.82142639160156, 5.2202606201171875, -87.05241394042969, 312.53564453125, 24.192630767822266, 23.905271530151367, 3.107881546020508, 2.331573486328125, 24.97467041015625, 144.7139129638672, 80.19102478027344, -6.293758392333984, 3.5406570434570312, 56.085601806640625, -9.965293884277344, 96.97950744628906, 203.4751739501953, -57.53572082519531, 39.07383728027344, 36.174110412597656, 163.17001342773438, 54.96556854248047, -94.34697723388672, 206.67041015625, 0.3984222412109375, -10.990646362304688, 154.62860107421875, 9.462169647216797, 129.293212890625, 85.155517578125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000586.npy"} +{"epoch": 0.8858654572940288, "step": 587, "batch_size": 64, "mean": 68.79959106445312, "std": 122.73521423339844, "min": -354.9806213378906, "p10": -54.01735916137694, "median": 66.06926155090332, "p90": 203.99231414794923, "max": 471.70684814453125, "pos_frac": 0.765625, "sample": [48.968265533447266, -137.12538146972656, -2.8961257934570312, 113.17313385009766, 145.0634765625, -69.33160400390625, 52.43777847290039, 5.890533447265625, 198.30535888671875, 57.959381103515625, 26.128002166748047, 111.0836410522461, -100.24954223632812, 11.230850219726562, 1.6867218017578125, 38.36280822753906, 141.21206665039062, -10.313125610351562, 40.48307800292969, 222.79722595214844, 171.0241241455078, 96.62208557128906, -1.0156097412109375, 182.82188415527344, 114.75048828125, 82.73235321044922, 225.88058471679688, 34.66145324707031, -354.9806213378906, -13.669742584228516, 52.837852478027344, 62.30189895629883, 85.23870849609375, 320.0179138183594, 57.87826919555664, 76.40850830078125, 87.30262756347656, -224.80613708496094, -36.05340576171875, 267.6566467285156, 159.631591796875, -11.552730560302734, 140.83590698242188, 85.91004943847656, 3.3967723846435547, 133.5916748046875, 22.859657287597656, 176.62020874023438, 154.27023315429688, 69.83662414550781, 206.42958068847656, -60.963897705078125, 109.58946990966797, -4.5011138916015625, 91.17364501953125, 250.1984100341797, 43.437416076660156, -37.80876922607422, -116.80259704589844, 77.3459701538086, 99.56502532958984, 471.70684814453125, 151.23873901367188, 4.6885986328125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000587.npy"} +{"epoch": 0.8873771730914588, "step": 588, "batch_size": 64, "mean": 65.5625, "std": 107.82894134521484, "min": -178.85299682617188, "p10": -45.56854476928711, "median": 60.4250602722168, "p90": 183.21771697998048, "max": 383.2759704589844, "pos_frac": 0.75, "sample": [51.30445861816406, -44.86161804199219, -16.54663848876953, 142.29971313476562, -9.867416381835938, 123.73286437988281, 155.47396850585938, 126.40443420410156, 113.80474090576172, 123.04217529296875, 336.5314025878906, 57.493316650390625, 53.704742431640625, 112.13993835449219, 28.71361541748047, 133.67408752441406, 206.16110229492188, 383.2759704589844, 29.378446578979492, 340.13482666015625, 64.53516387939453, 119.98101043701172, -41.17751693725586, 188.04747009277344, 21.563928604125977, 85.68399047851562, 13.144783020019531, -23.642372131347656, 15.371994018554688, -178.85299682617188, 20.83062171936035, 120.68443298339844, -119.0857925415039, 86.44825744628906, 94.76256561279297, 170.74398803710938, -29.46346092224121, 78.90165710449219, 107.63725280761719, -45.87151336669922, -34.049766540527344, -34.02024841308594, 57.08655548095703, 179.49855041503906, 115.44081115722656, 62.47938537597656, 169.31985473632812, -6.516513824462891, 62.59037780761719, 107.21324157714844, 39.50663757324219, -127.95016479492188, 25.314773559570312, 58.37073516845703, 72.22105407714844, 51.1614990234375, -82.98661804199219, 23.174888610839844, 46.456939697265625, -142.3189697265625, 124.79679107666016, 184.8116455078125, 185.4927978515625, -137.33148193359375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000588.npy"} +{"epoch": 0.8888888888888888, "step": 589, "batch_size": 64, "mean": 72.16041564941406, "std": 90.75740814208984, "min": -143.93234252929688, "p10": -23.169627380371093, "median": 82.9339828491211, "p90": 178.41959991455082, "max": 335.3751220703125, "pos_frac": 0.765625, "sample": [28.847047805786133, 87.20964813232422, -113.01791381835938, 185.58331298828125, 102.93785858154297, 6.329475402832031, -9.467262268066406, -22.453125, -12.883293151855469, 28.622543334960938, -23.476699829101562, 3.370576858520508, 335.3751220703125, 27.968830108642578, 108.50483703613281, 82.86139678955078, 83.0065689086914, 123.35853576660156, 266.7909240722656, 99.03710174560547, 97.23577880859375, 109.47805786132812, 161.72018432617188, -94.59722900390625, 246.0366668701172, 128.7700958251953, -42.195213317871094, 113.24239349365234, 141.56019592285156, 134.47232055664062, 84.02897644042969, -15.55233383178711, 171.9718475341797, 25.5771484375, -32.21498107910156, 9.1162109375, 46.156150817871094, -41.541786193847656, 86.6084976196289, 183.78030395507812, 61.647552490234375, 146.54901123046875, 181.18292236328125, 79.79549407958984, 89.65438079833984, 26.606292724609375, 102.91646575927734, -20.48959732055664, -5.726696014404297, 25.614601135253906, -143.93234252929688, 117.60050201416016, 162.08059692382812, -19.48168182373047, 86.51383209228516, 151.71847534179688, 110.38565063476562, 93.23921203613281, 24.050270080566406, 24.547157287597656, 67.58612060546875, 74.89884948730469, 279.23809814453125, -0.08714866638183594], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000589.npy"} +{"epoch": 0.890400604686319, "step": 590, "batch_size": 64, "mean": 65.826171875, "std": 84.92048645019531, "min": -124.85421752929688, "p10": -35.19277572631836, "median": 58.524925231933594, "p90": 177.09115447998047, "max": 245.74752807617188, "pos_frac": 0.734375, "sample": [50.08704376220703, 49.639381408691406, 83.74573516845703, 2.3264389038085938, 151.51779174804688, 120.46419525146484, 75.59329223632812, -120.75714111328125, 4.079341888427734, -34.976829528808594, 24.264877319335938, 31.58919906616211, 32.5311279296875, 47.877769470214844, -124.85421752929688, 101.41414642333984, 149.69920349121094, 92.04098510742188, -12.179790496826172, -6.3664703369140625, -27.05743408203125, 130.98947143554688, 21.086036682128906, 196.98788452148438, 181.75360107421875, -36.66272735595703, -16.109237670898438, 44.77500915527344, 231.2259521484375, 109.27645874023438, -38.365394592285156, 100.8746337890625, 25.975507736206055, 69.73089599609375, 134.22579956054688, 91.79512023925781, 91.66292572021484, -0.344818115234375, 37.878997802734375, 239.80224609375, 190.864501953125, 108.6352767944336, 245.74752807617188, 97.08096313476562, 175.002685546875, 61.31634521484375, 36.235626220703125, 109.77445983886719, -19.598785400390625, 19.13482666015625, -20.85711669921875, 174.03384399414062, 142.80270385742188, -35.28532409667969, -0.2104644775390625, 159.45858764648438, 55.73350524902344, 109.78351593017578, 176.5662078857422, -5.310733795166016, 72.29951477050781, -44.37823486328125, 177.31613159179688, -80.50717163085938], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000590.npy"} +{"epoch": 0.891912320483749, "step": 591, "batch_size": 64, "mean": 83.24821472167969, "std": 117.76740264892578, "min": -183.0280303955078, "p10": -75.8507095336914, "median": 94.18243408203125, "p90": 235.3174865722657, "max": 363.0571594238281, "pos_frac": 0.78125, "sample": [-6.864301681518555, 248.96307373046875, 112.83440399169922, 108.8027114868164, 137.52963256835938, 124.29304504394531, 168.5640411376953, 113.59416198730469, 85.97120666503906, 113.45262145996094, 11.338638305664062, -89.29342651367188, 184.23765563964844, -97.2663345336914, 100.83407592773438, -22.703109741210938, 53.34521484375, 65.46844482421875, 115.93560791015625, 50.07303237915039, 129.47360229492188, 87.53079223632812, 248.84439086914062, -79.16630554199219, 181.58758544921875, -127.52474975585938, 139.58966064453125, 116.39932250976562, 10.605743408203125, 43.375152587890625, -183.0280303955078, -117.09418487548828, 363.0571594238281, 349.2723388671875, -3.4111251831054688, 203.0516815185547, -66.68344116210938, 46.650421142578125, 195.903076171875, 2.296619415283203, 18.040664672851562, -45.52007293701172, 31.208526611328125, 170.6967010498047, 2.693674087524414, 245.5679931640625, -44.16893768310547, -103.96455383300781, 31.482383728027344, 315.01971435546875, -68.11431884765625, 2.6327896118164062, 213.93093872070312, 62.44585418701172, 1.5565757751464844, 191.03213500976562, 241.52725219726562, 117.09307861328125, 151.7453155517578, 101.70037841796875, 220.82803344726562, 124.22850036621094, 52.54070281982422, 173.84234619140625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000591.npy"} +{"epoch": 0.8934240362811792, "step": 592, "batch_size": 64, "mean": 74.48173522949219, "std": 107.23197174072266, "min": -158.19766235351562, "p10": -41.85539455413817, "median": 52.055381774902344, "p90": 219.04253692626955, "max": 489.1862487792969, "pos_frac": 0.8125, "sample": [79.96371459960938, 228.69708251953125, 214.3957977294922, 25.673730850219727, 147.49610900878906, 191.5120391845703, 221.72528076171875, 40.2618522644043, 23.225120544433594, 28.738121032714844, -60.623374938964844, 128.47021484375, 43.06654357910156, 19.333763122558594, -158.19766235351562, 100.0312728881836, 190.85935974121094, 98.81613159179688, 3.8031234741210938, 45.7902717590332, -48.20060729980469, 168.47494506835938, 77.03291320800781, 221.03399658203125, 9.730777740478516, 81.11238098144531, 75.68152618408203, 196.47402954101562, 161.38385009765625, 25.715042114257812, -70.01966094970703, 105.02020263671875, -14.355888366699219, 108.58132934570312, 43.353172302246094, 58.320491790771484, -78.10472106933594, 125.45681762695312, -74.39518737792969, -27.049898147583008, -22.202190399169922, 91.87748718261719, -10.286689758300781, 3.3787078857421875, -79.30831909179688, 85.26568603515625, 65.43412780761719, 325.12432861328125, 264.80743408203125, 6.888427734375, 489.1862487792969, 0.086517333984375, 11.03957748413086, 13.24957275390625, 39.458587646484375, 10.863393783569336, 234.27658081054688, 37.33118438720703, -0.8423423767089844, 61.51717758178711, 40.6874885559082, 124.16239166259766, 99.267578125, 117.28392791748047], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000592.npy"} +{"epoch": 0.8949357520786092, "step": 593, "batch_size": 64, "mean": 72.87154388427734, "std": 105.58618927001953, "min": -220.3134765625, "p10": -65.66251296997069, "median": 84.69963073730469, "p90": 196.8503616333008, "max": 269.76214599609375, "pos_frac": 0.71875, "sample": [131.87733459472656, 16.94937515258789, 160.89920043945312, -2.428955078125, 156.2313690185547, -1.4514236450195312, 78.84247589111328, -220.3134765625, 232.0764923095703, 51.30835723876953, 192.52835083007812, 20.509601593017578, 214.4189910888672, 180.4169158935547, 35.28044891357422, 14.058963775634766, 140.33120727539062, 92.63803100585938, 142.756591796875, 122.76500701904297, 32.33295440673828, 147.67019653320312, 243.01303100585938, 269.76214599609375, -18.114288330078125, -4.083351135253906, -28.436948776245117, 145.02406311035156, 90.97090911865234, -139.38755798339844, 143.33883666992188, -93.70240783691406, 56.917266845703125, 110.58474731445312, 123.98646545410156, 134.36578369140625, -2.0161972045898438, -49.74958801269531, 112.68147277832031, -87.51744079589844, -120.68719482421875, -20.530364990234375, 219.2826385498047, -131.59649658203125, 189.68792724609375, 143.5424346923828, 198.0198974609375, 77.4714584350586, -33.53013610839844, 249.13418579101562, 71.1937255859375, -17.942102432250977, 99.38472747802734, 97.13522338867188, 81.1748275756836, -72.48233795166016, 54.10316467285156, 194.12144470214844, 8.318309783935547, -4.9617767333984375, 183.622314453125, 88.22443389892578, 11.9515380859375, 151.80624389648438], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000593.npy"} +{"epoch": 0.8964474678760394, "step": 594, "batch_size": 64, "mean": 61.98661804199219, "std": 100.72444915771484, "min": -158.18165588378906, "p10": -50.18269653320313, "median": 49.3269157409668, "p90": 176.69919891357424, "max": 400.552734375, "pos_frac": 0.734375, "sample": [24.734012603759766, 146.65467834472656, -101.24124145507812, 30.792762756347656, 75.73402404785156, 126.44355010986328, 12.603675842285156, 49.26908874511719, 59.71894836425781, 3.9019241333007812, -4.44183349609375, 1.1725425720214844, -22.565933227539062, 80.14109802246094, 19.49558448791504, -2.751300811767578, -23.929916381835938, -28.283702850341797, 95.56619262695312, 61.22727966308594, 172.2889404296875, 67.6258544921875, 9.767522811889648, 111.70552062988281, 197.55027770996094, 76.36296081542969, 132.85606384277344, -66.98099517822266, 149.89962768554688, -2.7691593170166016, 400.552734375, 303.9762268066406, 264.51690673828125, 138.58212280273438, 60.174072265625, -50.300689697265625, -28.91278839111328, 65.50750732421875, 142.17880249023438, -52.97935485839844, 168.78298950195312, 13.42003059387207, 42.722808837890625, 161.42242431640625, 1.0702590942382812, 18.5336971282959, 49.384742736816406, 178.5893096923828, 1.5583820343017578, 218.78228759765625, 106.9598388671875, -158.18165588378906, 103.756103515625, 277.30047607421875, -49.907379150390625, 119.93081665039062, 20.320762634277344, 46.70984649658203, 62.59147644042969, -8.213447570800781, 57.94416809082031, -74.31462097167969, -30.16405487060547, -57.69926452636719], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000594.npy"} +{"epoch": 0.8979591836734694, "step": 595, "batch_size": 64, "mean": 91.57583618164062, "std": 120.44166564941406, "min": -225.67637634277344, "p10": -19.40427989959716, "median": 84.40452194213867, "p90": 206.01111450195316, "max": 540.3745727539062, "pos_frac": 0.84375, "sample": [22.81145477294922, 107.03719329833984, 304.9047546386719, 142.55929565429688, 23.56786346435547, 43.90409851074219, 67.85985565185547, 75.92744445800781, 39.09748840332031, 183.61378479003906, 197.23406982421875, 48.020721435546875, 53.14859390258789, -126.79234313964844, 196.7355499267578, 14.342727661132812, 15.931844711303711, 18.675994873046875, 151.41017150878906, 28.4814395904541, 161.463134765625, 97.26504516601562, 209.772705078125, 60.857017517089844, 313.06365966796875, 8.193870544433594, 82.2644271850586, 138.7371063232422, 189.22300720214844, 128.9599609375, 7.105506896972656, -61.90065002441406, 72.60226440429688, -4.8388671875, 165.43170166015625, 183.0478057861328, 240.9512176513672, -125.9285888671875, -225.67637634277344, 247.45742797851562, 540.3745727539062, 179.7108612060547, 129.79342651367188, 93.12472534179688, -98.52096557617188, 189.9458465576172, 86.54461669921875, 110.17828369140625, -23.31573486328125, 247.0541229248047, -9.713384628295898, 188.17962646484375, 90.54521942138672, 31.001625061035156, 26.280914306640625, 65.89551544189453, 180.2455291748047, 113.30499267578125, 74.95161437988281, 1.92974853515625, -10.277551651000977, -158.25318908691406, 148.77932739257812, 166.5699462890625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000595.npy"} +{"epoch": 0.8994708994708994, "step": 596, "batch_size": 64, "mean": 39.19957733154297, "std": 94.1088638305664, "min": -178.1538543701172, "p10": -67.41607437133788, "median": 23.46184730529785, "p90": 168.59134368896486, "max": 279.5521240234375, "pos_frac": 0.640625, "sample": [-68.12666320800781, 25.301063537597656, 220.2976837158203, 41.667991638183594, 49.007102966308594, -72.51396942138672, -27.496631622314453, 28.225685119628906, -27.41918182373047, 169.37265014648438, -79.6298828125, 52.22237014770508, -83.07388305664062, -80.63998413085938, 279.5521240234375, 161.8638916015625, 148.16802978515625, 39.113101959228516, -98.58659362792969, 38.556846618652344, -31.614990234375, 160.7888946533203, 48.24168395996094, 56.54164123535156, 187.89581298828125, 132.636962890625, 58.350120544433594, -52.9921875, 90.37801361083984, 91.6650390625, -23.24789810180664, 62.39860534667969, 55.79298400878906, 11.783624649047852, 58.084754943847656, -2.689311981201172, 43.846473693847656, 253.37059020996094, 20.496273040771484, -37.10637664794922, 47.76789855957031, 2.5629806518554688, -27.324237823486328, -178.1538543701172, 13.911239624023438, -13.034648895263672, 12.033843994140625, -15.51474380493164, 274.4661560058594, 232.998046875, 81.57669067382812, -15.169586181640625, -55.306495666503906, -5.109062194824219, -65.7580337524414, 166.76829528808594, 17.155975341796875, 110.18890380859375, -14.235963821411133, 35.303314208984375, 21.622631072998047, 4.3965606689453125, 4.7642669677734375, -27.619958877563477], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000596.npy"} +{"epoch": 0.9009826152683296, "step": 597, "batch_size": 64, "mean": 60.950660705566406, "std": 91.78598022460938, "min": -79.29576110839844, "p10": -42.49670639038085, "median": 39.859182357788086, "p90": 176.25079040527345, "max": 363.0830383300781, "pos_frac": 0.703125, "sample": [-13.983829498291016, -45.41126251220703, -24.58297348022461, -19.383255004882812, 207.67367553710938, -25.659225463867188, 25.426090240478516, -73.06537628173828, -35.696075439453125, 171.7921905517578, 5.396678924560547, 88.80997467041016, 10.615510940551758, 132.33116149902344, -52.45314025878906, 52.39317321777344, 203.8812255859375, 166.67649841308594, 172.51266479492188, -12.928522109985352, 79.43329620361328, 32.90098571777344, -79.29576110839844, 39.470664978027344, 153.05215454101562, 112.99628448486328, -0.3479633331298828, -53.52928161621094, 136.319580078125, 16.704072952270508, 106.51885223388672, 33.838645935058594, 167.56155395507812, 363.0830383300781, 82.29952239990234, 53.249691009521484, 0.3671417236328125, 66.52232360839844, 73.21692657470703, 177.85284423828125, -14.09536361694336, -78.09671020507812, 22.06787109375, -33.19074249267578, 10.254446029663086, -22.277761459350586, -14.045743942260742, 9.622011184692383, 114.53917694091797, 29.904136657714844, 198.72567749023438, 40.24769973754883, 126.8531265258789, -9.48779296875, -77.14190673828125, 91.58566284179688, 51.81581497192383, 52.235321044921875, 247.9882354736328, 219.326171875, 27.435422897338867, 137.87164306640625, 133.49215698242188, 138.65399169921875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000597.npy"} +{"epoch": 0.9024943310657596, "step": 598, "batch_size": 64, "mean": 42.1598014831543, "std": 96.57611846923828, "min": -187.08377075195312, "p10": -82.22881851196288, "median": 39.4395751953125, "p90": 184.2257278442383, "max": 248.809326171875, "pos_frac": 0.671875, "sample": [-7.725679397583008, -83.89445495605469, 13.415275573730469, 34.14575958251953, 126.11693572998047, -9.789119720458984, 45.58677673339844, 183.1814422607422, 133.98182678222656, -173.3423309326172, 42.656089782714844, 38.75469970703125, 6.493747711181641, -35.08518981933594, 218.26220703125, -34.80724334716797, 248.809326171875, -65.63134765625, 129.0708465576172, 49.95590591430664, -27.0554141998291, -187.08377075195312, -69.44050598144531, 115.94081115722656, 11.994552612304688, 54.327857971191406, 40.509769439697266, 166.03091430664062, -15.511146545410156, -16.864795684814453, -81.89625549316406, -84.16669464111328, 58.146209716796875, 127.6256103515625, 142.91104125976562, 13.14602279663086, 56.10234069824219, 14.044795989990234, 58.04667663574219, 78.71966552734375, 194.562255859375, 40.12445068359375, 2.8894481658935547, 184.67327880859375, -2.1638641357421875, 95.93499755859375, 45.98667907714844, 2.4264755249023438, 144.51551818847656, 117.20539855957031, -41.410430908203125, -123.40970611572266, 7.888786315917969, 72.151123046875, 23.22380256652832, -82.44489288330078, 151.23614501953125, 64.69374084472656, -9.807632446289062, -34.171409606933594, 234.63095092773438, 190.07421875, -82.37134552001953, 186.10598754882812], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000598.npy"} +{"epoch": 0.9040060468631897, "step": 599, "batch_size": 64, "mean": 54.95996856689453, "std": 110.56111907958984, "min": -159.57138061523438, "p10": -56.70568389892578, "median": 41.01114273071289, "p90": 170.40415496826174, "max": 413.26617431640625, "pos_frac": 0.65625, "sample": [-57.40821838378906, -55.066436767578125, 60.76447296142578, 84.05022430419922, -29.42017364501953, 98.49339294433594, 22.432880401611328, 219.2869873046875, -59.08368682861328, -118.90158081054688, -35.87501525878906, -21.35283851623535, 32.851600646972656, 69.66043090820312, 354.8497314453125, -26.145423889160156, -100.68611145019531, -11.294902801513672, 1.7796401977539062, -44.53857421875, 151.10816955566406, 104.99241638183594, 39.225006103515625, -159.57138061523438, 8.864639282226562, 17.889511108398438, -20.694717407226562, 171.3048553466797, -88.08612060546875, 73.44140625, 342.9827880859375, 168.30252075195312, -95.10443878173828, -49.22948455810547, 32.95066452026367, 25.394058227539062, 92.29663848876953, -31.8944091796875, 48.62211608886719, 99.2784423828125, 114.79930114746094, 8.822746276855469, 62.07141876220703, -43.50834655761719, 155.6707763671875, 75.30184936523438, 186.33709716796875, -26.814498901367188, -43.82705307006836, 38.29659652709961, 291.515869140625, 141.41818237304688, 106.71641540527344, 126.00143432617188, 125.91038513183594, 42.797279357910156, 413.26617431640625, 54.44513702392578, 109.32713317871094, -3.8868350982666016, -8.664840698242188, 92.0057601928711, 118.63526916503906, 64.33151245117188], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000599.npy"} +{"epoch": 0.9055177626606198, "step": 600, "batch_size": 64, "mean": 51.775245666503906, "std": 97.58605194091797, "min": -159.0043487548828, "p10": -83.09415359497069, "median": 47.42346000671387, "p90": 193.48715362548828, "max": 276.3832702636719, "pos_frac": 0.71875, "sample": [135.3138427734375, 71.19005584716797, 16.516511917114258, 20.842506408691406, 194.10519409179688, 40.71551513671875, 54.584903717041016, -114.8578109741211, 164.1595458984375, 209.4791717529297, -10.495307922363281, 45.71751022338867, 101.90616607666016, 154.57725524902344, 22.060760498046875, -15.576688766479492, -54.2889404296875, -65.6943588256836, -91.03036499023438, 171.3002166748047, 36.93943405151367, 42.66838073730469, 141.97279357910156, 81.57197570800781, -57.27062225341797, 170.34657287597656, 90.2596664428711, 192.04505920410156, -11.569358825683594, 179.39199829101562, 202.81787109375, 5.207572937011719, -156.3996124267578, -8.325035095214844, 69.64521789550781, -159.0043487548828, -90.55120849609375, 69.39659881591797, 1.8589859008789062, 276.3832702636719, -9.17645263671875, 87.75668334960938, 57.82884216308594, 194.72744750976562, -0.20668601989746094, 55.37983322143555, -94.73277282714844, -30.78373908996582, -117.2420425415039, 57.125850677490234, 19.954933166503906, 54.76145935058594, 27.253149032592773, 49.12940979003906, 94.43157958984375, 22.3526611328125, 63.42716979980469, -0.6667289733886719, 67.45112609863281, 215.34637451171875, 9.424247741699219, 29.424781799316406, 73.17498779296875, 259.5627746582031], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000600.npy"} +{"epoch": 0.9070294784580499, "step": 601, "batch_size": 64, "mean": 75.81163024902344, "std": 102.66317749023438, "min": -136.06175231933594, "p10": -47.376937484741205, "median": 79.89121627807617, "p90": 185.36457366943358, "max": 411.9508056640625, "pos_frac": 0.78125, "sample": [92.48970031738281, 104.75597381591797, 159.27749633789062, -50.35477828979492, 183.36074829101562, 121.68423461914062, 411.9508056640625, -95.09756469726562, 73.81785583496094, -3.8999710083007812, 228.40504455566406, -136.06175231933594, 185.34085083007812, 71.26195526123047, -1.3514347076416016, 87.73323822021484, 168.68209838867188, 191.42361450195312, -99.38803100585938, 148.64376831054688, 169.50558471679688, 11.477088928222656, 1.2995452880859375, 182.30181884765625, 200.78878784179688, 108.81477355957031, 21.781831741333008, -40.23505783081055, 98.63373565673828, 4.89520263671875, 31.872299194335938, 113.72740173339844, 80.46417236328125, -15.248359680175781, 33.17683410644531, 164.97335815429688, 0.9463424682617188, 111.9320068359375, 40.612335205078125, 60.433502197265625, -92.54127502441406, 69.66378784179688, 37.83199691772461, 185.37474060058594, 132.92974853515625, 92.30447387695312, 67.60441589355469, -40.42864227294922, -2.0832157135009766, 79.3182601928711, 4.356941223144531, 264.6552429199219, -19.903451919555664, 5.840721130371094, 103.81011962890625, 124.95484161376953, 246.69525146484375, 126.11299133300781, 177.54058837890625, 7.5294036865234375, -107.61967468261719, 132.57244873046875, 141.32667541503906, -110.72930908203125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000601.npy"} +{"epoch": 0.90854119425548, "step": 602, "batch_size": 64, "mean": 66.69522094726562, "std": 87.99596405029297, "min": -174.63375854492188, "p10": -16.494658279418942, "median": 46.73527526855469, "p90": 172.88542633056642, "max": 292.2881774902344, "pos_frac": 0.796875, "sample": [-28.731658935546875, 142.63259887695312, 149.168212890625, 292.2881774902344, 51.76634216308594, 37.737335205078125, 72.67819213867188, 7.568220138549805, 24.245956420898438, 13.745189666748047, 211.6319580078125, -31.981876373291016, 224.93748474121094, 156.281005859375, -55.947696685791016, 171.4127197265625, -174.63375854492188, 197.5518798828125, 173.51658630371094, -7.130916595458984, -17.097030639648438, -4.0736083984375, 3.1171188354492188, -15.089122772216797, 38.28431701660156, 159.30105590820312, 40.911827087402344, 114.19481658935547, 56.94148635864258, 41.49474334716797, 25.64639663696289, 128.089111328125, 74.67098236083984, 42.35574722290039, 79.39689636230469, -10.716777801513672, 5.964786529541016, 75.19120025634766, 170.19960021972656, 27.10082244873047, 29.045719146728516, 115.93899536132812, 138.31893920898438, 162.1448516845703, -1.6899337768554688, -129.81851196289062, 54.938133239746094, 49.742889404296875, 49.467491149902344, 26.47917938232422, 1.9126319885253906, -2.3555908203125, 119.21393585205078, 44.00305938720703, 16.20209503173828, 170.6343536376953, 79.1601791381836, 70.98429870605469, 229.07269287109375, 218.34580993652344, -65.75975799560547, 160.861083984375, 43.626625061035156, 23.404769897460938], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000602.npy"} +{"epoch": 0.91005291005291, "step": 603, "batch_size": 64, "mean": 74.82354736328125, "std": 119.27364349365234, "min": -158.1538848876953, "p10": -57.44953880310058, "median": 70.96265029907227, "p90": 179.6397445678711, "max": 682.6943969726562, "pos_frac": 0.78125, "sample": [175.40553283691406, 165.07957458496094, 107.4766845703125, -22.539222717285156, -80.63929748535156, 65.88850402832031, 78.41312408447266, 78.13900756835938, -13.830520629882812, 49.35746765136719, 168.71890258789062, 96.08680725097656, 47.50756072998047, -73.6781234741211, -13.356986999511719, 682.6943969726562, -55.10981369018555, 63.833152770996094, 18.026458740234375, 363.2154846191406, 74.33368682861328, 79.76284790039062, 143.85720825195312, 104.29872131347656, -10.052791595458984, 81.5009765625, 133.55946350097656, -60.957366943359375, 115.60798645019531, -158.1538848876953, 29.017501831054688, 20.13623046875, 113.50355529785156, 117.48353576660156, -89.55834197998047, 113.73855590820312, 67.59161376953125, 14.955547332763672, 7.835405349731445, 184.72280883789062, 43.48069763183594, 120.8843002319336, 75.73097229003906, 38.75678253173828, 125.03960418701172, 168.0213623046875, 60.03941345214844, -31.172487258911133, 99.068359375, 27.725343704223633, 7.629638671875, 44.797576904296875, 122.47137451171875, 168.70770263671875, 74.8264389038086, -1.003692626953125, 33.43772506713867, -134.9237823486328, 181.45440673828125, 184.03489685058594, 188.8846893310547, 10.917512893676758, 254.4785614013672, -58.45227813720703], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000603.npy"} +{"epoch": 0.9115646258503401, "step": 604, "batch_size": 64, "mean": 57.27574920654297, "std": 104.24842834472656, "min": -234.5524444580078, "p10": -49.58404846191406, "median": 34.5237922668457, "p90": 188.2928436279297, "max": 341.239501953125, "pos_frac": 0.734375, "sample": [183.30023193359375, 175.91732788085938, -2.865131378173828, -16.32769775390625, 29.469615936279297, 107.28681945800781, 18.693164825439453, 55.23729705810547, 190.38966369628906, -50.05671691894531, -15.26904296875, -36.787940979003906, 1.108499526977539, 186.89859008789062, -48.48115539550781, -27.78215789794922, 143.6208953857422, 26.30195426940918, 37.459442138671875, 137.52536010742188, -180.9217529296875, -59.269683837890625, 13.75909423828125, -29.647171020507812, 71.2533950805664, 4.291238784790039, 2.7347049713134766, 23.69548797607422, 62.19395446777344, -234.5524444580078, 18.23233413696289, -67.6080551147461, 56.755577087402344, -56.454315185546875, 170.13760375976562, -21.840049743652344, 34.0667724609375, 26.74859619140625, 188.890380859375, 132.44277954101562, 81.70339965820312, 198.87232971191406, 341.239501953125, 48.653404235839844, 132.935791015625, 113.11004638671875, 26.19103240966797, 141.74473571777344, 244.6299285888672, -42.129180908203125, 269.1363525390625, 24.082229614257812, 61.558677673339844, 29.51099395751953, 99.24272155761719, 85.37016296386719, 137.50772094726562, 34.980812072753906, 4.813077926635742, 243.11810302734375, 129.501708984375, -41.87132263183594, -107.361328125, 158.55958557128906], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000604.npy"} +{"epoch": 0.9130763416477702, "step": 605, "batch_size": 64, "mean": 65.7748794555664, "std": 101.00025939941406, "min": -172.80532836914062, "p10": -57.92136535644531, "median": 68.51635360717773, "p90": 189.6602783203125, "max": 320.0089416503906, "pos_frac": 0.75, "sample": [87.57196044921875, 69.91267395019531, -116.48605346679688, 306.03778076171875, 43.033363342285156, 190.80303955078125, 33.558624267578125, 126.10806274414062, 120.1189193725586, -24.5819091796875, 320.0089416503906, -62.98210144042969, 67.12003326416016, -1.8813705444335938, -20.699932098388672, -89.98323059082031, 73.54130554199219, 146.519775390625, 129.99514770507812, 77.57261657714844, -5.1269989013671875, 6.223602294921875, 132.70526123046875, -123.14736938476562, -19.99018096923828, 75.9427490234375, 164.03189086914062, 152.30206298828125, 28.19739532470703, 83.05192565917969, 114.78929901123047, 50.42518615722656, -154.4197540283203, 186.99383544921875, 31.3212890625, 128.55050659179688, 206.6039276123047, 107.21798706054688, 41.63383483886719, 116.89688110351562, 85.44602966308594, 0.4341011047363281, 133.10708618164062, 55.537506103515625, 211.89187622070312, 23.111427307128906, 180.24264526367188, 139.37802124023438, 91.65229034423828, -48.8372802734375, 214.38247680664062, 58.08036804199219, 90.95306396484375, -21.198463439941406, -61.814544677734375, 3.4220523834228516, 48.99546813964844, -172.80532836914062, 37.82261657714844, -23.15591812133789, 7.433160781860352, 267.01397705078125, -2.2133941650390625, 91.22235107421875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000605.npy"} +{"epoch": 0.9145880574452003, "step": 606, "batch_size": 64, "mean": 70.64083862304688, "std": 96.44786071777344, "min": -148.72857666015625, "p10": -42.90415420532226, "median": 73.75823974609375, "p90": 185.15776977539065, "max": 376.5887756347656, "pos_frac": 0.734375, "sample": [-22.990028381347656, 178.68202209472656, 33.00396728515625, 240.66384887695312, -10.736946105957031, 11.877166748046875, 147.64187622070312, 92.42951965332031, 11.976999282836914, 152.62767028808594, 187.93309020996094, -46.39430236816406, 104.91802978515625, 127.12615966796875, 138.28427124023438, -97.8055191040039, 87.39002990722656, 68.09979248046875, 1.6570110321044922, 86.08755493164062, 376.5887756347656, 63.824676513671875, 33.53037643432617, 135.1546630859375, -70.09542083740234, -24.37413787841797, 145.02374267578125, -34.760475158691406, 75.27857971191406, 144.59732055664062, 117.74940490722656, -1.3179931640625, -123.64655303955078, 156.0667266845703, 72.23789978027344, 30.77019691467285, 68.06080627441406, -16.48350715637207, 144.40377807617188, 85.73662567138672, 195.9380645751953, 221.14663696289062, 75.80196380615234, 111.98251342773438, 7.6099853515625, 249.5691375732422, -8.496597290039062, 58.88409423828125, -50.344459533691406, -48.6266975402832, 178.45867919921875, 3.1068115234375, -148.72857666015625, 53.817840576171875, -14.069229125976562, 112.12814331054688, 40.63494110107422, -8.184612274169922, 93.34988403320312, 237.72206115722656, 91.47013854980469, 125.94429779052734, 80.77037048339844, -9.689260482788086], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000606.npy"} +{"epoch": 0.9160997732426304, "step": 607, "batch_size": 64, "mean": 75.95558166503906, "std": 137.0189971923828, "min": -175.85049438476562, "p10": -54.905494689941406, "median": 66.19586181640625, "p90": 183.78233032226566, "max": 847.7648315429688, "pos_frac": 0.71875, "sample": [4.824851989746094, 25.06011962890625, -73.80773162841797, 61.59657287597656, 106.19682312011719, 35.49480438232422, 159.18777465820312, 176.86837768554688, -175.85049438476562, -54.269004821777344, 12.691204071044922, -49.326332092285156, 95.68698120117188, 174.32464599609375, 51.294769287109375, -77.73358154296875, -7.394645690917969, 17.6754207611084, 25.827125549316406, 277.80291748046875, 65.74037170410156, 186.74545288085938, 114.60578918457031, 132.76962280273438, 116.77198791503906, 116.51346588134766, -33.726661682128906, 105.61143493652344, -39.916015625, -18.73138427734375, 219.98220825195312, 114.73046112060547, 134.21273803710938, 99.1801986694336, -68.42684936523438, 161.17636108398438, 234.76011657714844, -11.51882553100586, 163.64991760253906, 96.55574035644531, -32.23371124267578, 40.589019775390625, 31.376008987426758, 125.71366882324219, 82.95993041992188, -17.335205078125, -55.17827606201172, 128.06578063964844, 301.3379821777344, 168.55641174316406, -0.3180351257324219, -3.8480987548828125, 113.42137908935547, 847.7648315429688, 66.65135192871094, 161.41656494140625, 5.529804229736328, 50.304443359375, 71.33099365234375, 187.5050048828125, 15.194198608398438, -136.30126953125, -74.9065170288086, 106.72381591796875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000607.npy"} +{"epoch": 0.9176114890400605, "step": 608, "batch_size": 64, "mean": 63.58171844482422, "std": 106.38533020019531, "min": -158.18621826171875, "p10": -44.561937713623045, "median": 40.31443405151367, "p90": 173.65240783691408, "max": 512.3961791992188, "pos_frac": 0.734375, "sample": [113.83425903320312, -46.64808654785156, 1.8836898803710938, 163.85638427734375, -1.114084243774414, -9.61029052734375, 144.772705078125, 0.136322021484375, 46.06410598754883, -46.09947204589844, 115.92588806152344, 118.23250579833984, 159.0105743408203, 123.62913513183594, 75.58186340332031, 132.60386657714844, 207.66566467285156, -145.1539306640625, -0.2014179229736328, 18.156234741210938, 22.268386840820312, -27.38906478881836, -2.372945785522461, 272.01715087890625, 158.939697265625, -4.695743560791016, -11.111320495605469, 74.6790771484375, 44.68568420410156, 176.0879669189453, 286.68743896484375, -158.18621826171875, 151.5810546875, 74.16110229492188, 34.11909484863281, 62.074676513671875, 21.528411865234375, -115.86499786376953, -49.59910583496094, 15.142242431640625, -27.814491271972656, 68.49557495117188, 41.45597839355469, 19.284385681152344, 20.647354125976562, 123.57106018066406, -2.84173583984375, 150.41622924804688, 512.3961791992188, 115.38274383544922, 16.07080078125, 39.172889709472656, 54.787925720214844, 104.30204772949219, 6.954254150390625, 138.98707580566406, -90.89407348632812, 177.4445343017578, 30.157257080078125, -40.97435760498047, 28.216773986816406, 167.9694366455078, 33.89384460449219, 184.8697509765625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000608.npy"} +{"epoch": 0.9191232048374905, "step": 609, "batch_size": 64, "mean": 74.38362121582031, "std": 91.22006225585938, "min": -201.40948486328125, "p10": -37.090326309204094, "median": 75.53634262084961, "p90": 175.11797485351565, "max": 325.4278564453125, "pos_frac": 0.796875, "sample": [188.2097930908203, 129.09446716308594, 117.80591583251953, 20.60390853881836, -2.460357666015625, 16.322280883789062, 152.5569610595703, 104.54576110839844, -9.408187866210938, 7.318473815917969, -62.820823669433594, 80.4422607421875, -29.096298217773438, 42.42658615112305, 325.4278564453125, 137.30587768554688, 196.65695190429688, 145.749267578125, 61.8597412109375, 207.45248413085938, -68.04595184326172, 80.70402526855469, 138.763916015625, 126.36181640625, -44.141361236572266, 214.12701416015625, 165.93832397460938, 48.225990295410156, -94.7735366821289, 116.64788055419922, 271.1720275878906, -8.065919876098633, 154.8944091796875, 86.86173248291016, 27.937835693359375, 113.47518157958984, 85.21771240234375, -7.491003036499023, 167.004638671875, 36.20745849609375, 45.712730407714844, -38.93723678588867, 34.77118682861328, 176.55813598632812, 156.0073699951172, -201.40948486328125, 41.30953598022461, 51.4659423828125, 74.57041931152344, 3.9961071014404297, 68.30818176269531, 103.35926818847656, 99.33149719238281, 76.50226593017578, 17.2171688079834, 18.124046325683594, 143.28981018066406, 125.56461334228516, 171.75759887695312, -71.06110382080078, 128.91705322265625, 63.730010986328125, 63.232879638671875, -32.78086853027344], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000609.npy"} +{"epoch": 0.9206349206349206, "step": 610, "batch_size": 64, "mean": 60.46433639526367, "std": 114.9767074584961, "min": -298.7161560058594, "p10": -63.67588653564452, "median": 54.89692687988281, "p90": 195.9194793701172, "max": 362.9544372558594, "pos_frac": 0.734375, "sample": [48.38592529296875, 178.7714385986328, 144.82080078125, 182.55775451660156, 23.307472229003906, 153.4524688720703, 25.52972412109375, 14.733964920043945, -246.29071044921875, 77.79347229003906, 39.76512908935547, -78.4278793334961, 77.11589050292969, 85.62977600097656, -16.166603088378906, 56.76580810546875, 172.03683471679688, -54.89131164550781, -115.57125854492188, 2.2615280151367188, -11.159690856933594, -67.14041137695312, 7.641059875488281, 96.53701782226562, -70.58921813964844, -111.56245422363281, -26.398666381835938, 201.55099487304688, 96.10926055908203, 210.71255493164062, 22.161375045776367, 362.9544372558594, 194.47976684570312, 144.31005859375, 8.763900756835938, 156.8537139892578, 156.8652801513672, -7.884908676147461, 156.55850219726562, 12.472503662109375, 15.555572509765625, 108.24787902832031, 57.92485809326172, -298.7161560058594, 196.5364990234375, 12.490612030029297, 2.5774612426757812, 7.898448944091797, 164.63999938964844, 216.88790893554688, 71.6732177734375, 53.028045654296875, 82.45793151855469, -3.119752883911133, -6.758615493774414, -27.59795379638672, -55.59199523925781, 129.19882202148438, 335.40087890625, 104.88671875, 108.97032165527344, -3.4694061279296875, 205.76983642578125, 86.01089477539062], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000610.npy"} +{"epoch": 0.9221466364323507, "step": 611, "batch_size": 64, "mean": 50.348487854003906, "std": 88.44739532470703, "min": -173.23414611816406, "p10": -62.59696578979492, "median": 47.5870246887207, "p90": 165.7832000732422, "max": 280.0301818847656, "pos_frac": 0.734375, "sample": [88.61434936523438, 19.391992568969727, 75.06381225585938, 73.92021179199219, 211.58682250976562, 7.420036315917969, -21.232879638671875, 37.34626770019531, -25.822307586669922, 42.70069885253906, -33.63412094116211, 150.89984130859375, 74.70841979980469, 114.87205505371094, 0.01793670654296875, -24.351333618164062, -61.40537643432617, 174.2555694580078, -89.18710327148438, 280.0301818847656, 82.14129638671875, 71.43975067138672, 99.81753540039062, -63.10764694213867, -79.95494079589844, -8.271629333496094, 157.0745849609375, 57.09400177001953, 47.42877960205078, 179.12387084960938, 29.967018127441406, -83.86307525634766, -27.088180541992188, 189.69989013671875, -25.2562255859375, 33.173583984375, -173.23414611816406, 133.8401641845703, 79.43340301513672, 39.805633544921875, 266.21197509765625, 54.03627014160156, -127.37276458740234, 111.13969421386719, 51.174171447753906, 42.123741149902344, 63.913482666015625, 47.745269775390625, 98.31980895996094, 120.23663330078125, 64.14830017089844, 161.94573974609375, 26.742414474487305, 61.84973907470703, 8.01141357421875, 42.780914306640625, 167.42782592773438, -86.1082534790039, 126.66666412353516, 31.548477172851562, -32.91584014892578, 26.152626037597656, -12.015523910522461, 74.08172607421875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000611.npy"} +{"epoch": 0.9236583522297808, "step": 612, "batch_size": 64, "mean": 76.49797821044922, "std": 102.94752502441406, "min": -260.2772216796875, "p10": -24.776315879821777, "median": 65.51111602783203, "p90": 218.5002395629883, "max": 311.9088134765625, "pos_frac": 0.8125, "sample": [-1.6673736572265625, -97.85693359375, 53.903167724609375, 10.535072326660156, 59.58247375488281, 14.448112487792969, 133.6482391357422, 137.78097534179688, -10.50213623046875, 18.696487426757812, 111.53112030029297, 46.33271789550781, 47.554222106933594, 70.62315368652344, 22.406272888183594, 60.399078369140625, 74.64042663574219, 14.203147888183594, 71.20011901855469, 112.38127899169922, 126.65939331054688, -62.215599060058594, 129.99032592773438, 8.056259155273438, 50.527740478515625, 40.90107345581055, 311.9088134765625, -25.153812408447266, -43.204647064208984, 27.62108612060547, 70.66983795166016, 221.5976104736328, 1.2661170959472656, -4.595159530639648, -16.072044372558594, 211.27304077148438, 72.09845733642578, 198.34109497070312, 273.71624755859375, -97.99473571777344, 254.53172302246094, 59.89542770385742, 186.6830596923828, -61.61436462402344, 246.73834228515625, 163.4748077392578, 123.50811767578125, -23.895490646362305, 157.6581573486328, 289.45135498046875, 32.519752502441406, 2.0981826782226562, 284.1810607910156, 93.72959899902344, 39.34147644042969, 107.11703491210938, 117.41378021240234, 26.473413467407227, 133.00819396972656, -260.2772216796875, 122.48753356933594, 114.3125228881836, 124.0602035522461, 117.74317932128906], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000612.npy"} +{"epoch": 0.9251700680272109, "step": 613, "batch_size": 64, "mean": 80.26461029052734, "std": 115.38871765136719, "min": -252.47531127929688, "p10": -28.14633560180664, "median": 61.33583068847656, "p90": 232.38503723144532, "max": 457.3052673339844, "pos_frac": 0.765625, "sample": [-25.285354614257812, 35.81378173828125, -16.768020629882812, 58.58274841308594, 114.50774383544922, 92.30146026611328, 132.56314086914062, 67.6751708984375, -47.73761749267578, 0.33354759216308594, 55.682395935058594, 56.30086135864258, -150.86663818359375, 122.47821044921875, 139.26528930664062, 299.93829345703125, -13.713571548461914, 47.95502471923828, 278.6152038574219, 96.43897247314453, 24.169334411621094, -60.258270263671875, -252.47531127929688, 16.34245491027832, 176.70346069335938, 89.0823745727539, 128.270263671875, 39.19709777832031, -28.67646026611328, 57.98139190673828, 229.11965942382812, 68.30810546875, 120.8736572265625, -2.544475555419922, -17.05970001220703, 64.08891296386719, -31.697242736816406, 457.3052673339844, 85.80709075927734, 315.5618896484375, 160.86056518554688, 154.37667846679688, 30.73719024658203, 57.2567138671875, 264.7203369140625, 21.229232788085938, 233.78448486328125, 37.60173034667969, -26.909378051757812, 196.1157989501953, -14.839553833007812, -17.15472412109375, 108.62864685058594, 263.5308837890625, 102.73136901855469, 112.3696517944336, 24.50851821899414, 187.15628051757812, 96.41889953613281, 224.54913330078125, 17.07854461669922, 137.1316375732422, 26.478904724121094, -85.606689453125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000613.npy"} +{"epoch": 0.926681783824641, "step": 614, "batch_size": 64, "mean": 61.57132339477539, "std": 97.13410186767578, "min": -190.99697875976562, "p10": -26.409179878234863, "median": 61.72587776184082, "p90": 161.88509979248053, "max": 392.7474365234375, "pos_frac": 0.765625, "sample": [132.3326416015625, 89.83695220947266, 30.04123306274414, 145.7135009765625, 62.56780242919922, 106.64031219482422, 5.907619476318359, -0.8245315551757812, 42.95600509643555, 46.93288040161133, 146.4928436279297, 83.63243103027344, 392.7474365234375, 184.3878631591797, 207.38467407226562, 135.1487579345703, 95.73649597167969, 92.45673370361328, 113.54967498779297, 144.89854431152344, -112.83155059814453, 4.4269866943359375, 14.267105102539062, -190.99697875976562, 229.5574951171875, -16.02043914794922, -24.52655601501465, -17.980499267578125, 144.1444091796875, -27.216018676757812, -80.88134002685547, -7.690212249755859, -5.901256561279297, 138.63369750976562, 43.83430862426758, 36.71129608154297, 111.43833923339844, 111.17446899414062, -159.8717803955078, 143.7811279296875, -4.950431823730469, 168.48178100585938, 93.01007843017578, 126.46485137939453, 70.87018585205078, 115.19683837890625, 32.061317443847656, 24.205810546875, 118.30767822265625, 39.99497985839844, 33.01432418823242, 53.441261291503906, -174.2292938232422, -24.114707946777344, 12.49283218383789, 170.56985473632812, 106.72682189941406, 0.5401382446289062, 135.9610137939453, 5.9078216552734375, 61.50396728515625, 61.94778823852539, -56.64299011230469, 177.21026611328125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000614.npy"} +{"epoch": 0.9281934996220711, "step": 615, "batch_size": 64, "mean": 79.20098114013672, "std": 132.73484802246094, "min": -238.17788696289062, "p10": -76.62662506103514, "median": 69.63699340820312, "p90": 200.93410644531255, "max": 632.96630859375, "pos_frac": 0.78125, "sample": [107.29305267333984, 107.19584655761719, 171.8214874267578, 63.55162048339844, 49.3531494140625, 171.17062377929688, 83.93338775634766, -143.03712463378906, 163.64122009277344, 60.28620147705078, -88.18444061279297, 55.388328552246094, -108.87863159179688, 95.78535461425781, 65.64862060546875, -63.97773742675781, 105.29219818115234, 254.422119140625, 175.27606201171875, -4.9557647705078125, 172.85256958007812, -41.275794982910156, 175.57940673828125, -3.8830337524414062, 176.11260986328125, 11.401611328125, -82.04757690429688, 76.72465515136719, 9.61798095703125, 207.30506896972656, 184.07962036132812, 30.161100387573242, 21.04973602294922, 19.046100616455078, 79.61432647705078, 163.05096435546875, 632.96630859375, 8.257095336914062, 19.183792114257812, 184.15530395507812, 232.20394897460938, 260.69171142578125, 167.12986755371094, -152.06529235839844, 181.90260314941406, 54.66084671020508, 3.2204360961914062, -5.22491455078125, 73.6253662109375, 186.0685272216797, 35.45819091796875, 338.0362548828125, -29.474281311035156, 96.37043762207031, -22.1993408203125, 252.8868408203125, 54.11769104003906, 98.01333618164062, 185.78465270996094, 14.234634399414062, 76.01677703857422, 24.054641723632812, -183.44992065429688, -238.17788696289062], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000615.npy"} +{"epoch": 0.9297052154195011, "step": 616, "batch_size": 64, "mean": 64.75700378417969, "std": 99.22488403320312, "min": -138.34727478027344, "p10": -43.268117713928206, "median": 41.32134437561035, "p90": 192.69707336425782, "max": 312.7535095214844, "pos_frac": 0.703125, "sample": [192.20742797851562, 26.892501831054688, 158.43035888671875, -13.401107788085938, 146.34730529785156, -2.404294967651367, 271.7016906738281, 24.293624877929688, 7.362079620361328, 78.0927963256836, 34.77465057373047, -7.730690002441406, 40.46959686279297, -138.34727478027344, 46.650936126708984, -60.734947204589844, 163.12734985351562, -27.06467056274414, -13.467369079589844, 274.1503601074219, 64.58220672607422, 211.84056091308594, -49.86614990234375, -14.81602668762207, 192.90692138671875, -5.67009162902832, -27.872709274291992, -16.432518005371094, 3.863187789916992, 164.43055725097656, 91.97293090820312, 23.873855590820312, 74.49412536621094, 149.0740203857422, 244.4615936279297, 133.4838409423828, -52.024593353271484, 42.173091888427734, 25.9339599609375, 156.30166625976562, 130.28286743164062, 60.91938400268555, 65.85739135742188, 75.40400695800781, 171.80859375, 312.7535095214844, -25.914968490600586, 30.79239273071289, -103.17559814453125, 17.363067626953125, -11.142505645751953, -93.06240844726562, 49.200172424316406, 68.65586853027344, 39.65563201904297, 179.26956176757812, 36.082481384277344, 5.210544586181641, 117.94638061523438, -11.658035278320312, 140.40869140625, 286.0518798828125, 42.182830810546875, -54.50426483154297], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000616.npy"} +{"epoch": 0.9312169312169312, "step": 617, "batch_size": 64, "mean": 57.11562728881836, "std": 83.00542449951172, "min": -129.71343994140625, "p10": -29.919087028503412, "median": 41.419822692871094, "p90": 181.10662231445315, "max": 241.1456298828125, "pos_frac": 0.8125, "sample": [59.20256805419922, 8.580074310302734, -8.18243408203125, 96.13953399658203, 218.6320037841797, 70.76316833496094, 150.47589111328125, 176.78536987304688, 19.88692855834961, -55.61646270751953, 42.67726135253906, 33.164878845214844, -26.070175170898438, 54.944427490234375, 11.507183074951172, 101.66207885742188, 7.151435852050781, 182.95858764648438, 19.980499267578125, 25.84642791748047, 235.83847045898438, 8.05007553100586, 5.404888153076172, 186.85604858398438, 66.98214721679688, 6.8979644775390625, -14.853824615478516, 114.19384765625, 75.62980651855469, 57.74663162231445, 60.67667770385742, -10.601634979248047, 20.551544189453125, 190.00936889648438, 76.789306640625, 241.1456298828125, 40.45874786376953, -39.31452178955078, -82.39984893798828, 131.42352294921875, 133.31005859375, 33.55558776855469, -31.568620681762695, 4.173427581787109, 198.90390014648438, 171.10719299316406, -110.34037017822266, -129.71343994140625, -125.49433898925781, 37.05595397949219, 100.67745971679688, 128.85267639160156, 111.51200866699219, 17.041357040405273, 157.35984802246094, 93.95378112792969, 42.380897521972656, 22.518653869628906, 71.88150024414062, 27.709075927734375, 33.173858642578125, 33.890769958496094, 85.30484008789062, -13.820098876953125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000617.npy"} +{"epoch": 0.9327286470143613, "step": 618, "batch_size": 64, "mean": 69.04601287841797, "std": 92.1083755493164, "min": -125.10502624511719, "p10": -27.239991760253904, "median": 52.1192626953125, "p90": 186.9562942504883, "max": 320.3938293457031, "pos_frac": 0.828125, "sample": [254.8600311279297, 185.9512939453125, 72.21029663085938, 86.48306274414062, 1.4895706176757812, -18.079721450805664, -28.229358673095703, 75.64458465576172, 236.90618896484375, -5.368885040283203, 40.613555908203125, 218.576171875, 135.58642578125, 50.4378662109375, 179.94044494628906, 15.149528503417969, 22.12004852294922, 24.908004760742188, 320.3938293457031, 102.63681030273438, 16.41858673095703, 281.4906005859375, 2.5947513580322266, 59.163368225097656, 13.264442443847656, 209.40213012695312, 17.449888229370117, 76.81006622314453, 93.35283660888672, 56.57267761230469, 4.5124664306640625, 6.228599548339844, 184.81640625, 47.01295852661133, 92.54182434082031, 29.80224609375, 79.99230194091797, -58.005638122558594, 128.74484252929688, 54.88805389404297, -66.87080383300781, 0.7625007629394531, -24.931468963623047, -110.8056640625, 101.58451080322266, -50.27799987792969, 39.133583068847656, -125.10502624511719, 125.57820892333984, 156.68212890625, 27.67066192626953, 134.76072692871094, 39.730262756347656, 4.617805480957031, 26.993560791015625, 33.28558349609375, -3.5339813232421875, 119.50609588623047, 116.46343994140625, 166.73394775390625, 187.3870086669922, 164.01895141601562, -67.52322387695312, 53.8006591796875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000618.npy"} +{"epoch": 0.9342403628117913, "step": 619, "batch_size": 64, "mean": 69.18682861328125, "std": 89.01602172851562, "min": -136.76217651367188, "p10": -35.068852996826166, "median": 60.539297103881836, "p90": 185.53916015625003, "max": 251.62753295898438, "pos_frac": 0.765625, "sample": [118.70137023925781, -38.16065979003906, 5.080190658569336, 35.14703369140625, 178.74752807617188, 162.3966522216797, 188.44985961914062, -27.854637145996094, -95.83241271972656, 213.1968231201172, 157.60458374023438, 39.209590911865234, 178.58998107910156, 11.756444931030273, 92.90043640136719, -11.327484130859375, 70.83861541748047, 117.675048828125, -10.50541877746582, 6.6512451171875, -79.74242401123047, 251.62753295898438, 22.959091186523438, -43.41725158691406, -9.558549880981445, 177.76443481445312, 154.99546813964844, 123.62345886230469, 8.814695358276367, -13.0294189453125, 15.297569274902344, -136.76217651367188, -48.963897705078125, 193.06021118164062, 143.84619140625, 147.38925170898438, 25.825260162353516, 174.40089416503906, 216.28802490234375, -47.28120422363281, 81.31927490234375, 211.36773681640625, 69.34822082519531, -6.703325271606445, 165.0778350830078, 107.13288879394531, 86.26020812988281, 117.85462188720703, 152.5464324951172, 63.69852828979492, 23.00090789794922, 62.11576461791992, 58.96282958984375, 37.28565979003906, 237.8702392578125, 4.517402648925781, 58.203033447265625, 74.55266571044922, -13.499799728393555, 55.04176330566406, 1.118408203125, 119.84986114501953, 15.691177368164062, -25.056941986083984], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000619.npy"} +{"epoch": 0.9357520786092215, "step": 620, "batch_size": 64, "mean": 55.15177536010742, "std": 97.01700592041016, "min": -129.75762939453125, "p10": -61.31722259521484, "median": 27.151524543762207, "p90": 197.98210601806645, "max": 274.4456481933594, "pos_frac": 0.703125, "sample": [10.495063781738281, -88.98724365234375, -49.45585632324219, 144.36856079101562, -109.37957000732422, 14.018516540527344, 3.8767547607421875, 206.4858856201172, -13.030311584472656, 64.8868637084961, 191.1838836669922, 140.9325408935547, 72.08233642578125, 44.966522216796875, -1.1234359741210938, 274.4456481933594, 78.04705810546875, 16.334121704101562, -41.964324951171875, -1.55499267578125, -57.1953125, 20.366024017333984, -125.9468994140625, 255.54946899414062, -88.03848266601562, 148.58580017089844, -22.943077087402344, 10.321807861328125, 248.88180541992188, -3.5853500366210938, 116.63943481445312, 156.241455078125, -18.856464385986328, -9.376798629760742, 18.379932403564453, 65.786376953125, -8.779186248779297, 115.32209777832031, 163.73989868164062, 8.561935424804688, 140.63934326171875, 16.073532104492188, 87.97123718261719, 30.165674209594727, 11.01138687133789, -63.08375549316406, 226.65003967285156, 200.8956298828125, 3.434816360473633, 124.2624740600586, 1.5764751434326172, 222.8447723388672, -129.75762939453125, -11.162687301635742, -79.66920471191406, 40.77696990966797, 128.7596893310547, 24.137374877929688, 122.54042053222656, 117.23880004882812, 130.03765869140625, 97.05652618408203, 104.37968444824219, 32.652137756347656], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000620.npy"} +{"epoch": 0.9372637944066515, "step": 621, "batch_size": 64, "mean": 67.53730010986328, "std": 116.70369720458984, "min": -165.9478759765625, "p10": -51.985921859741204, "median": 62.96242141723633, "p90": 181.85428161621095, "max": 581.1328125, "pos_frac": 0.734375, "sample": [-14.474647521972656, 38.19853210449219, 19.02825164794922, -37.05500411987305, -46.65362548828125, -165.9478759765625, 72.96771240234375, 30.589859008789062, -78.51445770263672, 25.4207706451416, 92.61392211914062, 183.19798278808594, 141.97427368164062, 17.376981735229492, 178.71897888183594, 45.03857421875, 61.42047882080078, 106.25283813476562, 72.5592041015625, 160.7396240234375, 64.50436401367188, 29.84613037109375, 133.86512756347656, -89.6209945678711, 223.09219360351562, 154.87347412109375, 119.09664916992188, -100.45100402832031, 371.0716552734375, -68.739501953125, 581.1328125, 75.90420532226562, 97.25283813476562, 196.46319580078125, 110.29069519042969, -3.3026885986328125, 83.38131713867188, 0.2766742706298828, 78.53819274902344, -17.581146240234375, -53.550140380859375, 78.06389617919922, 137.2169952392578, 69.35714721679688, 14.965818405151367, -3.195636749267578, 57.11448669433594, -21.0018310546875, -27.670944213867188, 44.375423431396484, 3.758390426635742, 115.6975326538086, 135.6770477294922, 20.474863052368164, 87.87714385986328, -48.33607864379883, -33.331321716308594, 277.0078125, -122.86699676513672, 87.07991027832031, 84.81768798828125, 50.73719787597656, 167.61439514160156, 257.157958984375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000621.npy"} +{"epoch": 0.9387755102040817, "step": 622, "batch_size": 64, "mean": 58.1452751159668, "std": 93.17922973632812, "min": -137.79421997070312, "p10": -61.24952545166013, "median": 55.7786750793457, "p90": 166.07218322753909, "max": 325.22772216796875, "pos_frac": 0.703125, "sample": [129.87850952148438, 33.40886306762695, 51.67137908935547, -118.17799377441406, 16.009422302246094, 57.529808044433594, -39.1416015625, 69.74344635009766, -70.72434997558594, -17.936325073242188, 227.6094207763672, 87.49407958984375, 3.298307418823242, -78.5904541015625, 147.85560607910156, 46.042259216308594, 118.90341186523438, 146.19554138183594, 69.60877990722656, 133.82725524902344, 104.6246337890625, 169.38848876953125, -14.672582626342773, 0.5283222198486328, 231.53195190429688, 57.907020568847656, 57.04994201660156, 77.5752182006836, -1.7806510925292969, 65.72789001464844, 225.52931213378906, -118.60299682617188, 71.67863464355469, -27.272624969482422, 16.70177459716797, -7.105886459350586, 145.93594360351562, 124.50896453857422, 67.10891723632812, -8.745567321777344, 158.33413696289062, 54.507408142089844, 123.59036254882812, -27.88191032409668, 80.41339111328125, 155.09988403320312, 325.22772216796875, -137.79421997070312, 14.333789825439453, 36.35204315185547, 195.93350219726562, 32.526039123535156, -0.5803909301757812, -74.2237548828125, -3.069643020629883, -35.99200439453125, 222.69503784179688, 47.733238220214844, -78.78886413574219, 32.15361785888672, 123.20903015136719, -7.550241470336914, 152.98446655273438, 79.96298217773438], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000622.npy"} +{"epoch": 0.9402872260015117, "step": 623, "batch_size": 64, "mean": 92.52479553222656, "std": 145.04217529296875, "min": -135.94696044921875, "p10": -57.32406158447264, "median": 69.5223503112793, "p90": 253.3841857910157, "max": 669.1268920898438, "pos_frac": 0.765625, "sample": [86.76752471923828, 69.4604721069336, -20.228595733642578, -93.53623962402344, 71.41329956054688, 232.93917846679688, 89.66461181640625, 147.23507690429688, 105.44775390625, 68.24105072021484, -94.4820785522461, 46.712608337402344, 157.538330078125, 669.1268920898438, 20.524734497070312, 262.45843505859375, -32.58012771606445, 157.3478240966797, 135.77685546875, 72.10926055908203, 58.45112609863281, 58.83789825439453, 177.61776733398438, -37.741920471191406, -77.69359588623047, 99.61820983886719, 85.17271423339844, 159.50308227539062, -135.94696044921875, 69.584228515625, 135.2666015625, 465.5840759277344, 91.12388610839844, 262.1463317871094, -16.730133056640625, -111.61841583251953, 153.79225158691406, 21.1534423828125, 127.57125091552734, 134.48291015625, 187.18936157226562, 196.88058471679688, 53.113765716552734, 20.382261276245117, -8.579826354980469, -21.07551383972168, -65.7164077758789, 26.226255416870117, 19.508907318115234, 130.65106201171875, 357.73785400390625, 392.7366638183594, 48.33985900878906, 501.29425048828125, 17.812374114990234, 31.62744903564453, 6.6916351318359375, 5.967979431152344, 98.64454650878906, -0.3417987823486328, 117.57537841796875, -10.460151672363281, 34.235626220703125, -90.96712493896484], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000623.npy"} +{"epoch": 0.9417989417989417, "step": 624, "batch_size": 64, "mean": 70.87196350097656, "std": 97.61311340332031, "min": -181.25030517578125, "p10": -35.03183097839356, "median": 66.16376113891602, "p90": 181.78172760009767, "max": 376.2393798828125, "pos_frac": 0.71875, "sample": [-39.6761474609375, -23.14630889892578, 161.184326171875, 125.84378051757812, 173.89808654785156, 162.61915588378906, -7.864593505859375, 79.19993591308594, -51.99842834472656, -14.047121047973633, 75.6753921508789, 49.33995819091797, 117.49676513671875, 83.36756134033203, 158.46627807617188, 124.03152465820312, 48.30931854248047, 178.50839233398438, 199.76358032226562, 16.870582580566406, -11.667011260986328, -35.03932571411133, -35.01434326171875, 164.52688598632812, -14.445262908935547, 137.3631591796875, 28.63629913330078, -30.80315399169922, 13.260589599609375, 20.125717163085938, 187.10128784179688, 240.10098266601562, 115.16812133789062, 251.104248046875, 21.744998931884766, -36.90939712524414, 183.18458557128906, 157.8826141357422, 376.2393798828125, 117.73287200927734, 80.0429916381836, 139.3795166015625, 61.68321990966797, 45.4990234375, -37.38203430175781, 70.64430236816406, 233.10093688964844, -15.550811767578125, -22.164148330688477, 133.99703979492188, 98.00106811523438, 14.572355270385742, 27.35993194580078, 11.491327285766602, -181.25030517578125, 85.0007095336914, 26.199844360351562, -5.782875061035156, 174.46585083007812, -11.426555633544922, -112.3884506225586, 1.2327651977539062, 77.89491271972656, 173.04977416992188], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000624.npy"} +{"epoch": 0.9433106575963719, "step": 625, "batch_size": 64, "mean": 52.47166061401367, "std": 95.29200744628906, "min": -142.564697265625, "p10": -52.20431632995606, "median": 41.95823669433594, "p90": 197.78567199707032, "max": 249.58468627929688, "pos_frac": 0.703125, "sample": [57.75892639160156, 164.9920654296875, 35.343292236328125, -18.090911865234375, -6.115560531616211, 114.64682006835938, 59.875892639160156, 45.029640197753906, 6.202802658081055, 210.48138427734375, 83.50387573242188, -52.18301010131836, 148.60594177246094, -22.83975601196289, 226.80593872070312, 25.201171875, 237.91917419433594, 11.692855834960938, -40.301815032958984, 18.519691467285156, 68.78901672363281, 142.73301696777344, 41.057762145996094, -78.22482299804688, -11.917648315429688, 105.38534545898438, 73.4908447265625, 57.618324279785156, 44.236698150634766, 24.21343994140625, -11.093372344970703, 186.9901580810547, 198.96759033203125, -24.36292266845703, 112.76528930664062, -10.216785430908203, -29.72483253479004, 245.16143798828125, 28.13404083251953, -4.0108489990234375, 30.128662109375, 249.58468627929688, 219.53933715820312, 97.97460174560547, 74.8045883178711, -40.56736755371094, -142.564697265625, -116.80691528320312, -94.8238525390625, 105.47898864746094, 3.878570556640625, -102.40240478515625, -137.22544860839844, 195.02786254882812, 97.83540344238281, 1.8072891235351562, 56.43589782714844, 42.85871124267578, 92.34973907470703, -52.21344757080078, 33.94664001464844, 67.24290466308594, 31.374547958374023, 177.48178100585938], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000625.npy"} +{"epoch": 0.9448223733938019, "step": 626, "batch_size": 64, "mean": 84.34346008300781, "std": 133.25473022460938, "min": -159.18109130859375, "p10": -60.47240219116211, "median": 73.82048034667969, "p90": 242.22406463623054, "max": 617.3946533203125, "pos_frac": 0.734375, "sample": [174.50125122070312, 18.028366088867188, 162.24635314941406, -61.23982238769531, 617.3946533203125, 185.56809997558594, 304.42193603515625, -57.984527587890625, 96.46373748779297, 172.79833984375, -46.6973876953125, 13.434539794921875, 43.906822204589844, 0.5207328796386719, -43.307533264160156, 171.24119567871094, 101.31786346435547, -94.26716613769531, 263.5144348144531, 25.4551944732666, -125.51055145263672, 203.30929565429688, 110.27526092529297, 168.52671813964844, 27.747440338134766, -13.812894821166992, 115.2578125, 248.90704345703125, -42.02656555175781, 382.4352722167969, 77.35344696044922, -22.798383712768555, 176.14511108398438, 148.83277893066406, 73.79548645019531, -159.18109130859375, -15.203765869140625, 57.41474914550781, -7.836660385131836, 94.81610870361328, 26.846031188964844, 38.902122497558594, 256.2867736816406, 33.316532135009766, -71.89300537109375, 35.499900817871094, 54.141448974609375, 157.28562927246094, 84.48324584960938, 120.13064575195312, 174.3652801513672, 73.84547424316406, -135.26255798339844, 93.0560302734375, -82.44439697265625, 182.48419189453125, 17.372692108154297, 226.6304473876953, 144.84417724609375, 137.01136779785156, 317.74755859375, -44.51255798339844, 70.76258087158203, -58.68175506591797], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000626.npy"} +{"epoch": 0.9463340891912321, "step": 627, "batch_size": 64, "mean": 76.21550750732422, "std": 103.6563949584961, "min": -178.99124145507812, "p10": -17.28871154785156, "median": 59.97880935668945, "p90": 204.8919555664063, "max": 410.86175537109375, "pos_frac": 0.75, "sample": [54.5710334777832, 22.595516204833984, 54.61842346191406, 115.46795654296875, 61.783912658691406, 84.49595642089844, 159.08782958984375, 249.84706115722656, 410.86175537109375, 95.4754638671875, -7.0943450927734375, 208.77880859375, 195.8226318359375, 140.8486785888672, 28.13947868347168, 4.647705078125, 21.23779296875, -1.5353927612304688, 230.19894409179688, 80.37661743164062, 166.91175842285156, 123.77130889892578, 35.525604248046875, 103.54345703125, 14.400833129882812, -76.06689453125, 186.73744201660156, -79.77351379394531, 151.10723876953125, -2.8766136169433594, -4.468046188354492, 216.1227569580078, 103.34814453125, -6.910408020019531, 9.80487060546875, -15.372940063476562, -14.717811584472656, 39.874542236328125, -46.32020568847656, 180.7454376220703, 289.736083984375, 65.94922637939453, -105.50407409667969, -1.3766860961914062, 49.01837921142578, -18.109756469726562, -9.997222900390625, 140.28732299804688, 58.1737060546875, 159.4808807373047, 74.84835052490234, -142.44833374023438, 17.884601593017578, 105.45248413085938, 169.616943359375, 140.727294921875, 25.67156219482422, -178.99124145507812, 56.095001220703125, 54.01929473876953, 134.68646240234375, 154.31683349609375, 122.67076873779297, 219.97198486328125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000627.npy"} +{"epoch": 0.9478458049886621, "step": 628, "batch_size": 64, "mean": 78.78556823730469, "std": 117.9500503540039, "min": -328.76220703125, "p10": -63.94775543212889, "median": 88.80618667602539, "p90": 210.35554809570314, "max": 280.0077209472656, "pos_frac": 0.78125, "sample": [213.37640380859375, -20.570068359375, 97.08485412597656, 137.05186462402344, -68.85879516601562, -166.27005004882812, 223.902099609375, -11.018768310546875, 157.60256958007812, 182.83358764648438, 217.662109375, 280.0077209472656, 108.92733764648438, 75.94308471679688, 27.13431739807129, 145.71017456054688, -328.76220703125, -177.0576934814453, 257.21942138671875, 38.38666915893555, 187.84388732910156, 79.80479431152344, 15.434045791625977, -86.92324829101562, 148.08119201660156, 182.30010986328125, 27.304290771484375, 118.60455322265625, 65.99266052246094, 259.9229431152344, 173.41583251953125, 203.306884765625, 20.20782470703125, 195.80001831054688, 16.04574203491211, 166.95263671875, -17.28253173828125, 6.06689453125, 168.00262451171875, -52.48866271972656, 150.03573608398438, 91.4231948852539, 67.98857116699219, 155.4436798095703, 99.33458709716797, 29.673965454101562, 188.98782348632812, 86.18917846679688, 145.69561767578125, 183.8406219482422, 37.849815368652344, 77.14070129394531, 18.190444946289062, -162.87432861328125, 132.94140625, 241.37156677246094, 12.230987548828125, 158.0910186767578, -28.338851928710938, 56.377288818359375, -33.84569549560547, 152.89349365234375, -4.788173675537109, -82.27304077148438], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000628.npy"} +{"epoch": 0.9493575207860923, "step": 629, "batch_size": 64, "mean": 60.58314514160156, "std": 80.91217803955078, "min": -90.28157043457031, "p10": -25.490182495117185, "median": 39.25373458862305, "p90": 195.9436920166016, "max": 235.22250366210938, "pos_frac": 0.734375, "sample": [-17.773887634277344, 96.51371002197266, -86.62059020996094, 85.81562042236328, 64.59567260742188, 65.97359466552734, 17.92755126953125, 76.7635269165039, -90.28157043457031, 78.65135192871094, 18.411514282226562, 43.24048614501953, 15.807870864868164, 210.7165069580078, -6.673269271850586, -29.408008575439453, 34.534278869628906, 9.118217468261719, 38.560211181640625, 89.41665649414062, 10.14990234375, -8.680686950683594, -14.522272109985352, 36.13357925415039, -39.47665786743164, -3.5825881958007812, 210.5779571533203, 200.01309204101562, -2.4895687103271484, 171.57431030273438, 163.8870849609375, -25.91783905029297, 63.165283203125, 79.38066864013672, 35.43130874633789, 137.4102783203125, 71.75566101074219, 24.9322509765625, 3.547229766845703, -5.905427932739258, -32.485870361328125, 41.91183090209961, 235.22250366210938, 16.635448455810547, -3.5362777709960938, -2.7211761474609375, 39.94725799560547, 121.79708862304688, 121.61331176757812, 162.10043334960938, 34.56974792480469, 79.96488952636719, 217.20068359375, 21.97044563293457, 105.6077880859375, 232.5165252685547, -24.49231719970703, 200.64395141601562, 162.16790771484375, 186.44842529296875, 3.28509521484375, 134.93739318847656, -47.33357238769531, 46.676856994628906], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000629.npy"} +{"epoch": 0.9508692365835223, "step": 630, "batch_size": 64, "mean": 71.98338317871094, "std": 93.5694808959961, "min": -153.36328125, "p10": -44.79811096191406, "median": 73.0760498046875, "p90": 181.48088378906255, "max": 326.7082214355469, "pos_frac": 0.78125, "sample": [-84.54345703125, 129.3370361328125, 6.642398834228516, 17.093917846679688, 163.34222412109375, 142.4954376220703, 91.2483139038086, 11.754413604736328, 244.79483032226562, 172.36669921875, -8.583972930908203, 158.96322631835938, -42.642601013183594, 172.3448486328125, 6.124809265136719, 137.35302734375, -74.62268829345703, 29.78331756591797, 110.0672378540039, -153.36328125, 48.57550048828125, -0.6650619506835938, 83.68811798095703, 169.09996032714844, 188.66903686523438, 145.60272216796875, -45.721900939941406, 73.74714660644531, -105.4908218383789, 71.42853546142578, -0.7544937133789062, 23.120712280273438, 4.954700469970703, 233.57191467285156, 151.46392822265625, 0.231964111328125, 200.49676513671875, 25.313552856445312, 116.24375915527344, 120.53606414794922, 326.7082214355469, 72.40495300292969, 153.50453186035156, -84.74504089355469, 185.386962890625, 94.20865631103516, 149.5133056640625, 30.033477783203125, 84.54772186279297, -25.834869384765625, 51.80768585205078, -50.556549072265625, -20.124828338623047, 192.032470703125, 126.90733337402344, 23.799100875854492, 82.93087005615234, 21.74863052368164, 120.36489868164062, 169.97921752929688, -3.3071441650390625, 94.88897705078125, 53.01891326904297, 23.651500701904297], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000630.npy"} +{"epoch": 0.9523809523809523, "step": 631, "batch_size": 64, "mean": 98.374267578125, "std": 137.50881958007812, "min": -146.29104614257812, "p10": -29.57404327392578, "median": 68.80080032348633, "p90": 231.22402343750002, "max": 749.25830078125, "pos_frac": 0.734375, "sample": [165.8609161376953, 104.33598327636719, -24.385528564453125, 218.47866821289062, 116.54588317871094, 164.27230834960938, 226.60006713867188, 115.52214813232422, 44.862789154052734, -20.722625732421875, 69.1065673828125, -49.55949020385742, 115.07136535644531, -6.3664398193359375, 11.428470611572266, 51.737953186035156, -8.207000732421875, 26.328060150146484, 54.28478240966797, 258.252197265625, 217.8096466064453, -48.98924255371094, 749.25830078125, 203.98793029785156, 201.12576293945312, 67.30682373046875, 19.41064453125, 233.20571899414062, -6.324483871459961, 86.390869140625, 36.81296920776367, 68.07735443115234, -146.29104614257812, 59.24231719970703, 92.73493957519531, 195.11354064941406, 161.06201171875, -8.78277587890625, 382.857421875, 166.65406799316406, 68.49503326416016, -66.427001953125, -51.89900207519531, 123.56509399414062, -14.554962158203125, -140.97891235351562, 21.027482986450195, 31.166032791137695, -28.68408966064453, 296.9610595703125, -29.95545196533203, 148.52593994140625, -26.24920654296875, 219.27731323242188, -21.707836151123047, 180.05667114257812, 30.509624481201172, 317.30987548828125, 75.51608276367188, 208.06781005859375, 99.75729370117188, 234.1527862548828, 49.92270278930664, 207.9891357421875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000631.npy"} +{"epoch": 0.9538926681783825, "step": 632, "batch_size": 64, "mean": 79.82603454589844, "std": 94.74152374267578, "min": -146.39505004882812, "p10": -42.89292221069336, "median": 73.49394226074219, "p90": 191.59730529785156, "max": 274.4637145996094, "pos_frac": 0.796875, "sample": [99.14020538330078, 192.33795166015625, 71.61868286132812, 25.061542510986328, 113.86666870117188, 187.93124389648438, -2.7311439514160156, 90.46392059326172, 37.5206184387207, 186.35739135742188, 70.3516845703125, -126.87063598632812, 50.0267333984375, -1.9134101867675781, -60.098175048828125, 197.60694885253906, 187.75100708007812, 15.517662048339844, 163.7573699951172, -72.63680267333984, 92.59432983398438, 8.03708267211914, -91.96572875976562, 51.13404846191406, 44.43695068359375, 151.5103759765625, 119.27325439453125, 141.7628173828125, 30.303485870361328, 42.88644027709961, 177.89498901367188, 190.42889404296875, 68.14207458496094, -43.90476989746094, 162.91049194335938, 67.31492614746094, 274.4637145996094, 34.96568298339844, 94.48668670654297, 123.0282211303711, 170.51669311523438, 168.7364959716797, -10.37359619140625, -40.531944274902344, 24.76163101196289, 182.85418701171875, 109.90716552734375, 75.36920166015625, 209.316650390625, 178.15423583984375, 192.09805297851562, -146.39505004882812, 75.49771118164062, 240.23471069335938, -15.442031860351562, -1.2910995483398438, 205.1516571044922, 129.3253173828125, 162.15269470214844, 27.037904739379883, 70.36956024169922, 24.132015228271484, -119.77076721191406, 32.29110336303711], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000632.npy"} +{"epoch": 0.9554043839758125, "step": 633, "batch_size": 64, "mean": 65.05546569824219, "std": 97.33442687988281, "min": -106.59950256347656, "p10": -57.700836944580075, "median": 53.970632553100586, "p90": 207.10827941894536, "max": 320.9267883300781, "pos_frac": 0.75, "sample": [-4.595888137817383, 36.75908660888672, 43.732505798339844, 87.05317687988281, 169.27671813964844, 48.82408142089844, 93.12675476074219, 108.021240234375, 23.430967330932617, 57.256324768066406, -4.7708740234375, 21.24833869934082, 262.0450439453125, 54.889225006103516, -22.641185760498047, 111.1607894897461, 3.018096923828125, 92.84806823730469, 66.1794662475586, -11.532463073730469, 10.067596435546875, -58.882362365722656, 128.09652709960938, 69.7675552368164, 137.83096313476562, 80.35860443115234, -32.044315338134766, 3.2718582153320312, 113.4768295288086, 106.22258758544922, 103.82844543457031, 17.719947814941406, 193.95223999023438, 55.561798095703125, -40.92103958129883, 53.9583625793457, -56.52268981933594, -28.922012329101562, -80.91799926757812, -100.33446502685547, -92.75455474853516, 41.75046157836914, 25.502941131591797, 320.9267883300781, 47.72461700439453, 38.423423767089844, 53.98290252685547, -90.83573150634766, 87.10609436035156, 182.18016052246094, 234.75390625, 156.50831604003906, 149.18582153320312, 175.36834716796875, 10.447067260742188, -58.20575714111328, -13.15969467163086, 33.597225189208984, 214.85287475585938, 212.74658203125, 262.69110107421875, -106.59950256347656, 268.0592041015625, 98.39921569824219], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000633.npy"} +{"epoch": 0.9569160997732427, "step": 634, "batch_size": 64, "mean": 26.6541748046875, "std": 96.71776580810547, "min": -125.0738296508789, "p10": -102.1357467651367, "median": 11.066452026367188, "p90": 153.3381134033203, "max": 359.31964111328125, "pos_frac": 0.5625, "sample": [12.10250473022461, -81.88749694824219, -8.137344360351562, 185.8170166015625, 20.04156494140625, -5.635290145874023, -33.442440032958984, -10.454801559448242, 18.424705505371094, 13.606338500976562, 23.663911819458008, 1.3147659301757812, 0.45916748046875, -36.71648406982422, -16.955810546875, 16.099958419799805, 77.06794738769531, 63.471309661865234, -56.39769744873047, -35.80900573730469, -64.6189193725586, 6.181423187255859, 131.7254180908203, -125.0738296508789, 170.89744567871094, -1.1841583251953125, 203.18890380859375, 117.77918243408203, 359.31964111328125, -116.203369140625, -7.3643035888671875, 21.758188247680664, -4.057735443115234, -36.489688873291016, 153.61669921875, -32.62860870361328, -117.55355072021484, -117.34527587890625, 257.9398193359375, 130.20726013183594, 116.02735900878906, -53.022735595703125, -115.21536254882812, 71.64239501953125, -110.81356811523438, -121.99147033691406, -3.1923561096191406, 21.629268646240234, 179.66748046875, 75.05857849121094, -64.70541381835938, -35.68019104003906, 72.96712493896484, 28.578536987304688, 38.37554931640625, 10.030399322509766, 146.18214416503906, 99.09098815917969, 85.36901092529297, 61.309898376464844, -17.97698211669922, 32.05464172363281, -38.93346405029297, 152.68807983398438], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000634.npy"} +{"epoch": 0.9584278155706727, "step": 635, "batch_size": 64, "mean": 56.570438385009766, "std": 105.3299331665039, "min": -140.86016845703125, "p10": -73.82614212036133, "median": 49.786216735839844, "p90": 188.96921386718756, "max": 321.4039001464844, "pos_frac": 0.65625, "sample": [-80.85685729980469, 155.21517944335938, -86.15692901611328, 4.857227325439453, 228.89358520507812, 31.28850555419922, -70.21881103515625, -122.42066955566406, -59.8026123046875, 172.94671630859375, -3.482288360595703, 149.20880126953125, -26.849403381347656, -11.056221008300781, 157.0162811279297, 195.83599853515625, 129.97085571289062, 60.653602600097656, 128.79330444335938, 123.47427368164062, -70.80165100097656, 15.36627197265625, -75.12235260009766, 321.4039001464844, -13.206878662109375, -48.68150329589844, 151.973876953125, -7.308210372924805, -30.689586639404297, 71.07080078125, 46.16363525390625, -64.32381439208984, 123.08209228515625, 48.271400451660156, 51.30103302001953, 33.32980728149414, 161.2619171142578, 3.6617164611816406, 304.76092529296875, -83.84117126464844, 86.45913696289062, 197.093505859375, -28.901126861572266, 208.3144073486328, -2.7098236083984375, -32.15299987792969, 87.24763488769531, 64.8934555053711, 92.52293395996094, 3.3425445556640625, 83.60124969482422, 311.3909912109375, 60.9287109375, 67.16411590576172, -140.86016845703125, 96.3810806274414, 148.4032745361328, 26.07837677001953, -1.925933837890625, 108.51564025878906, -94.2996826171875, 107.0592041015625, 155.04998779296875, 1.9290008544921875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000635.npy"} +{"epoch": 0.9599395313681028, "step": 636, "batch_size": 64, "mean": 70.06010437011719, "std": 90.4043197631836, "min": -162.63766479492188, "p10": -21.4372407913208, "median": 69.74411392211914, "p90": 168.76777801513674, "max": 313.31793212890625, "pos_frac": 0.828125, "sample": [156.89613342285156, 16.306663513183594, 3.5887508392333984, 25.24652862548828, 73.24642181396484, 32.447669982910156, 68.63023376464844, 0.42003631591796875, 1.6456832885742188, 169.7257537841797, 103.278564453125, -6.783830642700195, 114.34481811523438, 140.175048828125, 109.60659790039062, 28.540298461914062, -59.23429870605469, 117.31271362304688, 32.32050323486328, 154.041015625, 81.6151351928711, 46.573333740234375, -0.21734046936035156, 132.47972106933594, 27.36054229736328, 70.85799407958984, 299.2832336425781, 0.2413921356201172, -21.04292869567871, 121.8593978881836, 56.80337905883789, 73.5055923461914, 202.94517517089844, 100.89288330078125, -21.606231689453125, 116.90735626220703, 66.51150512695312, 46.043312072753906, -56.38782501220703, 115.44180297851562, 119.93521881103516, 166.53250122070312, -6.3598480224609375, 313.31793212890625, 20.427654266357422, -162.63766479492188, 230.84837341308594, 71.40066528320312, 39.35783767700195, 143.7047576904297, 88.85086059570312, 75.89501190185547, 154.4441680908203, 220.16387939453125, 184.6097869873047, -112.61773681640625, -151.08853149414062, 8.525842666625977, 19.800291061401367, 144.6349334716797, 67.27754211425781, -29.28125762939453, 60.195159912109375, 74.0863037109375], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000636.npy"} +{"epoch": 0.9614512471655329, "step": 637, "batch_size": 64, "mean": 65.42640686035156, "std": 121.0149917602539, "min": -209.08990478515625, "p10": -45.57968521118164, "median": 40.89900207519531, "p90": 201.73840789794923, "max": 640.53369140625, "pos_frac": 0.703125, "sample": [41.146270751953125, 145.5424346923828, 35.50855255126953, 640.53369140625, -24.990745544433594, 74.04559326171875, -22.949134826660156, 79.52532196044922, 179.12631225585938, 39.851985931396484, -36.80236053466797, 19.827903747558594, 79.9505386352539, -97.5012435913086, 89.36063385009766, 94.89726257324219, -12.474250793457031, 74.7569580078125, 158.15420532226562, -63.68000793457031, 88.82600402832031, 87.45146179199219, 40.6517333984375, -12.859504699707031, 14.470247268676758, 66.67138671875, -39.38835906982422, 160.0706024169922, 292.00482177734375, 231.3057861328125, 197.9457550048828, 31.55382537841797, -121.02210998535156, 99.90878295898438, -25.13531494140625, 127.13491821289062, -56.451019287109375, 33.61071014404297, 27.11229705810547, 91.08513641357422, 68.26307678222656, 218.82440185546875, -11.810667037963867, 16.439762115478516, 172.83932495117188, 31.956504821777344, -45.48945617675781, 129.15087890625, -42.17545700073242, -5.067924499511719, 37.3250732421875, 264.5701904296875, 69.46339416503906, 140.03375244140625, 144.0834197998047, -91.94617462158203, 234.917724609375, 24.897859573364258, -45.61835479736328, -209.08990478515625, 70.86730194091797, 203.36383056640625, -19.496681213378906, 2.21124267578125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000637.npy"} +{"epoch": 0.9629629629629629, "step": 638, "batch_size": 64, "mean": 72.99552917480469, "std": 103.09318542480469, "min": -162.51669311523438, "p10": -32.198585510253906, "median": 56.25906753540039, "p90": 206.1946746826172, "max": 356.58966064453125, "pos_frac": 0.75, "sample": [-162.51669311523438, 139.3340606689453, 131.50384521484375, 0.2798347473144531, -125.42437744140625, -1.6453094482421875, 180.3454132080078, 76.97543334960938, 11.102245330810547, -50.66048049926758, 19.82923126220703, -49.50136184692383, -8.085250854492188, 1.2717437744140625, -50.6580810546875, 186.7010498046875, -31.130386352539062, -31.73358154296875, 208.05322265625, 5.6205902099609375, 147.57643127441406, 44.82050323486328, 118.52417755126953, 122.77471923828125, -91.5974349975586, 244.8615264892578, 84.09217071533203, 171.12672424316406, 8.298629760742188, 56.161529541015625, 39.176551818847656, 187.49964904785156, 103.23060607910156, 266.611572265625, 137.19329833984375, -2.3863468170166016, 11.684165954589844, 130.4777374267578, 254.0730438232422, 169.71096801757812, 89.6327896118164, 193.97462463378906, 187.10443115234375, 26.285350799560547, 225.38002014160156, -32.39787292480469, -4.420948028564453, 201.85806274414062, 25.378524780273438, 57.36200714111328, 3.064668655395508, 78.29170227050781, 356.58966064453125, 66.19708251953125, -23.446483612060547, 64.78308868408203, -3.353515625, 219.00733947753906, 181.87135314941406, 6.533329010009766, -19.84247589111328, 24.709121704101562, 56.356605529785156, 37.22413635253906], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000638.npy"} +{"epoch": 0.9644746787603931, "step": 639, "batch_size": 64, "mean": 86.00243377685547, "std": 139.22450256347656, "min": -133.78382873535156, "p10": -37.5713005065918, "median": 50.52642059326172, "p90": 222.12330169677736, "max": 696.3294067382812, "pos_frac": 0.75, "sample": [134.9473876953125, 115.86737060546875, 51.47560119628906, 14.739059448242188, 81.02066040039062, 115.46829986572266, 23.048545837402344, -15.55240249633789, -74.82218170166016, 98.60699462890625, 26.125839233398438, 86.30876159667969, 316.37115478515625, 55.201629638671875, 152.2447052001953, 49.577239990234375, 44.62077331542969, 5.3330230712890625, -23.920310974121094, 48.545127868652344, 115.07676696777344, 216.89743041992188, 16.08270835876465, 185.94439697265625, 17.570350646972656, 136.06546020507812, 55.615840911865234, 123.49549865722656, -55.62983703613281, 224.3629608154297, 261.415771484375, -50.84626007080078, 0.6648883819580078, 105.43213653564453, -15.642471313476562, 28.61089324951172, 117.4253921508789, -27.772293090820312, 311.7088928222656, 149.2889404296875, 108.61536407470703, 62.601287841796875, 696.3294067382812, 38.35100555419922, -20.75872802734375, -7.392108917236328, -35.087425231933594, -67.04571533203125, 110.60191345214844, 46.07138442993164, 326.86370849609375, 166.66244506835938, 13.504417419433594, 20.27264404296875, -8.956298828125, -133.78382873535156, 42.66206359863281, -38.63581848144531, -23.29425811767578, 594.97509765625, -39.531639099121094, 118.37789916992188, 134.88552856445312, 176.89263916015625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000639.npy"} +{"epoch": 0.9659863945578231, "step": 640, "batch_size": 64, "mean": 60.342281341552734, "std": 93.914794921875, "min": -205.527587890625, "p10": -62.95274200439452, "median": 56.814998626708984, "p90": 168.99517211914062, "max": 290.133544921875, "pos_frac": 0.78125, "sample": [135.25177001953125, 49.50429153442383, 170.47166442871094, -82.61189270019531, 27.172264099121094, 94.25619506835938, 95.07720947265625, 18.601051330566406, 212.58547973632812, 169.2218475341797, 73.46420288085938, 8.317588806152344, 119.75785064697266, 144.78829956054688, -109.53903198242188, 83.73898315429688, 74.30413055419922, 90.33842468261719, -75.83610534667969, 196.27610778808594, -178.27560424804688, 117.5410385131836, 191.90455627441406, 62.6754150390625, -66.16732788085938, -52.775943756103516, 123.46955108642578, 26.03351593017578, -32.414894104003906, 22.030078887939453, 157.39556884765625, 21.204727172851562, 44.340118408203125, 155.1126708984375, 36.8792724609375, 24.460838317871094, 19.283485412597656, 50.54212951660156, -52.36436462402344, 157.42880249023438, 15.686737060546875, 151.0918426513672, 11.129844665527344, 49.9029541015625, 186.94667053222656, 121.53272247314453, 108.1988296508789, -205.527587890625, 50.95458221435547, 125.87165832519531, 124.268310546875, 290.133544921875, 82.10154724121094, 154.88143920898438, -3.7881317138671875, -69.88114166259766, 168.4662628173828, -55.45204162597656, -11.35882568359375, 31.478578567504883, -5.200542449951172, 63.511077880859375, 30.872549057006836, 122.6409683227539], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000640.npy"} +{"epoch": 0.9674981103552532, "step": 641, "batch_size": 64, "mean": 64.0093994140625, "std": 104.00789642333984, "min": -195.7405548095703, "p10": -14.678458404541015, "median": 50.40834045410156, "p90": 147.6827819824219, "max": 649.4031372070312, "pos_frac": 0.84375, "sample": [-58.299007415771484, -34.75910186767578, 66.28627014160156, 94.73516845703125, 88.48178100585938, -13.089439392089844, 149.3038330078125, 7.140174865722656, 7.525032043457031, 11.64044189453125, -15.359466552734375, 36.11228942871094, 77.84512329101562, 200.8259735107422, 95.72653198242188, 27.734663009643555, 51.10014343261719, 193.83432006835938, 165.06295776367188, 82.98452758789062, 126.84947967529297, -0.32068634033203125, 35.288490295410156, 49.71653747558594, 74.4240951538086, 125.71920776367188, 26.146072387695312, 75.98524475097656, 49.465545654296875, 96.669677734375, -15.89129638671875, 143.90032958984375, 3.9184188842773438, 59.689781188964844, 19.893585205078125, -35.528892517089844, 62.50259780883789, 38.40412902832031, -140.60848999023438, 40.97906494140625, -195.7405548095703, 107.8951416015625, 69.50312805175781, 0.6158599853515625, 72.57943725585938, 258.4261474609375, 28.427536010742188, -11.30616569519043, 97.2981948852539, 96.99003601074219, 649.4031372070312, 61.43138122558594, 184.2667236328125, 126.6968994140625, 5.450775146484375, 21.2777099609375, 95.71440124511719, 33.76637268066406, 35.001224517822266, 27.942108154296875, 16.495933532714844, 29.3326416015625, 70.1441421508789, 142.9541473388672], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000641.npy"} +{"epoch": 0.9690098261526833, "step": 642, "batch_size": 64, "mean": 46.29144287109375, "std": 102.71231079101562, "min": -153.19395446777344, "p10": -76.42962036132812, "median": 53.42504119873047, "p90": 170.46737670898446, "max": 475.7622375488281, "pos_frac": 0.6875, "sample": [67.62799835205078, 3.8400802612304688, 31.907615661621094, -103.80781555175781, 15.689689636230469, -6.766607284545898, 75.59773254394531, 202.62098693847656, -105.17428588867188, -153.19395446777344, 475.7622375488281, 22.195693969726562, 78.96817016601562, -39.795074462890625, 99.8658447265625, 54.87312316894531, 112.98011779785156, 206.11172485351562, -36.968902587890625, -44.666141510009766, 84.49604034423828, 63.42840576171875, 196.14260864257812, -34.75454330444336, 122.39443969726562, 86.11216735839844, 198.6432342529297, 49.233306884765625, 126.14566040039062, 66.08121490478516, 64.2306137084961, 63.34515380859375, 51.976959228515625, 3.6492652893066406, -20.099441528320312, 140.08877563476562, -99.00428771972656, 40.137229919433594, 20.197349548339844, 27.807968139648438, -15.891170501708984, 62.502655029296875, 14.368906021118164, -23.211341857910156, 94.2472152709961, 67.45980834960938, -65.50619506835938, 130.99761962890625, 115.50994873046875, 220.34402465820312, -25.401397705078125, -0.15926361083984375, 148.92327880859375, 76.01525115966797, 179.7005615234375, 73.22975158691406, -53.878196716308594, 64.91336822509766, -133.07275390625, -134.24667358398438, 24.605819702148438, -69.18034362792969, -79.53645324707031, 81.99740600585938], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000642.npy"} +{"epoch": 0.9705215419501134, "step": 643, "batch_size": 64, "mean": 77.91246032714844, "std": 120.07101440429688, "min": -190.56134033203125, "p10": -57.267401885986324, "median": 68.75174713134766, "p90": 219.54413452148444, "max": 569.9219360351562, "pos_frac": 0.75, "sample": [21.462127685546875, 80.97640991210938, 25.36910629272461, 260.4714660644531, 10.037554740905762, 243.8812255859375, 18.682458877563477, 142.12557983398438, 87.52953338623047, 3.0179367065429688, 569.9219360351562, 32.53740692138672, 94.94775390625, 43.77186584472656, 133.52117919921875, 225.3599853515625, -31.309463500976562, -25.83978843688965, 178.9216766357422, 201.85528564453125, 73.90746307373047, 36.56782531738281, 171.58004760742188, 70.62881469726562, -19.62702178955078, 202.85391235351562, 181.56211853027344, -50.260101318359375, 193.29501342773438, 178.39971923828125, 95.02510833740234, 127.24713134765625, -34.957977294921875, 90.72341918945312, 61.058135986328125, -28.127857208251953, -2.0883617401123047, 1.651031494140625, 245.49456787109375, 181.82058715820312, 148.81715393066406, -60.270530700683594, 137.78062438964844, 41.13481140136719, 71.2520523071289, -83.28712463378906, -90.06084442138672, -190.56134033203125, 10.609931945800781, 66.87467956542969, -94.97957611083984, 89.84689331054688, 227.63851928710938, 92.56484985351562, 1.337890625, -85.68128204345703, -9.490280151367188, 205.97381591796875, 40.70227813720703, 47.9947509765625, -63.754886627197266, 301.80859375, 115.37801361083984, -29.2261962890625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000643.npy"} +{"epoch": 0.9720332577475435, "step": 644, "batch_size": 64, "mean": 49.759117126464844, "std": 110.43402099609375, "min": -266.5095520019531, "p10": -73.07014007568358, "median": 43.339447021484375, "p90": 181.7622756958008, "max": 364.4594421386719, "pos_frac": 0.71875, "sample": [141.20761108398438, -78.4575424194336, -266.5095520019531, 157.19741821289062, -30.164531707763672, -114.30441284179688, 28.78461456298828, 133.7205810546875, 126.54550170898438, 207.0875244140625, 66.73574829101562, 2.8282737731933594, 183.5247802734375, 116.98271179199219, 43.400634765625, 42.40156555175781, 59.9498405456543, 18.01172637939453, -2.0316390991210938, 172.58238220214844, -5.041248321533203, 198.406005859375, -142.2110137939453, -29.690139770507812, -4.34619140625, 127.2282485961914, 1.2180099487304688, 95.51779174804688, 1.9708099365234375, -21.814712524414062, 177.64976501464844, -11.294769287109375, 43.27825927734375, 108.40751647949219, 144.3240966796875, -49.87556838989258, -197.57949829101562, 194.671630859375, 227.020263671875, 20.031875610351562, 104.9135971069336, 158.9384765625, 76.13819885253906, 1.156270980834961, 11.1866455078125, -205.0087432861328, -4.275299072265625, 114.81578063964844, 62.00108337402344, 155.53936767578125, 10.094314575195312, 19.310531616210938, -0.43107032775878906, -60.499534606933594, 50.96168518066406, 55.75707244873047, -103.64422607421875, 50.0315055847168, 81.501953125, 2.9047889709472656, 5.289398193359375, 364.4594421386719, 125.07129669189453, 221.00656127929688], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000644.npy"} +{"epoch": 0.9735449735449735, "step": 645, "batch_size": 64, "mean": 63.114315032958984, "std": 86.23599243164062, "min": -145.40054321289062, "p10": -39.79890823364257, "median": 61.46743583679199, "p90": 165.11188964843754, "max": 294.1837158203125, "pos_frac": 0.75, "sample": [56.79410171508789, 203.86102294921875, 124.11858367919922, -72.44160461425781, 103.01051330566406, 276.8612976074219, 140.6793212890625, 45.54217529296875, -8.429862976074219, 59.462257385253906, -4.151584625244141, -43.09678649902344, 33.21832275390625, -21.469799041748047, 294.1837158203125, 67.35881042480469, 85.66069030761719, 51.04911422729492, -10.69464111328125, 37.404632568359375, 170.04190063476562, -59.09662628173828, 35.98389434814453, 85.33472442626953, 87.76248931884766, -32.103858947753906, 96.18389129638672, 73.66082763671875, 180.1339874267578, 110.6501693725586, 73.32523345947266, 11.387575149536133, 92.85674285888672, -44.65406799316406, 26.020095825195312, 106.40913391113281, -66.35667419433594, -24.088768005371094, -10.34564208984375, 147.4645538330078, 110.48216247558594, 85.6800765991211, 292.886962890625, -6.423515319824219, 63.47261428833008, 105.67210388183594, 123.32083892822266, 48.463783264160156, 110.35235595703125, 86.11788940429688, 39.19697570800781, 120.73872375488281, 42.457950592041016, 157.37826538085938, 37.355987548828125, 4.1720733642578125, 100.1800537109375, -27.123653411865234, -95.01211547851562, -145.40054321289062, 0.12236785888671875, 168.42630004882812, 101.3609390258789, 35.94792938232422], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000645.npy"} +{"epoch": 0.9750566893424036, "step": 646, "batch_size": 64, "mean": 63.60493469238281, "std": 118.06346893310547, "min": -177.0977020263672, "p10": -58.353495025634764, "median": 37.656972885131836, "p90": 190.30928955078127, "max": 511.2752685546875, "pos_frac": 0.734375, "sample": [-58.89605712890625, 2.984457015991211, -13.912788391113281, 121.95738220214844, 183.5871124267578, 146.4571075439453, -106.66587829589844, 169.71701049804688, 98.742919921875, 145.52670288085938, -8.12359619140625, 35.24082946777344, 179.86306762695312, 159.01853942871094, 91.18228149414062, -5.271209716796875, 75.30548858642578, -153.61587524414062, 195.30453491210938, 95.2894515991211, 184.7191162109375, 511.2752685546875, -139.6150360107422, -57.08751678466797, -60.705135345458984, 152.51577758789062, 10.076900482177734, 7.8918609619140625, 38.576148986816406, 7.873687744140625, 60.345367431640625, 2.4197921752929688, 19.60643768310547, 199.6089630126953, 85.7943115234375, 8.286949157714844, 79.92427062988281, -22.735885620117188, 76.03800201416016, 150.7626190185547, -10.164407730102539, -27.297744750976562, 106.30009460449219, -48.59181213378906, 86.232421875, 357.7172546386719, 156.9100799560547, 246.11410522460938, 74.80795288085938, 250.6107177734375, 34.15956115722656, 16.0166015625, 10.21234130859375, 11.789016723632812, -177.0977020263672, 16.726455688476562, 182.71487426757812, -131.10989379882812, 1.168121337890625, 192.705078125, -21.912479400634766, -11.156524658203125, 47.8602180480957, 36.737796783447266], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000646.npy"} +{"epoch": 0.9765684051398337, "step": 647, "batch_size": 64, "mean": 87.92562866210938, "std": 108.29377746582031, "min": -132.4290771484375, "p10": -37.72535247802734, "median": 78.43631744384766, "p90": 222.4536422729493, "max": 448.09271240234375, "pos_frac": 0.796875, "sample": [-12.600505828857422, 54.342262268066406, 157.42581176757812, 33.65925598144531, 94.40176391601562, 84.62678527832031, 45.264129638671875, 352.69708251953125, -16.3978271484375, 268.35748291015625, 42.29273986816406, 38.96685791015625, 96.38168334960938, -18.425296783447266, -34.81739807128906, 72.245849609375, 448.09271240234375, -5.163148880004883, -46.472110748291016, 295.3270263671875, 170.30409240722656, 85.0062026977539, 96.47657012939453, 52.8637809753418, 198.41488647460938, 156.49478149414062, 167.27587890625, -18.79555320739746, 192.6150665283203, 48.13275146484375, 115.9507827758789, 0.4205322265625, 104.43551635742188, 24.32970428466797, 134.8278045654297, -48.432464599609375, 56.7211799621582, 182.6732635498047, 41.60808563232422, -45.790977478027344, 86.78547668457031, 65.98912811279297, 32.81789016723633, 103.23859405517578, -106.65846252441406, 18.35371971130371, 105.14159393310547, 114.42823791503906, 32.42152404785156, 97.25584411621094, -38.97161865234375, 237.00723266601562, 134.78082275390625, -39.14410400390625, -132.4290771484375, 170.33590698242188, 21.98945426940918, 343.7131042480469, 28.028953552246094, 138.21096801757812, 232.75596618652344, 40.753990173339844, 129.69268798828125, 145.00518798828125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000647.npy"} +{"epoch": 0.9780801209372638, "step": 648, "batch_size": 64, "mean": 68.37545013427734, "std": 118.8833236694336, "min": -220.39987182617188, "p10": -55.42206153869628, "median": 76.78223419189453, "p90": 206.6410400390625, "max": 393.150390625, "pos_frac": 0.71875, "sample": [191.43341064453125, 34.448753356933594, 81.9250259399414, -212.61639404296875, 167.907470703125, -12.766036987304688, -220.39987182617188, 28.438674926757812, 143.11538696289062, 105.68656158447266, -43.70772171020508, 106.0802993774414, 170.3795166015625, 0.0210418701171875, -50.255035400390625, -130.1984100341797, 37.41762161254883, -38.404930114746094, 313.0526123046875, 259.1180419921875, 212.94436645507812, 108.35218048095703, -35.892059326171875, 149.66690063476562, 156.8250274658203, 9.742111206054688, 77.24761962890625, 225.24200439453125, 48.97724151611328, 393.150390625, -16.44854736328125, -9.830574035644531, 206.34906005859375, 206.76617431640625, 4.607601165771484, -67.85257720947266, 72.07115173339844, 45.497886657714844, 123.5885238647461, 135.3465576171875, 131.9058837890625, 133.37322998046875, 178.44444274902344, -33.65956497192383, -28.14942169189453, 42.59429168701172, 87.65818786621094, 76.31684875488281, 92.78968811035156, 159.77798461914062, 110.25689697265625, 155.57470703125, 123.58935546875, -213.5583038330078, -57.835418701171875, 277.375, 95.83106994628906, -46.190162658691406, 19.11343765258789, -57.63650131225586, 8.575592041015625, -0.7050647735595703, 31.323684692382812, 112.23622131347656], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000648.npy"} +{"epoch": 0.9795918367346939, "step": 649, "batch_size": 64, "mean": 60.53472900390625, "std": 107.85176086425781, "min": -244.41648864746094, "p10": -64.36413345336913, "median": 68.2420539855957, "p90": 168.787094116211, "max": 321.9388427734375, "pos_frac": 0.671875, "sample": [119.76161193847656, 83.85391235351562, -29.309322357177734, 126.0241928100586, 92.21478271484375, 146.10623168945312, 51.95335388183594, -3.839540481567383, -11.465530395507812, 156.7201690673828, -9.555856704711914, 321.9388427734375, 121.10395812988281, -1.5283851623535156, 130.06834411621094, 135.8406219482422, 114.44406127929688, 66.4019775390625, 147.07681274414062, 142.66888427734375, 195.10098266601562, -120.66497802734375, -171.77072143554688, -57.606651306152344, -244.41648864746094, 134.68936157226562, 55.11528778076172, 49.89934539794922, -57.79553985595703, -112.46731567382812, 124.27900695800781, 43.40064239501953, 149.62510681152344, 22.256807327270508, 126.36758422851562, 104.85122680664062, 70.0821304321289, 151.4067840576172, 124.28620910644531, 146.35597229003906, 36.00634002685547, 275.1251220703125, 94.54003143310547, -2.8055152893066406, -22.222702026367188, 173.95863342285156, -9.571517944335938, 12.958473205566406, -6.6575164794921875, 38.36425018310547, -13.078681945800781, 239.21383666992188, -47.051734924316406, -105.1640625, 256.70849609375, -51.23939895629883, -126.23204040527344, -67.17924499511719, 104.91236877441406, 140.82025146484375, 189.03607177734375, 48.70703887939453, 0.6132545471191406, 80.98681640625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000649.npy"} +{"epoch": 0.981103552532124, "step": 650, "batch_size": 64, "mean": 72.57879638671875, "std": 115.97875213623047, "min": -191.140380859375, "p10": -56.11669311523437, "median": 44.63825798034668, "p90": 201.27669525146487, "max": 525.2547607421875, "pos_frac": 0.78125, "sample": [-60.7895393371582, 248.73841857910156, 283.6159973144531, 163.41000366210938, 22.770652770996094, 175.96966552734375, 139.06768798828125, 4.3472747802734375, 46.65753936767578, 8.215045928955078, 162.52618408203125, 125.2659912109375, -68.21871948242188, 14.2938232421875, 3.113077163696289, 525.2547607421875, 135.9748077392578, 61.333953857421875, 203.2972869873047, 9.303964614868164, 137.97328186035156, 64.73587036132812, -27.632619857788086, 25.43783187866211, 174.3265838623047, 204.76004028320312, 194.0640106201172, 159.3407440185547, 81.53397369384766, -105.4697494506836, 29.085643768310547, -117.12474060058594, 24.707050323486328, 151.66348266601562, -191.140380859375, 43.37065124511719, 188.66778564453125, 91.88777160644531, 22.54967498779297, -59.86908721923828, 11.249725341796875, 94.7186279296875, -108.4013671875, 149.8289794921875, -46.654998779296875, -6.718193054199219, 252.97950744628906, 196.56198120117188, 40.759490966796875, -47.361106872558594, 99.31216430664062, 9.12359619140625, 180.47911071777344, -4.297412872314453, 14.404268264770508, 14.755495071411133, -35.14033508300781, 16.73175048828125, 158.30575561523438, 236.23223876953125, 56.94061279296875, -2.063037872314453, 45.90586471557617, 20.37468719482422], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000650.npy"} +{"epoch": 0.982615268329554, "step": 651, "batch_size": 64, "mean": 67.13368225097656, "std": 101.49483489990234, "min": -140.8427734375, "p10": -54.329944610595696, "median": 39.647132873535156, "p90": 194.81421203613283, "max": 358.98431396484375, "pos_frac": 0.75, "sample": [6.869895935058594, 3.0853424072265625, 118.20770263671875, -1.7445068359375, 40.310203552246094, -72.24703216552734, -49.94605255126953, 79.96831512451172, 9.328414916992188, 175.1079864501953, 28.769264221191406, -1.1505126953125, -57.06866455078125, 15.323249816894531, 98.38127136230469, -140.8427734375, 35.36548614501953, 195.57284545898438, 6.880500793457031, 177.7373809814453, 153.9770050048828, -18.90044403076172, -48.15257263183594, 76.55625915527344, 26.043685913085938, 4.796619415283203, 289.1349792480469, 101.3172607421875, -56.20875549316406, 193.0440673828125, 60.273555755615234, 237.34237670898438, 21.321544647216797, 27.824134826660156, 118.02290344238281, -1.7838268280029297, 152.7741241455078, 49.93788146972656, -60.791786193847656, 97.96531677246094, 211.50918579101562, 37.07316589355469, 155.0874481201172, 22.711658477783203, -24.303546905517578, 203.33651733398438, 358.98431396484375, 156.71722412109375, -92.69078826904297, 168.84234619140625, 38.98406219482422, 23.088470458984375, 16.136184692382812, 122.35295867919922, 102.609619140625, 97.46150970458984, 142.60520935058594, -3.3504676818847656, 103.076904296875, 61.394866943359375, 59.72185516357422, -27.74205780029297, 340.95550537109375, -70.40958404541016], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000651.npy"} +{"epoch": 0.9841269841269841, "step": 652, "batch_size": 64, "mean": 53.10411834716797, "std": 114.09524536132812, "min": -289.00115966796875, "p10": -66.67907638549804, "median": 32.424692153930664, "p90": 194.11224822998048, "max": 354.5637512207031, "pos_frac": 0.625, "sample": [-4.086986541748047, 180.95486450195312, 162.23550415039062, 4.275901794433594, -55.76696014404297, 189.5415802001953, 29.317399978637695, 43.29963302612305, 183.1541290283203, 5.319786071777344, 160.36639404296875, -94.87178802490234, -1.8065147399902344, 18.588899612426758, 114.28887939453125, 177.35089111328125, 38.11515808105469, 118.98767852783203, 41.178001403808594, -24.093246459960938, -53.17901611328125, 210.43096923828125, 82.81254577636719, 4.5819854736328125, 34.06850051879883, -19.662254333496094, -289.00115966796875, -22.75518798828125, 204.4677276611328, 44.950706481933594, -6.255126953125, -35.48158264160156, 35.83932113647461, -6.425605773925781, 104.30054473876953, -94.72223663330078, 178.83740234375, 27.75235366821289, -33.960296630859375, -71.35569763183594, -15.251754760742188, 248.9918975830078, -24.976516723632812, 100.34923553466797, 30.7808837890625, -14.11172866821289, 183.65731811523438, 305.5667419433594, -8.283113479614258, 354.5637512207031, 303.25079345703125, 43.188514709472656, 108.20663452148438, -28.14624786376953, -32.84444046020508, 26.202865600585938, -78.92503356933594, 34.849456787109375, -77.48007202148438, 176.66998291015625, 52.28717041015625, 48.880126953125, -116.4272232055664, 196.07110595703125], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000652.npy"} +{"epoch": 0.9856386999244142, "step": 653, "batch_size": 64, "mean": 68.94950103759766, "std": 93.84224700927734, "min": -166.78819274902344, "p10": -35.845444488525374, "median": 63.09233093261719, "p90": 197.8585281372071, "max": 276.055419921875, "pos_frac": 0.828125, "sample": [27.243745803833008, 146.90679931640625, 19.855979919433594, -6.291961669921875, 56.8606071472168, 53.21766662597656, 66.32844543457031, -104.8415298461914, 217.02944946289062, 44.45196533203125, -61.0069694519043, 8.204757690429688, 103.77864074707031, -22.945266723632812, 276.055419921875, 137.96026611328125, 67.87564086914062, 265.6987609863281, 8.753978729248047, 170.72964477539062, 89.73346710205078, 218.79444885253906, 71.41580963134766, -10.325828552246094, 81.89163970947266, 34.87555694580078, 176.85809326171875, 25.310707092285156, -13.08449935913086, 11.6778564453125, 221.06570434570312, -51.62879943847656, -41.37409210205078, 51.71528244018555, 65.77629852294922, -88.21934509277344, 186.00057983398438, 62.38849639892578, 140.9810791015625, 81.32281494140625, 21.329299926757812, 10.283279418945312, 261.71551513671875, 77.34573364257812, 103.14086151123047, -166.78819274902344, 145.8732452392578, 4.04815673828125, 202.9405059814453, 108.23251342773438, 20.141319274902344, 177.86483764648438, -117.80148315429688, 2.0507965087890625, 0.05002403259277344, 28.04807472229004, 146.89154052734375, 148.57183837890625, 30.429180145263672, 92.699462890625, 63.796165466308594, 112.47795104980469, 2.6624374389648438, 145.72381591796875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000653.npy"} +{"epoch": 0.9871504157218443, "step": 654, "batch_size": 64, "mean": 63.60081100463867, "std": 105.6814956665039, "min": -118.40579986572266, "p10": -68.39172744750977, "median": 39.09990692138672, "p90": 178.4962158203125, "max": 429.8267822265625, "pos_frac": 0.734375, "sample": [28.209854125976562, 235.7057342529297, -118.40579986572266, 146.80783081054688, 28.96502685546875, -74.63175201416016, 180.00784301757812, 97.70594024658203, 12.918338775634766, -86.77861022949219, -34.23921203613281, -36.90911865234375, 145.48910522460938, 18.75720977783203, -16.437904357910156, 17.94849967956543, 156.10745239257812, -91.61386108398438, 171.55311584472656, 23.506057739257812, 33.239044189453125, 26.458179473876953, 175.18495178222656, 429.8267822265625, -13.719594955444336, -11.418800354003906, 4.281681060791016, -5.683967590332031, -65.1913070678711, 57.23747634887695, -0.5888290405273438, 95.01768493652344, 387.096435546875, 82.35063171386719, 98.34406280517578, 166.81735229492188, 117.37698364257812, -19.39935302734375, 14.634796142578125, 55.4600830078125, 131.5161895751953, 55.17634582519531, 27.398147583007812, -69.76333618164062, 23.60332489013672, 6.214073181152344, 188.95338439941406, 179.9153289794922, 71.58064270019531, 89.16433715820312, 144.34715270996094, 245.04547119140625, 4.5260009765625, 148.982666015625, 164.19012451171875, -81.58604431152344, -12.174629211425781, 81.23646545410156, 44.96076965332031, 84.89802551269531, 59.79037857055664, 29.013916015625, 128.04542541503906, -106.57229614257812], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000654.npy"} +{"epoch": 0.9886621315192744, "step": 655, "batch_size": 64, "mean": 88.75831604003906, "std": 102.0514907836914, "min": -96.70960235595703, "p10": -17.184777450561523, "median": 79.9732437133789, "p90": 211.02971954345708, "max": 557.3125610351562, "pos_frac": 0.84375, "sample": [146.14015197753906, 81.80177307128906, 4.856147766113281, 29.110591888427734, 61.65069580078125, 258.1335754394531, 216.3769989013672, 94.7823486328125, 114.7984848022461, 61.50006103515625, 14.933258056640625, 139.67434692382812, 6.6151885986328125, 10.069953918457031, -17.82720184326172, -27.670166015625, 75.82929992675781, 148.26617431640625, 144.65988159179688, 38.533870697021484, 198.552734375, -27.248794555664062, 119.63538360595703, 260.89703369140625, 60.10955810546875, -29.945465087890625, 68.1698989868164, -50.54862594604492, 232.71051025390625, 20.595046997070312, 118.00657653808594, 115.74652099609375, 127.10991668701172, 17.230003356933594, 557.3125610351562, 25.005481719970703, 76.59030151367188, 140.2451629638672, 78.14471435546875, 266.15020751953125, -5.9489288330078125, 31.5352783203125, -11.145103454589844, 149.6324462890625, 113.10507202148438, -82.4679183959961, 68.15567016601562, 125.40666198730469, 168.06134033203125, 283.07080078125, -15.685787200927734, 36.84600830078125, 122.04861450195312, 93.08340454101562, 1.16436767578125, 95.10380554199219, 111.23049926757812, 60.514808654785156, 152.73403930664062, 26.561370849609375, -96.70960235595703, 93.0031967163086, 92.36361694335938, 92.16458129882812], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000655.npy"} +{"epoch": 0.9901738473167044, "step": 656, "batch_size": 64, "mean": 68.70849609375, "std": 95.5845947265625, "min": -154.0803680419922, "p10": -29.805095291137693, "median": 49.96872520446777, "p90": 206.62442321777348, "max": 303.74395751953125, "pos_frac": 0.75, "sample": [127.20256042480469, 19.90064239501953, 157.24057006835938, 161.4754638671875, 75.32804870605469, -2.5320587158203125, 45.25760269165039, -17.576927185058594, 62.80181884765625, -30.664840698242188, 9.559673309326172, 210.79022216796875, -2.1646347045898438, 10.537322998046875, -16.828353881835938, 99.47404479980469, -120.68180847167969, -10.549308776855469, 94.97037506103516, 154.53721618652344, 294.8755798339844, 27.61841583251953, -154.0803680419922, 122.25886535644531, 197.2908477783203, -31.76198959350586, 138.49496459960938, 257.9107666015625, -83.44645690917969, 77.74029541015625, 91.08834075927734, -20.151710510253906, 41.27704620361328, 44.15669250488281, 54.679847717285156, 303.74395751953125, 61.381927490234375, 63.955543518066406, 216.84153747558594, 35.1409912109375, -5.0661163330078125, -75.38981628417969, 70.01155853271484, 217.9636688232422, -24.18896484375, 189.81570434570312, 38.275909423828125, 60.66389465332031, 28.114315032958984, 111.04953002929688, 168.11996459960938, -27.799022674560547, 74.2899169921875, 210.62452697753906, 111.3451919555664, 18.668716430664062, 40.77107238769531, -63.51329040527344, 33.072914123535156, 39.47967529296875, 32.840423583984375, 36.334075927734375, 155.55943298339844, 189.2075653076172], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000656.npy"} +{"epoch": 0.9916855631141346, "step": 657, "batch_size": 64, "mean": 78.76895904541016, "std": 111.70889282226562, "min": -192.98760986328125, "p10": -46.415967941284165, "median": 72.8762435913086, "p90": 220.45617980957036, "max": 340.8836669921875, "pos_frac": 0.734375, "sample": [225.6320037841797, 115.01434326171875, -28.93862533569336, 340.8836669921875, 135.1536407470703, 48.783477783203125, 324.7970886230469, 192.55694580078125, 85.76091766357422, -120.55644226074219, -24.173198699951172, -22.87431526184082, 125.07929992675781, 61.308006286621094, 287.7767028808594, 74.55569458007812, 106.27655792236328, 177.47235107421875, -115.70692443847656, 173.00894165039062, 24.70416259765625, 99.21341705322266, -127.47747039794922, 25.663820266723633, 58.605125427246094, 169.7933349609375, -17.4256591796875, -10.756439208984375, -6.738264083862305, 49.869056701660156, 202.5513916015625, 157.43199157714844, -135.74363708496094, 161.88232421875, 122.1072006225586, 67.49603271484375, 96.7579116821289, 176.2180633544922, 230.94537353515625, 8.087182998657227, 262.2207946777344, 71.19679260253906, -5.8650054931640625, 154.08261108398438, 22.03240966796875, 99.14401245117188, 245.40090942382812, 0.8877849578857422, -53.90625762939453, 42.13656997680664, -7.072105407714844, -192.98760986328125, 131.48880004882812, -6.039766311645508, 40.6181640625, 70.37120819091797, -56.976112365722656, 132.22125244140625, 98.11183166503906, 208.37925720214844, -8.09958267211914, 83.4484634399414, 13.426765441894531, 181.99720764160156], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000657.npy"} +{"epoch": 0.9931972789115646, "step": 658, "batch_size": 64, "mean": 63.69807434082031, "std": 121.57917022705078, "min": -213.85150146484375, "p10": -73.4686798095703, "median": 59.813276290893555, "p90": 175.07666625976563, "max": 500.752685546875, "pos_frac": 0.734375, "sample": [86.54901123046875, 69.66746520996094, 152.6840057373047, -213.85150146484375, 136.95596313476562, 34.198150634765625, -153.88417053222656, 37.37300109863281, 173.1149444580078, 169.18191528320312, -24.260452270507812, 93.39718627929688, -75.2105712890625, 65.49980163574219, 165.38609313964844, 54.12675094604492, -30.812942504882812, 134.40086364746094, 500.752685546875, 152.22564697265625, -20.024744033813477, 38.660400390625, 24.930503845214844, 136.57894897460938, 146.55401611328125, 168.4379425048828, -69.40426635742188, 109.26455688476562, 72.45559692382812, 5.845947265625, -2.6439456939697266, -18.339588165283203, 87.47586822509766, 115.11820983886719, 2.88983154296875, 78.44432830810547, 213.893798828125, -143.65040588378906, 46.34911346435547, 80.31832885742188, 225.119873046875, 11.622604370117188, 107.74681091308594, 175.9174041748047, -7.982809066772461, 28.262121200561523, -77.43940734863281, 237.28524780273438, 17.30646514892578, 81.89657592773438, 0.4090728759765625, 6.715841293334961, -19.93170928955078, -132.46392822265625, 77.78386688232422, 122.72647857666016, -25.255695343017578, -178.37020874023438, 273.5299377441406, -23.63756561279297, 386.3935852050781, 35.45733642578125, 147.35076904296875, 5.585760116577148], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000658.npy"} +{"epoch": 0.9947089947089947, "step": 659, "batch_size": 64, "mean": 63.20502471923828, "std": 108.5801010131836, "min": -176.46728515625, "p10": -81.75769729614257, "median": 57.11354446411133, "p90": 206.34718933105472, "max": 359.5083312988281, "pos_frac": 0.75, "sample": [61.70185852050781, 105.34609985351562, 179.41831970214844, 120.04796600341797, -15.632125854492188, 208.70321655273438, 76.35952758789062, 120.8268814086914, 67.36768341064453, 284.9709167480469, 222.32827758789062, -14.914199829101562, 48.90727233886719, 1.0343666076660156, 0.9366569519042969, 40.16028594970703, 119.34576416015625, -99.72492218017578, 24.788671493530273, -57.00244140625, 159.48834228515625, -91.28482818603516, 24.206558227539062, 83.87332153320312, -12.833574295043945, -31.575485229492188, 21.92039680480957, 54.66223907470703, -23.982858657836914, 104.57017517089844, 40.17262268066406, 114.2918930053711, 102.14944458007812, 100.44725036621094, 88.5344467163086, 359.5083312988281, 216.16534423828125, 100.71836853027344, 200.84979248046875, 6.095134735107422, 121.19558715820312, -89.837158203125, 32.19964599609375, 152.41598510742188, -39.77350616455078, 17.972900390625, 6.444915771484375, 317.4845886230469, 113.25629425048828, -84.9441146850586, 9.656646728515625, 173.6598663330078, -176.46728515625, -124.84066009521484, 181.7915496826172, 53.476806640625, 14.691093444824219, -154.51112365722656, 222.54818725585938, 94.02066040039062, -17.645187377929688, 124.13678741455078, 59.564849853515625, -74.32272338867188], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000659.npy"} +{"epoch": 0.9962207105064248, "step": 660, "batch_size": 64, "mean": 88.58277130126953, "std": 94.72992706298828, "min": -127.92111206054688, "p10": -26.58498039245604, "median": 65.32088088989258, "p90": 217.3581283569336, "max": 317.15032958984375, "pos_frac": 0.859375, "sample": [213.0773468017578, 51.071006774902344, 54.4614143371582, -59.75590515136719, 51.99559020996094, -48.962608337402344, 36.835205078125, 172.13380432128906, -127.92111206054688, -66.16265869140625, 157.37844848632812, 305.4693908691406, 74.45555114746094, 122.21745300292969, 50.638465881347656, 305.2523193359375, 167.42303466796875, 114.55931091308594, 26.051267623901367, -32.35173034667969, 90.37269592285156, 241.292236328125, 56.18621063232422, 52.59992218017578, 28.649429321289062, 182.3634033203125, -8.441390991210938, 154.2493896484375, 125.10458374023438, -37.70257568359375, 96.3621597290039, 283.9696044921875, 317.15032958984375, 89.32012939453125, -13.129230499267578, -43.233856201171875, 34.40058135986328, 4.8695831298828125, 137.1290740966797, 48.78529357910156, 39.16297149658203, 39.99199676513672, 160.80282592773438, 93.35830688476562, 171.0594482421875, 123.58538055419922, 5.16510009765625, 46.42998504638672, 170.16983032226562, 142.25115966796875, 31.911815643310547, 219.1927490234375, 48.459556579589844, 11.719919204711914, 147.43408203125, 50.21425247192383, 91.89076232910156, 93.75236511230469, 31.8658447265625, 4.063873291015625, 55.53352355957031, 142.92518615722656, 104.33335876464844, 235.86004638671875], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000660.npy"} +{"epoch": 0.9977324263038548, "step": 661, "batch_size": 64, "mean": 55.61132049560547, "std": 106.8237075805664, "min": -260.53082275390625, "p10": -53.821233367919916, "median": 49.79432678222656, "p90": 179.04861755371095, "max": 323.79168701171875, "pos_frac": 0.71875, "sample": [29.13916015625, -35.71790313720703, -57.689361572265625, -84.36811828613281, 91.67086791992188, 90.38114929199219, 155.62640380859375, -3.49530029296875, -260.53082275390625, 138.76792907714844, -109.54093933105469, 11.68939208984375, -29.715194702148438, 184.8880615234375, 162.71151733398438, 148.83810424804688, 209.32505798339844, 88.04441833496094, 323.79168701171875, 51.439056396484375, -163.53135681152344, 152.1247100830078, 122.72178649902344, -22.997467041015625, -21.819622039794922, 0.0842437744140625, 35.71282196044922, -22.231952667236328, -1.9481887817382812, 6.914985656738281, 103.46484375, 71.0563735961914, -151.11605834960938, 67.4324722290039, 102.3104248046875, 45.01382064819336, 66.95677185058594, 172.53892517089844, 16.908390045166016, 151.65298461914062, 76.71746826171875, 35.51097106933594, 1.1519927978515625, 125.67958068847656, 268.8546447753906, 104.92050170898438, 224.5989990234375, 17.716089248657227, 17.775602340698242, 94.68828582763672, -44.79560089111328, -13.399452209472656, 17.661392211914062, -24.926267623901367, 67.64344787597656, 130.01470947265625, 128.7188720703125, 55.63616943359375, 48.14959716796875, 181.83848571777344, 30.571380615234375, -104.92431640625, -22.998497009277344, 305.81640625], "npy": "outputs/llama-3-8b-base-margin-dpo-hh-harmless/margin_logs/step_0000661.npy"} diff --git a/margin_logs/step_0000001.npy b/margin_logs/step_0000001.npy new file mode 100644 index 0000000..488ed7a --- /dev/null +++ b/margin_logs/step_0000001.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22dddd9b4bf59a58ac9754704862dc0b60abca7f1f9941029f73d8f470387557 +size 384 diff --git a/margin_logs/step_0000002.npy b/margin_logs/step_0000002.npy new file mode 100644 index 0000000..7ae8031 --- /dev/null +++ b/margin_logs/step_0000002.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c4a0d8c26a315903fc2506660d8ac2eb82c1e4d9a761e6a7de89830e1a119f6 +size 384 diff --git a/margin_logs/step_0000003.npy b/margin_logs/step_0000003.npy new file mode 100644 index 0000000..b098cd7 --- /dev/null +++ b/margin_logs/step_0000003.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb91ee00c3b0d5e03dfd307f40fd2ef3d3cbbbc3e59f31d9c3eb8752f8c55c0b +size 384 diff --git a/margin_logs/step_0000004.npy b/margin_logs/step_0000004.npy new file mode 100644 index 0000000..9305f9b --- /dev/null +++ b/margin_logs/step_0000004.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:260475ae0d4bcf28067357013492b27c4d85c1c30e038fc60aa98d41d148934b +size 384 diff --git a/margin_logs/step_0000005.npy b/margin_logs/step_0000005.npy new file mode 100644 index 0000000..ee3f2dd --- /dev/null +++ b/margin_logs/step_0000005.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b65b9bfc70c38a9b581df0bd366fa51eb271f7deca6d5bb203f23b0569b850b +size 384 diff --git a/margin_logs/step_0000006.npy b/margin_logs/step_0000006.npy new file mode 100644 index 0000000..3f1aba3 --- /dev/null +++ b/margin_logs/step_0000006.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20bcf72e5f5861b7eb97e9beb568c738f1dc5daeae0c399ea479f43adffe0819 +size 384 diff --git a/margin_logs/step_0000007.npy b/margin_logs/step_0000007.npy new file mode 100644 index 0000000..11a4d61 --- /dev/null +++ b/margin_logs/step_0000007.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c20cd28294aecb64d00f02ccfe5cb47390523df1cd6fd3b1ee82c6a4b70da2 +size 384 diff --git a/margin_logs/step_0000008.npy b/margin_logs/step_0000008.npy new file mode 100644 index 0000000..9aec1c1 --- /dev/null +++ b/margin_logs/step_0000008.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47b9df5882673cf8dcd2ca4c213fc1729b3ca424a6cc0b882ff9a3cf78f9d283 +size 384 diff --git a/margin_logs/step_0000009.npy b/margin_logs/step_0000009.npy new file mode 100644 index 0000000..ab5ff4b --- /dev/null +++ b/margin_logs/step_0000009.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a5676450062600601660617909075d6b89eac1fbe805b81f4bb2e63732fa6f7 +size 384 diff --git a/margin_logs/step_0000010.npy b/margin_logs/step_0000010.npy new file mode 100644 index 0000000..597e4c3 --- /dev/null +++ b/margin_logs/step_0000010.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af22360ed5be151d1082137f98045725e187c559da51df9a18e27f40e6f4ec7e +size 384 diff --git a/margin_logs/step_0000011.npy b/margin_logs/step_0000011.npy new file mode 100644 index 0000000..260d21a --- /dev/null +++ b/margin_logs/step_0000011.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:564f0c0a728d7243f4959f2bd938d37cb01d179f9a0855ea735caec5eea5ad43 +size 384 diff --git a/margin_logs/step_0000012.npy b/margin_logs/step_0000012.npy new file mode 100644 index 0000000..f77871c --- /dev/null +++ b/margin_logs/step_0000012.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:817185dd50c7c50639c1563a35f2488058c54d3a59792779a244b6f671569112 +size 384 diff --git a/margin_logs/step_0000013.npy b/margin_logs/step_0000013.npy new file mode 100644 index 0000000..be3a011 --- /dev/null +++ b/margin_logs/step_0000013.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:251c2941fda1e995ebde0279564c09d6b0c48c729821b803272a903f687f70bd +size 384 diff --git a/margin_logs/step_0000014.npy b/margin_logs/step_0000014.npy new file mode 100644 index 0000000..7222ca0 --- /dev/null +++ b/margin_logs/step_0000014.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9145eb505f9d5979aa185bedce1b9efad1a2a637b581aba29ff2847b6afe559 +size 384 diff --git a/margin_logs/step_0000015.npy b/margin_logs/step_0000015.npy new file mode 100644 index 0000000..1fc98a3 --- /dev/null +++ b/margin_logs/step_0000015.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e00116f64b7281be8b83dd08c1ee13b363030ca3e95c978b0c5e0275e99005c +size 384 diff --git a/margin_logs/step_0000016.npy b/margin_logs/step_0000016.npy new file mode 100644 index 0000000..907f9af --- /dev/null +++ b/margin_logs/step_0000016.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d3b344b7ff2b0f281174193a8f0454615c9331b11aae7a0c88a53f88692ae2d +size 384 diff --git a/margin_logs/step_0000017.npy b/margin_logs/step_0000017.npy new file mode 100644 index 0000000..d9e8502 --- /dev/null +++ b/margin_logs/step_0000017.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3dff65aba870d959d9833b242c11c13929271aef41a2a727732c54a7ab8347b +size 384 diff --git a/margin_logs/step_0000018.npy b/margin_logs/step_0000018.npy new file mode 100644 index 0000000..7429915 --- /dev/null +++ b/margin_logs/step_0000018.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b02d86da1ef9910f1398a4077a2948622f918c27d09de3d11a611ab17ae136d8 +size 384 diff --git a/margin_logs/step_0000019.npy b/margin_logs/step_0000019.npy new file mode 100644 index 0000000..ba09b62 --- /dev/null +++ b/margin_logs/step_0000019.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cbb0d0928b0686d625297028a5b6bcdef843e6a7ca71eb3438911b2e9b39b28 +size 384 diff --git a/margin_logs/step_0000020.npy b/margin_logs/step_0000020.npy new file mode 100644 index 0000000..49172ac --- /dev/null +++ b/margin_logs/step_0000020.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b4a2c0604800322878100f62ac3092a044b30e2c6294c118ef5f8627336a79b +size 384 diff --git a/margin_logs/step_0000021.npy b/margin_logs/step_0000021.npy new file mode 100644 index 0000000..e968060 --- /dev/null +++ b/margin_logs/step_0000021.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cf263cb5fc9c5a2002c5959106d0ba359f737c93ceec75c7c75769dfe373c3c +size 384 diff --git a/margin_logs/step_0000022.npy b/margin_logs/step_0000022.npy new file mode 100644 index 0000000..8e4018d --- /dev/null +++ b/margin_logs/step_0000022.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdfcee64bd533a8b7f1fefbcc0eb96348c974fc8caecbf4e119be0ab1fae7665 +size 384 diff --git a/margin_logs/step_0000023.npy b/margin_logs/step_0000023.npy new file mode 100644 index 0000000..a70c398 --- /dev/null +++ b/margin_logs/step_0000023.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1c69d47b8736e6b835dc29d335da6a3f564c9bbaf5e5a11a773468578e63740 +size 384 diff --git a/margin_logs/step_0000024.npy b/margin_logs/step_0000024.npy new file mode 100644 index 0000000..bf63b2c --- /dev/null +++ b/margin_logs/step_0000024.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd61c8e637319283c044239bb017bc3aabf3171e6ba430afe6647304d3be16c2 +size 384 diff --git a/margin_logs/step_0000025.npy b/margin_logs/step_0000025.npy new file mode 100644 index 0000000..530f1e0 --- /dev/null +++ b/margin_logs/step_0000025.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84a8d527738bd313b06faed9416093d063ebc5eb70d0539df8f416897f709c24 +size 384 diff --git a/margin_logs/step_0000026.npy b/margin_logs/step_0000026.npy new file mode 100644 index 0000000..ab06f55 --- /dev/null +++ b/margin_logs/step_0000026.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbc63377292607f7f5ba2c7850e07c63997a47f640b4e2052a82f1fd7540f047 +size 384 diff --git a/margin_logs/step_0000027.npy b/margin_logs/step_0000027.npy new file mode 100644 index 0000000..2fe2721 --- /dev/null +++ b/margin_logs/step_0000027.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5788d1a0bbe61b8543ea3a84567bd1885aa346f1540c8223eb1106006655517f +size 384 diff --git a/margin_logs/step_0000028.npy b/margin_logs/step_0000028.npy new file mode 100644 index 0000000..7452ba9 --- /dev/null +++ b/margin_logs/step_0000028.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:620ec16c9fd0f22f30b9b912f1b53f46b64b054879f1f68a9b00b01d199f6c59 +size 384 diff --git a/margin_logs/step_0000029.npy b/margin_logs/step_0000029.npy new file mode 100644 index 0000000..a740541 --- /dev/null +++ b/margin_logs/step_0000029.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c0acd1649c4c958e9d1c8a272195503362709bae14a207830180b67c45564f1 +size 384 diff --git a/margin_logs/step_0000030.npy b/margin_logs/step_0000030.npy new file mode 100644 index 0000000..4ced2e5 --- /dev/null +++ b/margin_logs/step_0000030.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f9b2c52deb346c0d9747fdb591978d8baf3800ae51ccfece41b49cdf7b8b18b +size 384 diff --git a/margin_logs/step_0000031.npy b/margin_logs/step_0000031.npy new file mode 100644 index 0000000..3093dbe --- /dev/null +++ b/margin_logs/step_0000031.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:624d3fede52f987b7533797103f8270eded26853c27981569f5b6abd77c54e57 +size 384 diff --git a/margin_logs/step_0000032.npy b/margin_logs/step_0000032.npy new file mode 100644 index 0000000..b6c54cd --- /dev/null +++ b/margin_logs/step_0000032.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b2f2d9ff53c78c5cf4626709ac2d591ec3c1f34c5a20868acd1258b4be7995b +size 384 diff --git a/margin_logs/step_0000033.npy b/margin_logs/step_0000033.npy new file mode 100644 index 0000000..d8505ce --- /dev/null +++ b/margin_logs/step_0000033.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:247538163fb538175c5eeb4138f693a64b4593b2dc49d2dc76a8ca2120ee6fea +size 384 diff --git a/margin_logs/step_0000034.npy b/margin_logs/step_0000034.npy new file mode 100644 index 0000000..d919821 --- /dev/null +++ b/margin_logs/step_0000034.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08d14d76dfd415062b314aa9b07616f27cb938dbf28f7fc8b41dbad256071325 +size 384 diff --git a/margin_logs/step_0000035.npy b/margin_logs/step_0000035.npy new file mode 100644 index 0000000..079e8bf --- /dev/null +++ b/margin_logs/step_0000035.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5207940ed00e70dda61ee934bbd7116125b575db34c45fc4757afd3ea0b88b3c +size 384 diff --git a/margin_logs/step_0000036.npy b/margin_logs/step_0000036.npy new file mode 100644 index 0000000..f16de21 --- /dev/null +++ b/margin_logs/step_0000036.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9267c10138e38c513bd8b34250df1bb0a1f1cbf52fc35a14437d215247b4910 +size 384 diff --git a/margin_logs/step_0000037.npy b/margin_logs/step_0000037.npy new file mode 100644 index 0000000..6dbf085 --- /dev/null +++ b/margin_logs/step_0000037.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c90b5e22d79d06359da7a075cee9f14e3c600e6611987970e5d2cb72d882e5fc +size 384 diff --git a/margin_logs/step_0000038.npy b/margin_logs/step_0000038.npy new file mode 100644 index 0000000..a315cfd --- /dev/null +++ b/margin_logs/step_0000038.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3c518a6407f00cd6cb4321045569193f4e96215842387e662ad1f2bb13bdd9 +size 384 diff --git a/margin_logs/step_0000039.npy b/margin_logs/step_0000039.npy new file mode 100644 index 0000000..24638b3 --- /dev/null +++ b/margin_logs/step_0000039.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e1be26c4c950d5058fbc700030e90a51e2984993b2394c211e810d645886e3b +size 384 diff --git a/margin_logs/step_0000040.npy b/margin_logs/step_0000040.npy new file mode 100644 index 0000000..ea8feff --- /dev/null +++ b/margin_logs/step_0000040.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2815ad0de52a1a2ef6708ecf59acb71311ba1c810686fb2ce356adae21887dff +size 384 diff --git a/margin_logs/step_0000041.npy b/margin_logs/step_0000041.npy new file mode 100644 index 0000000..b083e72 --- /dev/null +++ b/margin_logs/step_0000041.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ff00d49431d3bcf5a511335f74776b85821c71c6cbf98d0d1d1968ecbe9a7b1 +size 384 diff --git a/margin_logs/step_0000042.npy b/margin_logs/step_0000042.npy new file mode 100644 index 0000000..f9b9b59 --- /dev/null +++ b/margin_logs/step_0000042.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7bd53dfcb348b4e1ac4cdd9798b581d56a4b15a59291686b78b4a50a7e3d796 +size 384 diff --git a/margin_logs/step_0000043.npy b/margin_logs/step_0000043.npy new file mode 100644 index 0000000..5be7d39 --- /dev/null +++ b/margin_logs/step_0000043.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b674e1af02449f4c450e8819993804d6255d04732edab0fd85050b8f08c283ad +size 384 diff --git a/margin_logs/step_0000044.npy b/margin_logs/step_0000044.npy new file mode 100644 index 0000000..2bb4d28 --- /dev/null +++ b/margin_logs/step_0000044.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d3d6bb55664b2902047d8661549654008783fb481a1a3530595291e1a72f0d2 +size 384 diff --git a/margin_logs/step_0000045.npy b/margin_logs/step_0000045.npy new file mode 100644 index 0000000..df11775 --- /dev/null +++ b/margin_logs/step_0000045.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7665bbb055bc07db910294f5330b830e2fcf5d4341ae4458af184b9c18c9c243 +size 384 diff --git a/margin_logs/step_0000046.npy b/margin_logs/step_0000046.npy new file mode 100644 index 0000000..20fe6c2 --- /dev/null +++ b/margin_logs/step_0000046.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17266d80472515e1d165501e46018e9b351cb83a6b2d3d7268965d2433727b16 +size 384 diff --git a/margin_logs/step_0000047.npy b/margin_logs/step_0000047.npy new file mode 100644 index 0000000..2e12790 --- /dev/null +++ b/margin_logs/step_0000047.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfa7530c0b84d770808ae5ef8fbcbecbcf03932a152240fd286a5ad2b7027870 +size 384 diff --git a/margin_logs/step_0000048.npy b/margin_logs/step_0000048.npy new file mode 100644 index 0000000..4f0f4e8 --- /dev/null +++ b/margin_logs/step_0000048.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:376f6b75a87418c753f099348fef93ff810a7cb3c23c31db6061bbd33076d83a +size 384 diff --git a/margin_logs/step_0000049.npy b/margin_logs/step_0000049.npy new file mode 100644 index 0000000..71d7dc5 --- /dev/null +++ b/margin_logs/step_0000049.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8afeff77b753d59e1592686c04d173bb8b10813d283c8a620232ddeae7a7857d +size 384 diff --git a/margin_logs/step_0000050.npy b/margin_logs/step_0000050.npy new file mode 100644 index 0000000..5db23e0 --- /dev/null +++ b/margin_logs/step_0000050.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20e61c883685d94be0809e5387231355bfea10ada2d1bed260e3111b844770f3 +size 384 diff --git a/margin_logs/step_0000051.npy b/margin_logs/step_0000051.npy new file mode 100644 index 0000000..dd71324 --- /dev/null +++ b/margin_logs/step_0000051.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57ca9605be471c42b7ffa0120f50200b15e44b2274e981716afc389de62d7414 +size 384 diff --git a/margin_logs/step_0000052.npy b/margin_logs/step_0000052.npy new file mode 100644 index 0000000..4b6fa87 --- /dev/null +++ b/margin_logs/step_0000052.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de8ed39099b8a0a1108e47d192233314c10c5a9dd50db21ec52d0550e708606e +size 384 diff --git a/margin_logs/step_0000053.npy b/margin_logs/step_0000053.npy new file mode 100644 index 0000000..cecee44 --- /dev/null +++ b/margin_logs/step_0000053.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96c44c4bb0630d3cea7681e1b7d67d991a49ffaaecb9e3ec9a9dc33c1974de0e +size 384 diff --git a/margin_logs/step_0000054.npy b/margin_logs/step_0000054.npy new file mode 100644 index 0000000..7573bbb --- /dev/null +++ b/margin_logs/step_0000054.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75083aac3e564c4db70f6f4941e4029da77b64eabe03756a00f5d1b41e0504d2 +size 384 diff --git a/margin_logs/step_0000055.npy b/margin_logs/step_0000055.npy new file mode 100644 index 0000000..6a3a77d --- /dev/null +++ b/margin_logs/step_0000055.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8c52136f93c0a9109a54d29975c7c19511b433cdeff681ee650c388a8cddcbe +size 384 diff --git a/margin_logs/step_0000056.npy b/margin_logs/step_0000056.npy new file mode 100644 index 0000000..efb68ae --- /dev/null +++ b/margin_logs/step_0000056.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ba40c39edf78682093f845bd7ac1ab568f3c928b704ae79b1263afcbafd017a +size 384 diff --git a/margin_logs/step_0000057.npy b/margin_logs/step_0000057.npy new file mode 100644 index 0000000..282a7ad --- /dev/null +++ b/margin_logs/step_0000057.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92d670948f519616ecae46fa29c661850aeac61d24deb313b2a1c1da4e4b5038 +size 384 diff --git a/margin_logs/step_0000058.npy b/margin_logs/step_0000058.npy new file mode 100644 index 0000000..e56f080 --- /dev/null +++ b/margin_logs/step_0000058.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b30c591a0e29670325f142b0edd17a118fab37f00b2646c480d0d2df7d26e2f +size 384 diff --git a/margin_logs/step_0000059.npy b/margin_logs/step_0000059.npy new file mode 100644 index 0000000..9edccc2 --- /dev/null +++ b/margin_logs/step_0000059.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4539740833c08536bfb067f28deff279498d764f196c7d723e50535f6e137dbd +size 384 diff --git a/margin_logs/step_0000060.npy b/margin_logs/step_0000060.npy new file mode 100644 index 0000000..98ebb23 --- /dev/null +++ b/margin_logs/step_0000060.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd58b718895f09aca3bbc9edbbf3903cecf585c36efaf1808c586ee20fea0139 +size 384 diff --git a/margin_logs/step_0000061.npy b/margin_logs/step_0000061.npy new file mode 100644 index 0000000..b46d354 --- /dev/null +++ b/margin_logs/step_0000061.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:092cc228d6f6e4f4c8b82a418aa0e23b4baab5ca9d8b9a5f3553de5a91babf09 +size 384 diff --git a/margin_logs/step_0000062.npy b/margin_logs/step_0000062.npy new file mode 100644 index 0000000..23f6944 --- /dev/null +++ b/margin_logs/step_0000062.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbdb79a7998ab5d7458664497bf2012da4a0b020dc89d4f435cb1704cdb8f635 +size 384 diff --git a/margin_logs/step_0000063.npy b/margin_logs/step_0000063.npy new file mode 100644 index 0000000..353a982 --- /dev/null +++ b/margin_logs/step_0000063.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e854fcf771e526a538c843d63c7e63f70f7f1de43309f94edd291d7016f4a043 +size 384 diff --git a/margin_logs/step_0000064.npy b/margin_logs/step_0000064.npy new file mode 100644 index 0000000..81ee388 --- /dev/null +++ b/margin_logs/step_0000064.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4882fc9a7a6b4a73a285f374b0ce85f1062e8218a18291831047cc72754eb8e4 +size 384 diff --git a/margin_logs/step_0000065.npy b/margin_logs/step_0000065.npy new file mode 100644 index 0000000..f53869a --- /dev/null +++ b/margin_logs/step_0000065.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe58162c0871d5771cb3662f99f52fd7c659206cde76dcb10d351f4361fa7ff9 +size 384 diff --git a/margin_logs/step_0000066.npy b/margin_logs/step_0000066.npy new file mode 100644 index 0000000..b2b5f5d --- /dev/null +++ b/margin_logs/step_0000066.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a283cae824de21cd4f8b0f9c80b31297454ac39010a955489f6d0a9b6d6a70fe +size 384 diff --git a/margin_logs/step_0000067.npy b/margin_logs/step_0000067.npy new file mode 100644 index 0000000..eb4f94b --- /dev/null +++ b/margin_logs/step_0000067.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89525bcc78c5b2d0d58624ba96a83a893fa602f96ecdc4a87e161a20c3e86b79 +size 384 diff --git a/margin_logs/step_0000068.npy b/margin_logs/step_0000068.npy new file mode 100644 index 0000000..ad9c394 --- /dev/null +++ b/margin_logs/step_0000068.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ade6de2ef7b8279193ba811a439f9b45d2dc6f1131f48c853e39bb9a41ee5185 +size 384 diff --git a/margin_logs/step_0000069.npy b/margin_logs/step_0000069.npy new file mode 100644 index 0000000..ba7b368 --- /dev/null +++ b/margin_logs/step_0000069.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c47a6e11ec7378929fae8390926ddbbbd0862699f2f46d2ce3de8a315222cd0 +size 384 diff --git a/margin_logs/step_0000070.npy b/margin_logs/step_0000070.npy new file mode 100644 index 0000000..fdd31d3 --- /dev/null +++ b/margin_logs/step_0000070.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d3f61c1d7a3ecc9acab7e915c0a03404a8c13a3de269b098850977a23f56276 +size 384 diff --git a/margin_logs/step_0000071.npy b/margin_logs/step_0000071.npy new file mode 100644 index 0000000..e9e832f --- /dev/null +++ b/margin_logs/step_0000071.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f060e613276792dad17d86c6f7d4d4cc3b53e94e7b43b16d23f4f6d428d7a28 +size 384 diff --git a/margin_logs/step_0000072.npy b/margin_logs/step_0000072.npy new file mode 100644 index 0000000..14054f0 --- /dev/null +++ b/margin_logs/step_0000072.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab32a32703a20f38b9af9169fd4e93153442148e22e5bf0a406cd1ae7cff30d8 +size 384 diff --git a/margin_logs/step_0000073.npy b/margin_logs/step_0000073.npy new file mode 100644 index 0000000..93313fc --- /dev/null +++ b/margin_logs/step_0000073.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e9785fdb68c7097a1d3ec218f95f85cf01da00dc790ef5950b14b55eb8affe8 +size 384 diff --git a/margin_logs/step_0000074.npy b/margin_logs/step_0000074.npy new file mode 100644 index 0000000..5984ed9 --- /dev/null +++ b/margin_logs/step_0000074.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3f59dc04ba8b62bc8ee3291d01a5e47227cf2a5b06891a472063a8d077327d2 +size 384 diff --git a/margin_logs/step_0000075.npy b/margin_logs/step_0000075.npy new file mode 100644 index 0000000..ac024c7 --- /dev/null +++ b/margin_logs/step_0000075.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5468622b1fcabc21812bb5b6637f82c6f061178dcdf1b974628fc77decdef41 +size 384 diff --git a/margin_logs/step_0000076.npy b/margin_logs/step_0000076.npy new file mode 100644 index 0000000..1b43b29 --- /dev/null +++ b/margin_logs/step_0000076.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1bc3b8a82387859d356ce17cbce9a3e1d6f1991b58d28340267c4a36d869f30 +size 384 diff --git a/margin_logs/step_0000077.npy b/margin_logs/step_0000077.npy new file mode 100644 index 0000000..4c0eb62 --- /dev/null +++ b/margin_logs/step_0000077.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05216155df2e246a80b30e5d5da97b2dac7f30fb46bb27ade14bb3b40f460573 +size 384 diff --git a/margin_logs/step_0000078.npy b/margin_logs/step_0000078.npy new file mode 100644 index 0000000..e5ab0ba --- /dev/null +++ b/margin_logs/step_0000078.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c14d553ab8e0136317448907aa526e65b4e29665827d8f96468552ec3a626590 +size 384 diff --git a/margin_logs/step_0000079.npy b/margin_logs/step_0000079.npy new file mode 100644 index 0000000..43fca3c --- /dev/null +++ b/margin_logs/step_0000079.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9da4f118b80d49194dc96cbb1d9bc4a55c0411a400fee1da101252d44d4e47f +size 384 diff --git a/margin_logs/step_0000080.npy b/margin_logs/step_0000080.npy new file mode 100644 index 0000000..07e7f2e --- /dev/null +++ b/margin_logs/step_0000080.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efa3a201d1f257ed72406899f8512e3b171ccf22b77dbcd521de4c5f8d5cfdc2 +size 384 diff --git a/margin_logs/step_0000081.npy b/margin_logs/step_0000081.npy new file mode 100644 index 0000000..a0b1bf8 --- /dev/null +++ b/margin_logs/step_0000081.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baccaa1f5dea9dbf474e29bb226f4f39cb5a5b1342508f620ea5c067bd531413 +size 384 diff --git a/margin_logs/step_0000082.npy b/margin_logs/step_0000082.npy new file mode 100644 index 0000000..e185028 --- /dev/null +++ b/margin_logs/step_0000082.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bce5e02162a15bdbe3e2d2f462d8c7d594abc1a01e6a4a0d23b1fd1e6c0022d +size 384 diff --git a/margin_logs/step_0000083.npy b/margin_logs/step_0000083.npy new file mode 100644 index 0000000..e1ffa0b --- /dev/null +++ b/margin_logs/step_0000083.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:987f175a36e8a396f20a26265ea4714afeaadece572e7b2f1abdc59a60941e7c +size 384 diff --git a/margin_logs/step_0000084.npy b/margin_logs/step_0000084.npy new file mode 100644 index 0000000..2f9f279 --- /dev/null +++ b/margin_logs/step_0000084.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a092f91f43c9daad99cd3098b10b2bd8bfbe16ce75ed566074413adf0a7df733 +size 384 diff --git a/margin_logs/step_0000085.npy b/margin_logs/step_0000085.npy new file mode 100644 index 0000000..482b148 --- /dev/null +++ b/margin_logs/step_0000085.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:963c8cb97ee9039724516133db9dcffd5034bf7d6883ad74212743ff2292723e +size 384 diff --git a/margin_logs/step_0000086.npy b/margin_logs/step_0000086.npy new file mode 100644 index 0000000..647238a --- /dev/null +++ b/margin_logs/step_0000086.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a6fc95c7d011336f70e218c5971a5e478e9e6eed1fac1bd0563570489793655 +size 384 diff --git a/margin_logs/step_0000087.npy b/margin_logs/step_0000087.npy new file mode 100644 index 0000000..319d776 --- /dev/null +++ b/margin_logs/step_0000087.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36e33be2ea6579cf2eeccf261c26bf9131da175e3abe08f2a3e1f9c5c18ca44b +size 384 diff --git a/margin_logs/step_0000088.npy b/margin_logs/step_0000088.npy new file mode 100644 index 0000000..92b6c64 --- /dev/null +++ b/margin_logs/step_0000088.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5339454a45c132924e058fa6baa4a97cad13e70709996731e380238260fb8307 +size 384 diff --git a/margin_logs/step_0000089.npy b/margin_logs/step_0000089.npy new file mode 100644 index 0000000..852d127 --- /dev/null +++ b/margin_logs/step_0000089.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:545d62cb1f4f16854b426706bfca4af7137fa756a55f25962337d887fa3e06a3 +size 384 diff --git a/margin_logs/step_0000090.npy b/margin_logs/step_0000090.npy new file mode 100644 index 0000000..1478046 --- /dev/null +++ b/margin_logs/step_0000090.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:709c79af192ea6b05e87206b43ccfbc078667049110b3eabe09b131fc194ea35 +size 384 diff --git a/margin_logs/step_0000091.npy b/margin_logs/step_0000091.npy new file mode 100644 index 0000000..e54a00a --- /dev/null +++ b/margin_logs/step_0000091.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:755842061742b4f9aaaf8b8d97508a1f0267037d2d0e155b00fb79ed0d92f7da +size 384 diff --git a/margin_logs/step_0000092.npy b/margin_logs/step_0000092.npy new file mode 100644 index 0000000..5d36b76 --- /dev/null +++ b/margin_logs/step_0000092.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f67667814c0db49b53d4f4b27a9b6ee352659502fea9f14afdb23c6103fcc771 +size 384 diff --git a/margin_logs/step_0000093.npy b/margin_logs/step_0000093.npy new file mode 100644 index 0000000..e348b02 --- /dev/null +++ b/margin_logs/step_0000093.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:645112eebfdddf49e5f7088936d611d2467968a5561722463645ccf5baa4a2db +size 384 diff --git a/margin_logs/step_0000094.npy b/margin_logs/step_0000094.npy new file mode 100644 index 0000000..3a4efbe --- /dev/null +++ b/margin_logs/step_0000094.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54f4856d853624fb57852dea21cba9c47ab6efbb090b2ddc56aa1a93dbf07a3d +size 384 diff --git a/margin_logs/step_0000095.npy b/margin_logs/step_0000095.npy new file mode 100644 index 0000000..6740515 --- /dev/null +++ b/margin_logs/step_0000095.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98c5306795d24e223431e69d15994262d1435046cf7474a4342feab323bb28a2 +size 384 diff --git a/margin_logs/step_0000096.npy b/margin_logs/step_0000096.npy new file mode 100644 index 0000000..c2be50a --- /dev/null +++ b/margin_logs/step_0000096.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b5023363f03c0f61d0526e94e386ce56f290554577650b87bc6b97e29653843 +size 384 diff --git a/margin_logs/step_0000097.npy b/margin_logs/step_0000097.npy new file mode 100644 index 0000000..b0cadfa --- /dev/null +++ b/margin_logs/step_0000097.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f252b0812ffa0b20067d6d454a8981f36ec0a0ce040ffc684aa1ddb8ec17268 +size 384 diff --git a/margin_logs/step_0000098.npy b/margin_logs/step_0000098.npy new file mode 100644 index 0000000..309b850 --- /dev/null +++ b/margin_logs/step_0000098.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8284f83ba40612ab20b82ebbe72704b141631ff9fa460ba4f08241aeaed50905 +size 384 diff --git a/margin_logs/step_0000099.npy b/margin_logs/step_0000099.npy new file mode 100644 index 0000000..addbae5 --- /dev/null +++ b/margin_logs/step_0000099.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbbcfcfaac0a21d0f5a9e4a8a3ed206dbdf0ef0854f52fc1596e4eeea43803df +size 384 diff --git a/margin_logs/step_0000100.npy b/margin_logs/step_0000100.npy new file mode 100644 index 0000000..1a684fb --- /dev/null +++ b/margin_logs/step_0000100.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:645b0522c42a33c852970080bedb990ccb5464474bb7a979c9f3bbfe29639cf3 +size 384 diff --git a/margin_logs/step_0000101.npy b/margin_logs/step_0000101.npy new file mode 100644 index 0000000..52c078c --- /dev/null +++ b/margin_logs/step_0000101.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce25c84881f306107a180950e91b5b797884580db0c1cb67e9a284e4e59257a8 +size 384 diff --git a/margin_logs/step_0000102.npy b/margin_logs/step_0000102.npy new file mode 100644 index 0000000..e46f9db --- /dev/null +++ b/margin_logs/step_0000102.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ceeb0bb37133f075b0d2f4ae07149c69f2eaa4319c89745dff0c8f7ce2da041 +size 384 diff --git a/margin_logs/step_0000103.npy b/margin_logs/step_0000103.npy new file mode 100644 index 0000000..8240227 --- /dev/null +++ b/margin_logs/step_0000103.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc5a213bea9b666652a17faaca9804d7cc18e2cb23bad8fbc51f28b116ff87d4 +size 384 diff --git a/margin_logs/step_0000104.npy b/margin_logs/step_0000104.npy new file mode 100644 index 0000000..517bf01 --- /dev/null +++ b/margin_logs/step_0000104.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22ed0f8d51a749710658e2708bb2f23af8c937dc70bece79f789d545e1380b1f +size 384 diff --git a/margin_logs/step_0000105.npy b/margin_logs/step_0000105.npy new file mode 100644 index 0000000..4610da2 --- /dev/null +++ b/margin_logs/step_0000105.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8967f485e75e0ae29953980764224710edcb1725ea780820525d303c967d313d +size 384 diff --git a/margin_logs/step_0000106.npy b/margin_logs/step_0000106.npy new file mode 100644 index 0000000..b3fb38a --- /dev/null +++ b/margin_logs/step_0000106.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1e0b28b1d4eac0d44f410cbc516485da3930fc402457fdc982bb46a2c097b19 +size 384 diff --git a/margin_logs/step_0000107.npy b/margin_logs/step_0000107.npy new file mode 100644 index 0000000..7a62c15 --- /dev/null +++ b/margin_logs/step_0000107.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0429dad46bbbd46252bd7dea35c3f1b555be4e7b33405cc315960aed5df98c26 +size 384 diff --git a/margin_logs/step_0000108.npy b/margin_logs/step_0000108.npy new file mode 100644 index 0000000..346860c --- /dev/null +++ b/margin_logs/step_0000108.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ba789ac9a57ae0114f50ff0657926b5ffd6cff2f6b8d291335885a8e16d5972 +size 384 diff --git a/margin_logs/step_0000109.npy b/margin_logs/step_0000109.npy new file mode 100644 index 0000000..c469d11 --- /dev/null +++ b/margin_logs/step_0000109.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcad5674fc93ea9aad6d313217a921b8891653d36453103d7dcbab47567e8e22 +size 384 diff --git a/margin_logs/step_0000110.npy b/margin_logs/step_0000110.npy new file mode 100644 index 0000000..0fb64ad --- /dev/null +++ b/margin_logs/step_0000110.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0988543548413cc80471ba949859d3c738c08462f9c948322b3519bc6c7225fb +size 384 diff --git a/margin_logs/step_0000111.npy b/margin_logs/step_0000111.npy new file mode 100644 index 0000000..bfda0e5 --- /dev/null +++ b/margin_logs/step_0000111.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c34d8f7cb5aedf079a83f3b0aaf4df9d83db7e66e3926a3d8f053c2801696cbb +size 384 diff --git a/margin_logs/step_0000112.npy b/margin_logs/step_0000112.npy new file mode 100644 index 0000000..e9f8eea --- /dev/null +++ b/margin_logs/step_0000112.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbdcf658407a82d2f4307cf5cb8efc0fe753eed85ace150c7f573cb6cbee2dad +size 384 diff --git a/margin_logs/step_0000113.npy b/margin_logs/step_0000113.npy new file mode 100644 index 0000000..b348c7d --- /dev/null +++ b/margin_logs/step_0000113.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dea86e28af44cbf805e0267078571fe32f2d8345e67c8ef0e21c71f4f900918c +size 384 diff --git a/margin_logs/step_0000114.npy b/margin_logs/step_0000114.npy new file mode 100644 index 0000000..3413fa3 --- /dev/null +++ b/margin_logs/step_0000114.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4c86254249891e31a5df39eecb20e3141cdfda63dd7a4d565fda025cc471bae +size 384 diff --git a/margin_logs/step_0000115.npy b/margin_logs/step_0000115.npy new file mode 100644 index 0000000..5aa8c1e --- /dev/null +++ b/margin_logs/step_0000115.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8187073167875bc7c8d27468ab8cac81c46da7934a8f7dd1237c217d93fc7ab8 +size 384 diff --git a/margin_logs/step_0000116.npy b/margin_logs/step_0000116.npy new file mode 100644 index 0000000..511fd8a --- /dev/null +++ b/margin_logs/step_0000116.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abf294eca7c9a2c889b7fe403622555590eed100a86efb36dfe8ba2ea250c0a2 +size 384 diff --git a/margin_logs/step_0000117.npy b/margin_logs/step_0000117.npy new file mode 100644 index 0000000..5d6ec1e --- /dev/null +++ b/margin_logs/step_0000117.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20f75b3031c53be10c45936cca2fb66fa844c1433e5c9fda742c52445b1a10bf +size 384 diff --git a/margin_logs/step_0000118.npy b/margin_logs/step_0000118.npy new file mode 100644 index 0000000..ac47e60 --- /dev/null +++ b/margin_logs/step_0000118.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40b1daf6bd7c432db4c2bb817464faee11a5fb97f8c39a24323b24529e7a9dfa +size 384 diff --git a/margin_logs/step_0000119.npy b/margin_logs/step_0000119.npy new file mode 100644 index 0000000..d4470f5 --- /dev/null +++ b/margin_logs/step_0000119.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65b7ddb0f60ee72535a482ffb17f50dbb5c1f47086bc6ecac85379e3adece4fb +size 384 diff --git a/margin_logs/step_0000120.npy b/margin_logs/step_0000120.npy new file mode 100644 index 0000000..390009c --- /dev/null +++ b/margin_logs/step_0000120.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be048f912f1d15b49d0c3e726c4e81c2b3c8524df834693ff4010006fea9a91a +size 384 diff --git a/margin_logs/step_0000121.npy b/margin_logs/step_0000121.npy new file mode 100644 index 0000000..4c9b707 --- /dev/null +++ b/margin_logs/step_0000121.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c8efd2e44deec5ccf5f5a0bdc44504326706a1e1de714c968f0a0c48b1b63a9 +size 384 diff --git a/margin_logs/step_0000122.npy b/margin_logs/step_0000122.npy new file mode 100644 index 0000000..4b221d7 --- /dev/null +++ b/margin_logs/step_0000122.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35d186dd4c0a101bf27535d09cd0d0ba85b2c3522794a642f3bf9b2ea338af88 +size 384 diff --git a/margin_logs/step_0000123.npy b/margin_logs/step_0000123.npy new file mode 100644 index 0000000..8d41722 --- /dev/null +++ b/margin_logs/step_0000123.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c6fd86d24d1a24411ddf4b7fd6c5c6f154dc4b152b678b8ccd0b7e8cf10ed14 +size 384 diff --git a/margin_logs/step_0000124.npy b/margin_logs/step_0000124.npy new file mode 100644 index 0000000..5c865fd --- /dev/null +++ b/margin_logs/step_0000124.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9b7e803ee5e654e521e186bb5474449493208b390d8127159be11f2b3aeeaee +size 384 diff --git a/margin_logs/step_0000125.npy b/margin_logs/step_0000125.npy new file mode 100644 index 0000000..db4ae3b --- /dev/null +++ b/margin_logs/step_0000125.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ae9f911146525f684a788d86c0ea59ff456886f9dc0ce2dfcc34220f052316a +size 384 diff --git a/margin_logs/step_0000126.npy b/margin_logs/step_0000126.npy new file mode 100644 index 0000000..8284297 --- /dev/null +++ b/margin_logs/step_0000126.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4d2f279f707eaf8256b3718781c14a7075aa2c581db968593f32800d9014793 +size 384 diff --git a/margin_logs/step_0000127.npy b/margin_logs/step_0000127.npy new file mode 100644 index 0000000..b1c00f6 --- /dev/null +++ b/margin_logs/step_0000127.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93d12031b6a3e01bb7fc839807372e11be3254a94aa959bab9ffc5e52636756c +size 384 diff --git a/margin_logs/step_0000128.npy b/margin_logs/step_0000128.npy new file mode 100644 index 0000000..fcec6e6 --- /dev/null +++ b/margin_logs/step_0000128.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8924f0fdff18a383a86e7f5fb1181aaf52bdecfe9f5232e0d07c523359308a7f +size 384 diff --git a/margin_logs/step_0000129.npy b/margin_logs/step_0000129.npy new file mode 100644 index 0000000..827fde1 --- /dev/null +++ b/margin_logs/step_0000129.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b1c9a3d8fb6466d5534bcb3004f2420f31865c17e77cc1402cddea37a55dc58 +size 384 diff --git a/margin_logs/step_0000130.npy b/margin_logs/step_0000130.npy new file mode 100644 index 0000000..ab57685 --- /dev/null +++ b/margin_logs/step_0000130.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6a0fb87140ac01e109cc88dd5ae349e6ed57369f07437cdec7c946d718fb7d1 +size 384 diff --git a/margin_logs/step_0000131.npy b/margin_logs/step_0000131.npy new file mode 100644 index 0000000..d931b5c --- /dev/null +++ b/margin_logs/step_0000131.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3062fca9614540cc3e6a1659c3c4411af038c573898f9f7ed9223e8695c6b79 +size 384 diff --git a/margin_logs/step_0000132.npy b/margin_logs/step_0000132.npy new file mode 100644 index 0000000..cd87143 --- /dev/null +++ b/margin_logs/step_0000132.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7b59efc6c98596cc9f258625f92bd4ff0948cd8801ef50593e56a0c09827129 +size 384 diff --git a/margin_logs/step_0000133.npy b/margin_logs/step_0000133.npy new file mode 100644 index 0000000..44a4937 --- /dev/null +++ b/margin_logs/step_0000133.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3fb521ae1d90bd2de4fdf56efc2baaaa1fed79825cef3ea31554b9795efa1a5 +size 384 diff --git a/margin_logs/step_0000134.npy b/margin_logs/step_0000134.npy new file mode 100644 index 0000000..ee31624 --- /dev/null +++ b/margin_logs/step_0000134.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:363074eabb614c55beca2a5df664e12e0a8c97530f1cfef99099f76916271264 +size 384 diff --git a/margin_logs/step_0000135.npy b/margin_logs/step_0000135.npy new file mode 100644 index 0000000..55428b5 --- /dev/null +++ b/margin_logs/step_0000135.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc4850a91c8cbbe5599533525c5f9dec23a6a2a27d138d560d773cb2499cb19c +size 384 diff --git a/margin_logs/step_0000136.npy b/margin_logs/step_0000136.npy new file mode 100644 index 0000000..0e4c703 --- /dev/null +++ b/margin_logs/step_0000136.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a760bc7d4a2c94c9bd41bb318b2a6172716c709ab5a8f25e86f537136ea3927 +size 384 diff --git a/margin_logs/step_0000137.npy b/margin_logs/step_0000137.npy new file mode 100644 index 0000000..8749d95 --- /dev/null +++ b/margin_logs/step_0000137.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3d78b40a9c5ea9465a3126b766cbcc7f3885295ee47c92523c644b3eb3a873c +size 384 diff --git a/margin_logs/step_0000138.npy b/margin_logs/step_0000138.npy new file mode 100644 index 0000000..959e6b8 --- /dev/null +++ b/margin_logs/step_0000138.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5af20f3e0f9671409b0da977c9a00a4776549af435050ce17919d3b3e4d522c6 +size 384 diff --git a/margin_logs/step_0000139.npy b/margin_logs/step_0000139.npy new file mode 100644 index 0000000..fdc4c6f --- /dev/null +++ b/margin_logs/step_0000139.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edd9c260683ce662b774c9bbb61b5954131c600d44cecdc06342397e63f8f4f6 +size 384 diff --git a/margin_logs/step_0000140.npy b/margin_logs/step_0000140.npy new file mode 100644 index 0000000..f29afba --- /dev/null +++ b/margin_logs/step_0000140.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66195e3f952b7709d748639182adc6ad38f504d8e6d7c0d829dcf27bd511f125 +size 384 diff --git a/margin_logs/step_0000141.npy b/margin_logs/step_0000141.npy new file mode 100644 index 0000000..d35087d --- /dev/null +++ b/margin_logs/step_0000141.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4929313e20213ca9c3afc9993450e6c74e65b6967b046fb4c25e4c1a44c74282 +size 384 diff --git a/margin_logs/step_0000142.npy b/margin_logs/step_0000142.npy new file mode 100644 index 0000000..ddddf9b --- /dev/null +++ b/margin_logs/step_0000142.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0ad9c86117f3afa7c2f2a844a04896268183d518aed635f58cb47d7670671b9 +size 384 diff --git a/margin_logs/step_0000143.npy b/margin_logs/step_0000143.npy new file mode 100644 index 0000000..af07195 --- /dev/null +++ b/margin_logs/step_0000143.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0ed880812a47a6d970ebcfc7652b41af55ebd6d088ec37e758514e3972072eb +size 384 diff --git a/margin_logs/step_0000144.npy b/margin_logs/step_0000144.npy new file mode 100644 index 0000000..7daf60a --- /dev/null +++ b/margin_logs/step_0000144.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f1541aa3e700e5033a6222af17e22fab05acd5251db59dd5477d7a8c1ff6c98 +size 384 diff --git a/margin_logs/step_0000145.npy b/margin_logs/step_0000145.npy new file mode 100644 index 0000000..eb19ce0 --- /dev/null +++ b/margin_logs/step_0000145.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f671d48e16920021a2d45fc892f82cfd5023a03d0b0062e9641263e26fce331 +size 384 diff --git a/margin_logs/step_0000146.npy b/margin_logs/step_0000146.npy new file mode 100644 index 0000000..61474f7 --- /dev/null +++ b/margin_logs/step_0000146.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e308084bab35e11cb3a469f299ba6a1153e7449cc08c6a2db431ecd4f2bf713 +size 384 diff --git a/margin_logs/step_0000147.npy b/margin_logs/step_0000147.npy new file mode 100644 index 0000000..bb50fee --- /dev/null +++ b/margin_logs/step_0000147.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf6aed1518ff40772d7825e3f8141b6f93676075ac19de5138bddd0821e15396 +size 384 diff --git a/margin_logs/step_0000148.npy b/margin_logs/step_0000148.npy new file mode 100644 index 0000000..1c3807b --- /dev/null +++ b/margin_logs/step_0000148.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a954a51b8bf6fb5c141ff61e134397d251f88e6cf02d547c123f0289c2b6c5f +size 384 diff --git a/margin_logs/step_0000149.npy b/margin_logs/step_0000149.npy new file mode 100644 index 0000000..f666fbd --- /dev/null +++ b/margin_logs/step_0000149.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5add7354b40eb1daa765555a24e7f57cf31980425886c32c1e973373de0cd976 +size 384 diff --git a/margin_logs/step_0000150.npy b/margin_logs/step_0000150.npy new file mode 100644 index 0000000..bfc4b81 --- /dev/null +++ b/margin_logs/step_0000150.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bdf2ccdb0b65a135b39c2e8845739ac6d9a0e605bf55d72070661758c13b484 +size 384 diff --git a/margin_logs/step_0000151.npy b/margin_logs/step_0000151.npy new file mode 100644 index 0000000..210b620 --- /dev/null +++ b/margin_logs/step_0000151.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a8d177f9b095f6049a084ae8895358e40ae77de9f5b04953572de9ec690c869 +size 384 diff --git a/margin_logs/step_0000152.npy b/margin_logs/step_0000152.npy new file mode 100644 index 0000000..3180500 --- /dev/null +++ b/margin_logs/step_0000152.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27f3e62f3343c7bf308182bc97a7d8e9c8d503e1ec5b7a0c2bee705c65f1fece +size 384 diff --git a/margin_logs/step_0000153.npy b/margin_logs/step_0000153.npy new file mode 100644 index 0000000..31a8e8b --- /dev/null +++ b/margin_logs/step_0000153.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db4dd7d22e26ed4a287b38d232eb56a323bcd80236f521a6b91c84f8fa271a98 +size 384 diff --git a/margin_logs/step_0000154.npy b/margin_logs/step_0000154.npy new file mode 100644 index 0000000..d7551ce --- /dev/null +++ b/margin_logs/step_0000154.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddf9e256fd08521ba082837e83186658c49d14f18bec207d50b937f95b2d3062 +size 384 diff --git a/margin_logs/step_0000155.npy b/margin_logs/step_0000155.npy new file mode 100644 index 0000000..eb0cb5c --- /dev/null +++ b/margin_logs/step_0000155.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91b2f95ed49ed53f47362158ba7171abc2fed493e0619d908fc5aef8e425eb86 +size 384 diff --git a/margin_logs/step_0000156.npy b/margin_logs/step_0000156.npy new file mode 100644 index 0000000..7ecde67 --- /dev/null +++ b/margin_logs/step_0000156.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b96482078bef4e2becd1dbf647d255154305011152908064cd287e7be353b53f +size 384 diff --git a/margin_logs/step_0000157.npy b/margin_logs/step_0000157.npy new file mode 100644 index 0000000..b847b09 --- /dev/null +++ b/margin_logs/step_0000157.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a939c79018dbb7ce4ae412508871aafc382bb909709d8fd427c87936777a722e +size 384 diff --git a/margin_logs/step_0000158.npy b/margin_logs/step_0000158.npy new file mode 100644 index 0000000..296ac01 --- /dev/null +++ b/margin_logs/step_0000158.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a27c3d3273a9efa7fcd161659ad875a1f6f2b8750f877600aa2490f943518f6 +size 384 diff --git a/margin_logs/step_0000159.npy b/margin_logs/step_0000159.npy new file mode 100644 index 0000000..88ec648 --- /dev/null +++ b/margin_logs/step_0000159.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f3f9341216bae6e0a3fbe4e897172f6313e573d70b175d6a06c5d9806ae8eb5 +size 384 diff --git a/margin_logs/step_0000160.npy b/margin_logs/step_0000160.npy new file mode 100644 index 0000000..77bfab6 --- /dev/null +++ b/margin_logs/step_0000160.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d962ca1163c39448bc332f577f874a892d5bec8f9f7b6cfc8b5016ef814af0c +size 384 diff --git a/margin_logs/step_0000161.npy b/margin_logs/step_0000161.npy new file mode 100644 index 0000000..3744b85 --- /dev/null +++ b/margin_logs/step_0000161.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a615d5270e7fee2be017f080fee2df95f2f96b15228aa84b5323fcd186939c8a +size 384 diff --git a/margin_logs/step_0000162.npy b/margin_logs/step_0000162.npy new file mode 100644 index 0000000..4a537d5 --- /dev/null +++ b/margin_logs/step_0000162.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83f6a297c1b4fab0bbf4976d69fa991ae6de2fccb637815318dc21b34a177489 +size 384 diff --git a/margin_logs/step_0000163.npy b/margin_logs/step_0000163.npy new file mode 100644 index 0000000..8c5d671 --- /dev/null +++ b/margin_logs/step_0000163.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f73545783fc8037c29acc17613cbacd6930f250f9129855c90a7f7dcb4f6d771 +size 384 diff --git a/margin_logs/step_0000164.npy b/margin_logs/step_0000164.npy new file mode 100644 index 0000000..7de23fe --- /dev/null +++ b/margin_logs/step_0000164.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2db895fbd3e2ca77b139d7e4b7746031ef39cc6a5ffa3a1294b1c41543439b5 +size 384 diff --git a/margin_logs/step_0000165.npy b/margin_logs/step_0000165.npy new file mode 100644 index 0000000..07d8b51 --- /dev/null +++ b/margin_logs/step_0000165.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dff6950a48167e3e7fce0f698bb485f37f0832af29718f660d9c29697c12a155 +size 384 diff --git a/margin_logs/step_0000166.npy b/margin_logs/step_0000166.npy new file mode 100644 index 0000000..8c81d10 --- /dev/null +++ b/margin_logs/step_0000166.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef1b93e470be5f9ebf2fbe413e0a9aabe26ecafa5751cdce972d2b9a6a33831c +size 384 diff --git a/margin_logs/step_0000167.npy b/margin_logs/step_0000167.npy new file mode 100644 index 0000000..4387bf0 --- /dev/null +++ b/margin_logs/step_0000167.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6347f25051074b553d069762c58dd6ae216ebc5e99edb4749f11858e398cf24f +size 384 diff --git a/margin_logs/step_0000168.npy b/margin_logs/step_0000168.npy new file mode 100644 index 0000000..44dfac8 --- /dev/null +++ b/margin_logs/step_0000168.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e852e88baa184494662d0fa16e059297c0ba6363501de9536c2bc1db9104695 +size 384 diff --git a/margin_logs/step_0000169.npy b/margin_logs/step_0000169.npy new file mode 100644 index 0000000..653daf5 --- /dev/null +++ b/margin_logs/step_0000169.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd06bfae6199b22f8b8efba2319ff7b87c599fef38c2c47b2f525b211d027e2a +size 384 diff --git a/margin_logs/step_0000170.npy b/margin_logs/step_0000170.npy new file mode 100644 index 0000000..abb5a6a --- /dev/null +++ b/margin_logs/step_0000170.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c45454a03a4213c00a81c3207d0128f7cbca408236d7b7f081e4f13a2844102 +size 384 diff --git a/margin_logs/step_0000171.npy b/margin_logs/step_0000171.npy new file mode 100644 index 0000000..f845e67 --- /dev/null +++ b/margin_logs/step_0000171.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c16926fe1c06295d8d130c6413430b0d205a4c8ac145c13109339a3a68d13576 +size 384 diff --git a/margin_logs/step_0000172.npy b/margin_logs/step_0000172.npy new file mode 100644 index 0000000..5263931 --- /dev/null +++ b/margin_logs/step_0000172.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:219e17c5a77858cc72264d33cb31640ce4ead3f5d6c3116c0b6b5fbbb48b3de8 +size 384 diff --git a/margin_logs/step_0000173.npy b/margin_logs/step_0000173.npy new file mode 100644 index 0000000..72a0557 --- /dev/null +++ b/margin_logs/step_0000173.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dade18667ccf5d570728106886426c77e464b04302bfe706507e560858a846d9 +size 384 diff --git a/margin_logs/step_0000174.npy b/margin_logs/step_0000174.npy new file mode 100644 index 0000000..7a342c2 --- /dev/null +++ b/margin_logs/step_0000174.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6adbee876bbb3da17b6f6fa51e6ae04cca27fe214ceb5f6b7fe575ed8220c92 +size 384 diff --git a/margin_logs/step_0000175.npy b/margin_logs/step_0000175.npy new file mode 100644 index 0000000..42941cf --- /dev/null +++ b/margin_logs/step_0000175.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae13bb4f6bbb19f470af47bae37f3283476681d4dc7f115bb05306999ef1176d +size 384 diff --git a/margin_logs/step_0000176.npy b/margin_logs/step_0000176.npy new file mode 100644 index 0000000..01878e5 --- /dev/null +++ b/margin_logs/step_0000176.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e169faf005af4c27025fb838d13db1f4cd54bfe4819cbe075afac99574d6780b +size 384 diff --git a/margin_logs/step_0000177.npy b/margin_logs/step_0000177.npy new file mode 100644 index 0000000..3a073c1 --- /dev/null +++ b/margin_logs/step_0000177.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51b346be0026425c2833fca10115339a5397e0e34a5fe4fc4cd524972a68ed99 +size 384 diff --git a/margin_logs/step_0000178.npy b/margin_logs/step_0000178.npy new file mode 100644 index 0000000..a20056e --- /dev/null +++ b/margin_logs/step_0000178.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3849c7369a6322ef49bb324d3772b1301b0aaba20ab34fa316abaa7c5cb43d3 +size 384 diff --git a/margin_logs/step_0000179.npy b/margin_logs/step_0000179.npy new file mode 100644 index 0000000..3c4b24c --- /dev/null +++ b/margin_logs/step_0000179.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4362fa17e9bd482862da2ae53e93d0e6917dcf9015e635d4af57883aa3b0608 +size 384 diff --git a/margin_logs/step_0000180.npy b/margin_logs/step_0000180.npy new file mode 100644 index 0000000..828fce6 --- /dev/null +++ b/margin_logs/step_0000180.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3427b184247372b8ca518980cb5ec4d800389d3af02fac440ac65bff0071452c +size 384 diff --git a/margin_logs/step_0000181.npy b/margin_logs/step_0000181.npy new file mode 100644 index 0000000..311406b --- /dev/null +++ b/margin_logs/step_0000181.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23eef0b1ccbf3ddc5fc94388f425b32d2ea46d9254af334ef30334211fcb34aa +size 384 diff --git a/margin_logs/step_0000182.npy b/margin_logs/step_0000182.npy new file mode 100644 index 0000000..8a63ac7 --- /dev/null +++ b/margin_logs/step_0000182.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:046a246c5774e16078073d0988c867da9033b5fa85f705691ec64a8b46230fad +size 384 diff --git a/margin_logs/step_0000183.npy b/margin_logs/step_0000183.npy new file mode 100644 index 0000000..ab0a639 --- /dev/null +++ b/margin_logs/step_0000183.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3fe78405e8c8bd14d2167faac81a8855f4376df6d9dbba9527f450021cefa53 +size 384 diff --git a/margin_logs/step_0000184.npy b/margin_logs/step_0000184.npy new file mode 100644 index 0000000..fe7e7eb --- /dev/null +++ b/margin_logs/step_0000184.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d99f62ce5b5ae17a15ef81274b1d4b6d595c435363bf0902aacba9ee9a929c03 +size 384 diff --git a/margin_logs/step_0000185.npy b/margin_logs/step_0000185.npy new file mode 100644 index 0000000..4eb055a --- /dev/null +++ b/margin_logs/step_0000185.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2a9fe310e48550e927deb14361d32a8321a02d6399e70f9037e3d707c240b91 +size 384 diff --git a/margin_logs/step_0000186.npy b/margin_logs/step_0000186.npy new file mode 100644 index 0000000..5273d48 --- /dev/null +++ b/margin_logs/step_0000186.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7357ffb781e8e83a9c79564a3930b5d843a80e77aa73038bb3016f291ab77a49 +size 384 diff --git a/margin_logs/step_0000187.npy b/margin_logs/step_0000187.npy new file mode 100644 index 0000000..102d616 --- /dev/null +++ b/margin_logs/step_0000187.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a746fc4326daf74cd01835a3c8c148e61ea2afebad6db9fa440c0778605a777e +size 384 diff --git a/margin_logs/step_0000188.npy b/margin_logs/step_0000188.npy new file mode 100644 index 0000000..430d795 --- /dev/null +++ b/margin_logs/step_0000188.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c699556c51f64099ef065a4b77b395cc0da809faee6444f73fadffa902294a0 +size 384 diff --git a/margin_logs/step_0000189.npy b/margin_logs/step_0000189.npy new file mode 100644 index 0000000..4c94833 --- /dev/null +++ b/margin_logs/step_0000189.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:397b0521083253dc32e563ea9884aeaa2502d4533533f97009325c16b3307e82 +size 384 diff --git a/margin_logs/step_0000190.npy b/margin_logs/step_0000190.npy new file mode 100644 index 0000000..37863eb --- /dev/null +++ b/margin_logs/step_0000190.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8352e436d9a2c54a029da021d27cd1f818489db12b792285d7e31768fe844a2d +size 384 diff --git a/margin_logs/step_0000191.npy b/margin_logs/step_0000191.npy new file mode 100644 index 0000000..adc1517 --- /dev/null +++ b/margin_logs/step_0000191.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b461a767deff06655e2dec1c6a371997720488f9703b1db28812451e60d5bece +size 384 diff --git a/margin_logs/step_0000192.npy b/margin_logs/step_0000192.npy new file mode 100644 index 0000000..63b40a9 --- /dev/null +++ b/margin_logs/step_0000192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dce7be0d644a4c2afe28b9cb5868e92d4295a32aa5c7355f4b2fde5a51a1fc8 +size 384 diff --git a/margin_logs/step_0000193.npy b/margin_logs/step_0000193.npy new file mode 100644 index 0000000..b2fe665 --- /dev/null +++ b/margin_logs/step_0000193.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5201731c059956e6e40b558b7ad85df47a0e82c4cbaf830485897b46017be41e +size 384 diff --git a/margin_logs/step_0000194.npy b/margin_logs/step_0000194.npy new file mode 100644 index 0000000..037c36a --- /dev/null +++ b/margin_logs/step_0000194.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db071a5bedd4afd6b88062413f9d93f039d3f4b479e5f209db2ac07596d276ce +size 384 diff --git a/margin_logs/step_0000195.npy b/margin_logs/step_0000195.npy new file mode 100644 index 0000000..e539b0a --- /dev/null +++ b/margin_logs/step_0000195.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a7041d0396264e96042b36eb52b34075eb63eda901ebea914ffd7f61a8be845 +size 384 diff --git a/margin_logs/step_0000196.npy b/margin_logs/step_0000196.npy new file mode 100644 index 0000000..4205bac --- /dev/null +++ b/margin_logs/step_0000196.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d4454b2ca8db40f87152556b2a0b42a8745864bfa504ae34cfbd1344b53ffb5 +size 384 diff --git a/margin_logs/step_0000197.npy b/margin_logs/step_0000197.npy new file mode 100644 index 0000000..df76a06 --- /dev/null +++ b/margin_logs/step_0000197.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9a1bd6e0f4205852c785f2270bf7666c86e4b42cb0d801deeece41f83086255 +size 384 diff --git a/margin_logs/step_0000198.npy b/margin_logs/step_0000198.npy new file mode 100644 index 0000000..bd7c782 --- /dev/null +++ b/margin_logs/step_0000198.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0860a3234a18455b8d958e1f715e037fcf73028c3b9d4df8538e4d5744df725e +size 384 diff --git a/margin_logs/step_0000199.npy b/margin_logs/step_0000199.npy new file mode 100644 index 0000000..ed418cc --- /dev/null +++ b/margin_logs/step_0000199.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8804dbd2e3226839b5cd4b0cbd623fd878ea8254dd8325576336de60fec6c0d0 +size 384 diff --git a/margin_logs/step_0000200.npy b/margin_logs/step_0000200.npy new file mode 100644 index 0000000..cf1ab58 --- /dev/null +++ b/margin_logs/step_0000200.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58d85c43bb5158a5435a0fefc6cde22ea2381616ac5cbdf810b29e776dc1b29f +size 384 diff --git a/margin_logs/step_0000201.npy b/margin_logs/step_0000201.npy new file mode 100644 index 0000000..73f20b8 --- /dev/null +++ b/margin_logs/step_0000201.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80378b6bef1e21e34b3c0c16b6cbd5e4cf09fdcf861420387d1d8ad255bcd0aa +size 384 diff --git a/margin_logs/step_0000202.npy b/margin_logs/step_0000202.npy new file mode 100644 index 0000000..68261e7 --- /dev/null +++ b/margin_logs/step_0000202.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9aced4329e915820e6af19823d3c2f32145be28ad272a5e2eac18ea18863875 +size 384 diff --git a/margin_logs/step_0000203.npy b/margin_logs/step_0000203.npy new file mode 100644 index 0000000..01215f0 --- /dev/null +++ b/margin_logs/step_0000203.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f15e53404f503cf8ad2617a01531a58525df57778f2cb59285846e82a906e626 +size 384 diff --git a/margin_logs/step_0000204.npy b/margin_logs/step_0000204.npy new file mode 100644 index 0000000..21fdd9e --- /dev/null +++ b/margin_logs/step_0000204.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7794dc52e511098cd0abdbfd09bfbce82a4e78e68ab8b5123f2830a14ed1bcc9 +size 384 diff --git a/margin_logs/step_0000205.npy b/margin_logs/step_0000205.npy new file mode 100644 index 0000000..72ea78d --- /dev/null +++ b/margin_logs/step_0000205.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a16daf9dd1649bbe7fbe2201b9b2e20a93484da8c3f2f34bf6e7ecfb46285493 +size 384 diff --git a/margin_logs/step_0000206.npy b/margin_logs/step_0000206.npy new file mode 100644 index 0000000..cd4b3c8 --- /dev/null +++ b/margin_logs/step_0000206.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ddfed45b16cea7748ad5cde70f2740781868794d1c4ba90d90a322b30f67532 +size 384 diff --git a/margin_logs/step_0000207.npy b/margin_logs/step_0000207.npy new file mode 100644 index 0000000..9ee41b8 --- /dev/null +++ b/margin_logs/step_0000207.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fa15e3ab3ef7041bf5ce2c49b43ef7c50742eb9c8ddf0a880808b3eedd94212 +size 384 diff --git a/margin_logs/step_0000208.npy b/margin_logs/step_0000208.npy new file mode 100644 index 0000000..53f2e18 --- /dev/null +++ b/margin_logs/step_0000208.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9fe05f7c2e2bc9a7bd2c81545f176f4b5f6b557e768f598dd28d75d396cbb96 +size 384 diff --git a/margin_logs/step_0000209.npy b/margin_logs/step_0000209.npy new file mode 100644 index 0000000..7955078 --- /dev/null +++ b/margin_logs/step_0000209.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2bf2b98ac63c2c9e4f3c62856c2cd75f1fbdac9e3900a1ed9c70dee9875f3b9 +size 384 diff --git a/margin_logs/step_0000210.npy b/margin_logs/step_0000210.npy new file mode 100644 index 0000000..fe6b695 --- /dev/null +++ b/margin_logs/step_0000210.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:200fea7876b18d0bce6301ef8fe96c1312966ddb56bb04187d5866035341534d +size 384 diff --git a/margin_logs/step_0000211.npy b/margin_logs/step_0000211.npy new file mode 100644 index 0000000..098a4a4 --- /dev/null +++ b/margin_logs/step_0000211.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03e8118ca22064dd3b9872860be2972bd20f0b112494145fbfdab0417d1f8765 +size 384 diff --git a/margin_logs/step_0000212.npy b/margin_logs/step_0000212.npy new file mode 100644 index 0000000..f1aff42 --- /dev/null +++ b/margin_logs/step_0000212.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73fa5c67dc6261d05583c1761edd13580089f9b50e1110802ae0b4eac27c0f95 +size 384 diff --git a/margin_logs/step_0000213.npy b/margin_logs/step_0000213.npy new file mode 100644 index 0000000..7b2cbce --- /dev/null +++ b/margin_logs/step_0000213.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d164a88d74848e8269cbbbbf26dbe06f58353ce640624bc784a8964dc228f1ab +size 384 diff --git a/margin_logs/step_0000214.npy b/margin_logs/step_0000214.npy new file mode 100644 index 0000000..7678cc7 --- /dev/null +++ b/margin_logs/step_0000214.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb5666606da96b07d6b33f158d89223d89942fbf4fef68ddede9d52416817413 +size 384 diff --git a/margin_logs/step_0000215.npy b/margin_logs/step_0000215.npy new file mode 100644 index 0000000..b02723d --- /dev/null +++ b/margin_logs/step_0000215.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b45d3b680edc886aaa9d233b25d0eafe6a6bd5087d185b3393fdfb74d90f5723 +size 384 diff --git a/margin_logs/step_0000216.npy b/margin_logs/step_0000216.npy new file mode 100644 index 0000000..9955d33 --- /dev/null +++ b/margin_logs/step_0000216.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37d8bd5b5164930bc624a73a7a21672eae971dac85cfab529e617dbdfcf88e80 +size 384 diff --git a/margin_logs/step_0000217.npy b/margin_logs/step_0000217.npy new file mode 100644 index 0000000..c424930 --- /dev/null +++ b/margin_logs/step_0000217.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09e4424dfd2f55ea0463ea5ed1c50398aba76aa489414ac128f44353d828f4f6 +size 384 diff --git a/margin_logs/step_0000218.npy b/margin_logs/step_0000218.npy new file mode 100644 index 0000000..91e2a39 --- /dev/null +++ b/margin_logs/step_0000218.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e3b1608717c272a21707b58b69b018bd787c98bbb5626adc1bc5df6076a2ce5 +size 384 diff --git a/margin_logs/step_0000219.npy b/margin_logs/step_0000219.npy new file mode 100644 index 0000000..41ee619 --- /dev/null +++ b/margin_logs/step_0000219.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e64209da8eb54a075cb6088d76f9ddd88ea54f9b91649e918e93a29af279ee76 +size 384 diff --git a/margin_logs/step_0000220.npy b/margin_logs/step_0000220.npy new file mode 100644 index 0000000..4deda60 --- /dev/null +++ b/margin_logs/step_0000220.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2004b3e10f17ca9fbb875222378294f612c9f39996bc47e8cb5a3a1b6d2a2ccd +size 384 diff --git a/margin_logs/step_0000221.npy b/margin_logs/step_0000221.npy new file mode 100644 index 0000000..3b5fd4a --- /dev/null +++ b/margin_logs/step_0000221.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0164e32089a2f671b3eec6ad975e4b185a998a8933a6e8f8e14df2a52fb5884 +size 384 diff --git a/margin_logs/step_0000222.npy b/margin_logs/step_0000222.npy new file mode 100644 index 0000000..1778112 --- /dev/null +++ b/margin_logs/step_0000222.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94aba5c07c62a685457aa89c6ae1b90142b66a186c95070def9b7ae1a5f580fe +size 384 diff --git a/margin_logs/step_0000223.npy b/margin_logs/step_0000223.npy new file mode 100644 index 0000000..8376b32 --- /dev/null +++ b/margin_logs/step_0000223.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:076fe2859e6dca88fecda6f320655b056eecd88e2b1dacab2986ffc67fb3f4d1 +size 384 diff --git a/margin_logs/step_0000224.npy b/margin_logs/step_0000224.npy new file mode 100644 index 0000000..7877661 --- /dev/null +++ b/margin_logs/step_0000224.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fc6fbf9e7ac52a33bee7566d0635ce6c506762a83ed9d50744d345c91cc822f +size 384 diff --git a/margin_logs/step_0000225.npy b/margin_logs/step_0000225.npy new file mode 100644 index 0000000..1899944 --- /dev/null +++ b/margin_logs/step_0000225.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a1c2fd70845c9d8e345730e9f7b46b50a38b49e9d054af67bba9c7804225fc1 +size 384 diff --git a/margin_logs/step_0000226.npy b/margin_logs/step_0000226.npy new file mode 100644 index 0000000..e68ebab --- /dev/null +++ b/margin_logs/step_0000226.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:253173927929883e28741f9bc67f4357c88470036e22e29135b8fecd487137d1 +size 384 diff --git a/margin_logs/step_0000227.npy b/margin_logs/step_0000227.npy new file mode 100644 index 0000000..a2aecfb --- /dev/null +++ b/margin_logs/step_0000227.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f94ea019e5ea858812428e53e765b35c1e9fa5cc27f9e0fb2cde54dc27b9031 +size 384 diff --git a/margin_logs/step_0000228.npy b/margin_logs/step_0000228.npy new file mode 100644 index 0000000..c7a65b7 --- /dev/null +++ b/margin_logs/step_0000228.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f1dfcede0bdad22723c89595e5fbe3d93da430efaa22e2354db72e37ad86fd7 +size 384 diff --git a/margin_logs/step_0000229.npy b/margin_logs/step_0000229.npy new file mode 100644 index 0000000..272b464 --- /dev/null +++ b/margin_logs/step_0000229.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c6eafba1dceb4d9911499ff25f7c52a238d602d8000bd9f3d798edf8ca7602d +size 384 diff --git a/margin_logs/step_0000230.npy b/margin_logs/step_0000230.npy new file mode 100644 index 0000000..b6d692c --- /dev/null +++ b/margin_logs/step_0000230.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c1f7ebfa25df10336a26dfe095bc02315b72b11d1de05181c9c8ee372edd0de +size 384 diff --git a/margin_logs/step_0000231.npy b/margin_logs/step_0000231.npy new file mode 100644 index 0000000..cf03556 --- /dev/null +++ b/margin_logs/step_0000231.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbaca09f5e05841b9ec4de384c8f9c3e1d9408eae505349266526d91f44300d7 +size 384 diff --git a/margin_logs/step_0000232.npy b/margin_logs/step_0000232.npy new file mode 100644 index 0000000..180d79b --- /dev/null +++ b/margin_logs/step_0000232.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a99cab5b4c0cc0d16c8712dc0750a315ccd7c56262636365644c0c09a8bcc013 +size 384 diff --git a/margin_logs/step_0000233.npy b/margin_logs/step_0000233.npy new file mode 100644 index 0000000..dac4625 --- /dev/null +++ b/margin_logs/step_0000233.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feeb4b97eb79bb60f155b4809b829df3d62f0cfafe02f98c0df2dd7fa173e56d +size 384 diff --git a/margin_logs/step_0000234.npy b/margin_logs/step_0000234.npy new file mode 100644 index 0000000..d3075ef --- /dev/null +++ b/margin_logs/step_0000234.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18a939db98dbcc2dd2a8619d7c7d181a18b24028d241cfa222cef41c7f15c49f +size 384 diff --git a/margin_logs/step_0000235.npy b/margin_logs/step_0000235.npy new file mode 100644 index 0000000..fc4e55f --- /dev/null +++ b/margin_logs/step_0000235.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9082254ccff4e2b76d9eab157740aaf4aa2f024a1f24cf715b1e7c772568f4a6 +size 384 diff --git a/margin_logs/step_0000236.npy b/margin_logs/step_0000236.npy new file mode 100644 index 0000000..caebb9f --- /dev/null +++ b/margin_logs/step_0000236.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74d8c3566d40c8faf70d563bfde30e5ac1710be18a0ea93a6d816033ddca4604 +size 384 diff --git a/margin_logs/step_0000237.npy b/margin_logs/step_0000237.npy new file mode 100644 index 0000000..02b4313 --- /dev/null +++ b/margin_logs/step_0000237.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4d5fcfb62ef8a59757ad55899f4a815f0549777ea8149ffb4af29e45fc4e063 +size 384 diff --git a/margin_logs/step_0000238.npy b/margin_logs/step_0000238.npy new file mode 100644 index 0000000..3c580c9 --- /dev/null +++ b/margin_logs/step_0000238.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71319dcbbc124e29678e9f7179652909330b34ab6b5817b360acde76c780d4a0 +size 384 diff --git a/margin_logs/step_0000239.npy b/margin_logs/step_0000239.npy new file mode 100644 index 0000000..338279b --- /dev/null +++ b/margin_logs/step_0000239.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb79ac6065c9c2c74d54949870b04034467cdd91fb91aa1e06cecac0065cc104 +size 384 diff --git a/margin_logs/step_0000240.npy b/margin_logs/step_0000240.npy new file mode 100644 index 0000000..b0c6877 --- /dev/null +++ b/margin_logs/step_0000240.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2f38ca1fc42d1fd7e9984b6cf654c79322696d1288121c60e0aa901bf9fe093 +size 384 diff --git a/margin_logs/step_0000241.npy b/margin_logs/step_0000241.npy new file mode 100644 index 0000000..14e38e7 --- /dev/null +++ b/margin_logs/step_0000241.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14afefa68875425f2bf1ef5a899317c0b8c573270f3f27d0124fbc7343010a49 +size 384 diff --git a/margin_logs/step_0000242.npy b/margin_logs/step_0000242.npy new file mode 100644 index 0000000..ec4ec83 --- /dev/null +++ b/margin_logs/step_0000242.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da2618922d7a27ae078f59c880cc50164a15fe2043a873e7b47401ad654ba44c +size 384 diff --git a/margin_logs/step_0000243.npy b/margin_logs/step_0000243.npy new file mode 100644 index 0000000..06e1302 --- /dev/null +++ b/margin_logs/step_0000243.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fb88fc0ae9d9e01e4cc366ef9402e371f0f639103d72505e5a25bdd9bc34c71 +size 384 diff --git a/margin_logs/step_0000244.npy b/margin_logs/step_0000244.npy new file mode 100644 index 0000000..04ba71d --- /dev/null +++ b/margin_logs/step_0000244.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d50948a586ba89718c0e79c1d69be7e3561f6767177c5757b18175f29adf3d60 +size 384 diff --git a/margin_logs/step_0000245.npy b/margin_logs/step_0000245.npy new file mode 100644 index 0000000..df32179 --- /dev/null +++ b/margin_logs/step_0000245.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eba8bab3d472271655f949754aca1fa66b53d8fb2b8076114c4fa508fe3e744 +size 384 diff --git a/margin_logs/step_0000246.npy b/margin_logs/step_0000246.npy new file mode 100644 index 0000000..a9a3b6d --- /dev/null +++ b/margin_logs/step_0000246.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:786524f0c83ca09aef5f12a45362e93d3597f264f18ba175577d955e46d634d9 +size 384 diff --git a/margin_logs/step_0000247.npy b/margin_logs/step_0000247.npy new file mode 100644 index 0000000..176573e --- /dev/null +++ b/margin_logs/step_0000247.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5a38aeb3394c8b691b647bcccd2de7c2430a3c474b9b523ded8901b9e970f65 +size 384 diff --git a/margin_logs/step_0000248.npy b/margin_logs/step_0000248.npy new file mode 100644 index 0000000..e1d6d8d --- /dev/null +++ b/margin_logs/step_0000248.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1919b1a7e47b46f301ddc0ec8b4df5fba687fa24ef4cec405ac33a92ad0c90fe +size 384 diff --git a/margin_logs/step_0000249.npy b/margin_logs/step_0000249.npy new file mode 100644 index 0000000..7a92238 --- /dev/null +++ b/margin_logs/step_0000249.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7285f0b50d57bfedc20412823c95ce95d42fde58336f3b7b40f0c31e67d13d9d +size 384 diff --git a/margin_logs/step_0000250.npy b/margin_logs/step_0000250.npy new file mode 100644 index 0000000..f849afb --- /dev/null +++ b/margin_logs/step_0000250.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1c5498e29b1e731ca98f56ae412fbb5ac2235724bac70acf37ad8495b842a60 +size 384 diff --git a/margin_logs/step_0000251.npy b/margin_logs/step_0000251.npy new file mode 100644 index 0000000..5e187a1 --- /dev/null +++ b/margin_logs/step_0000251.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f47a9d70095ef6c366a34284a05cf79f59e76e4579052b97eb7c5a00f2c4865e +size 384 diff --git a/margin_logs/step_0000252.npy b/margin_logs/step_0000252.npy new file mode 100644 index 0000000..6915765 --- /dev/null +++ b/margin_logs/step_0000252.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74871d56bef190b68d991b9862aed7326f1c1c7be75f684dadd717592af58991 +size 384 diff --git a/margin_logs/step_0000253.npy b/margin_logs/step_0000253.npy new file mode 100644 index 0000000..9d1f2e9 --- /dev/null +++ b/margin_logs/step_0000253.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe05462de93e77c0cb1707b96f1aa3caa5200cd174f8602338190cf024f15c1c +size 384 diff --git a/margin_logs/step_0000254.npy b/margin_logs/step_0000254.npy new file mode 100644 index 0000000..16c63e0 --- /dev/null +++ b/margin_logs/step_0000254.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50ffae22463834a4af9ed6ea9dcc35c68d0a60c7431f1d3e27315ca15e39c6ca +size 384 diff --git a/margin_logs/step_0000255.npy b/margin_logs/step_0000255.npy new file mode 100644 index 0000000..b8fc6e8 --- /dev/null +++ b/margin_logs/step_0000255.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80cb6013facc1ca89a4d4316456f955fe0d0ff7d238fb7007da5ab1029d890a9 +size 384 diff --git a/margin_logs/step_0000256.npy b/margin_logs/step_0000256.npy new file mode 100644 index 0000000..9bb057f --- /dev/null +++ b/margin_logs/step_0000256.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7736002809a1e1137045cccbc98df56a7b71588db4369853e7d0f9c4c24bfed +size 384 diff --git a/margin_logs/step_0000257.npy b/margin_logs/step_0000257.npy new file mode 100644 index 0000000..e3cb539 --- /dev/null +++ b/margin_logs/step_0000257.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f3597452767af1c5056a26a03a6318087292f47479f3b73bdd3effd3f6f43ca +size 384 diff --git a/margin_logs/step_0000258.npy b/margin_logs/step_0000258.npy new file mode 100644 index 0000000..0e81c3b --- /dev/null +++ b/margin_logs/step_0000258.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90ac6f8c1fab0330dd460eabfa7d2fa59f4b2d34a5daed09b81d9a4423ab73a8 +size 384 diff --git a/margin_logs/step_0000259.npy b/margin_logs/step_0000259.npy new file mode 100644 index 0000000..90ea424 --- /dev/null +++ b/margin_logs/step_0000259.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59448fee67f9f2aeaf41170af3aa702fc0a0f4dbb3b6a62fc4a0e2013530d77a +size 384 diff --git a/margin_logs/step_0000260.npy b/margin_logs/step_0000260.npy new file mode 100644 index 0000000..95531bc --- /dev/null +++ b/margin_logs/step_0000260.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5070e3fbca711e29c6b8831c31cb1f06f04ba5e99d59e71ba49d2dec08094614 +size 384 diff --git a/margin_logs/step_0000261.npy b/margin_logs/step_0000261.npy new file mode 100644 index 0000000..82ec6db --- /dev/null +++ b/margin_logs/step_0000261.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f488b6eef5e7011f82bb58ae7a50bcd16b1b313453ee1fe35846cc5e9fcc2f5 +size 384 diff --git a/margin_logs/step_0000262.npy b/margin_logs/step_0000262.npy new file mode 100644 index 0000000..75a2732 --- /dev/null +++ b/margin_logs/step_0000262.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88e13d8850e17dec1ff13c91835dd7be533e815828b2c5488976dde256247534 +size 384 diff --git a/margin_logs/step_0000263.npy b/margin_logs/step_0000263.npy new file mode 100644 index 0000000..764f445 --- /dev/null +++ b/margin_logs/step_0000263.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1de95246845835d73f948461f9f847a97387b7bd6f4672aec544c30c61daf01a +size 384 diff --git a/margin_logs/step_0000264.npy b/margin_logs/step_0000264.npy new file mode 100644 index 0000000..a236488 --- /dev/null +++ b/margin_logs/step_0000264.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b594a47eed1cea6ca57825ad658391f0bbe416ba630fbfa26272164fb72cfb18 +size 384 diff --git a/margin_logs/step_0000265.npy b/margin_logs/step_0000265.npy new file mode 100644 index 0000000..139dada --- /dev/null +++ b/margin_logs/step_0000265.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcb38278cd2d65def35bcf0099ab247f439530e92cb2d61cc06dad6447962e4c +size 384 diff --git a/margin_logs/step_0000266.npy b/margin_logs/step_0000266.npy new file mode 100644 index 0000000..445f1b7 --- /dev/null +++ b/margin_logs/step_0000266.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3190b2a50ad6bf9d7e63723c469cf43e96a196982bfe075dace6578dae02c08 +size 384 diff --git a/margin_logs/step_0000267.npy b/margin_logs/step_0000267.npy new file mode 100644 index 0000000..d4413e6 --- /dev/null +++ b/margin_logs/step_0000267.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fc8bc0fe9e0ebfea3ce075e23e93d9b1b6ec68443331954b128afc6a9657683 +size 384 diff --git a/margin_logs/step_0000268.npy b/margin_logs/step_0000268.npy new file mode 100644 index 0000000..e825c06 --- /dev/null +++ b/margin_logs/step_0000268.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3588687144654c327d539b6d67fb67713af0d738fd21fa80b45f108e9261644 +size 384 diff --git a/margin_logs/step_0000269.npy b/margin_logs/step_0000269.npy new file mode 100644 index 0000000..100d067 --- /dev/null +++ b/margin_logs/step_0000269.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb2e6f8980b95de6d8455db3d7b0a38c9d5b89c4250d0e94b06ed0190b3f5f20 +size 384 diff --git a/margin_logs/step_0000270.npy b/margin_logs/step_0000270.npy new file mode 100644 index 0000000..cdc0d3b --- /dev/null +++ b/margin_logs/step_0000270.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ee6a410795b5e8da34569208afb4743c77530bcc61edecdd4555dd95deb2f3 +size 384 diff --git a/margin_logs/step_0000271.npy b/margin_logs/step_0000271.npy new file mode 100644 index 0000000..4a70972 --- /dev/null +++ b/margin_logs/step_0000271.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:613992e6fb87a00f30e1dc8997b7aa2865f7bc6472db465d7a7b73961aee33f7 +size 384 diff --git a/margin_logs/step_0000272.npy b/margin_logs/step_0000272.npy new file mode 100644 index 0000000..6589487 --- /dev/null +++ b/margin_logs/step_0000272.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e3912c0201d486c3d9d45bc9d499fd0ee130b201ae54cba167a381047577301 +size 384 diff --git a/margin_logs/step_0000273.npy b/margin_logs/step_0000273.npy new file mode 100644 index 0000000..f9ebeb1 --- /dev/null +++ b/margin_logs/step_0000273.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fefef91574c706573187e7314ca64c1815d8dc83b94151c282d39b52649bc053 +size 384 diff --git a/margin_logs/step_0000274.npy b/margin_logs/step_0000274.npy new file mode 100644 index 0000000..39de117 --- /dev/null +++ b/margin_logs/step_0000274.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e52155d00e8b272dfad924e737c49839416751c944a7339d0ad6b919dbb8f259 +size 384 diff --git a/margin_logs/step_0000275.npy b/margin_logs/step_0000275.npy new file mode 100644 index 0000000..1e37fa6 --- /dev/null +++ b/margin_logs/step_0000275.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4285b041814c15f95b550d864ff086b4e4906ea0220c17cc8b5ad08fdad306d0 +size 384 diff --git a/margin_logs/step_0000276.npy b/margin_logs/step_0000276.npy new file mode 100644 index 0000000..533842e --- /dev/null +++ b/margin_logs/step_0000276.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fdeca500130b2a2fe6660321fc8f3f58a0f75dcad731ffe611ff4ac0063b2e6 +size 384 diff --git a/margin_logs/step_0000277.npy b/margin_logs/step_0000277.npy new file mode 100644 index 0000000..b426656 --- /dev/null +++ b/margin_logs/step_0000277.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:883f91da9cc016e6972f297be897a8324b5e221ae38456ce6f155dc3525c6633 +size 384 diff --git a/margin_logs/step_0000278.npy b/margin_logs/step_0000278.npy new file mode 100644 index 0000000..880048f --- /dev/null +++ b/margin_logs/step_0000278.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffd77f9de49b5d2af3050fe9100e1a17005b4e4eb772f5adbb2d19d073829deb +size 384 diff --git a/margin_logs/step_0000279.npy b/margin_logs/step_0000279.npy new file mode 100644 index 0000000..780b1a6 --- /dev/null +++ b/margin_logs/step_0000279.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36283bd3de182a7b404a6e2eb4856a82720e2f57956c6cb5f4a636fb2c289e3a +size 384 diff --git a/margin_logs/step_0000280.npy b/margin_logs/step_0000280.npy new file mode 100644 index 0000000..255c912 --- /dev/null +++ b/margin_logs/step_0000280.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c5818c61d65e8bc7e7487b7d60a08b9ff80aae852232a32c8f51f7350707872 +size 384 diff --git a/margin_logs/step_0000281.npy b/margin_logs/step_0000281.npy new file mode 100644 index 0000000..c676138 --- /dev/null +++ b/margin_logs/step_0000281.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34a8d834f0fc8ed1b0210573ebcec57ad104bb3efc67a28f6b006ef812320bb6 +size 384 diff --git a/margin_logs/step_0000282.npy b/margin_logs/step_0000282.npy new file mode 100644 index 0000000..0e2506c --- /dev/null +++ b/margin_logs/step_0000282.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d90858db021a0b8ee31b2a6887601ae3b91eee46c47a1833b70d8039f23a047 +size 384 diff --git a/margin_logs/step_0000283.npy b/margin_logs/step_0000283.npy new file mode 100644 index 0000000..529217a --- /dev/null +++ b/margin_logs/step_0000283.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8805110df83f29cf86da3ea5312bf6039a5e2879204b65abb7f019fa865f6282 +size 384 diff --git a/margin_logs/step_0000284.npy b/margin_logs/step_0000284.npy new file mode 100644 index 0000000..6ea91b0 --- /dev/null +++ b/margin_logs/step_0000284.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00ef469d6d2cd14eab07f837cc4c919bb5ae6d01b3b5405ad7cc28c5c2093708 +size 384 diff --git a/margin_logs/step_0000285.npy b/margin_logs/step_0000285.npy new file mode 100644 index 0000000..937ad4d --- /dev/null +++ b/margin_logs/step_0000285.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:683bb8e14f4024a9f46ace3f27ae7d04a13c29bc26a6bcaad540187705b32dfd +size 384 diff --git a/margin_logs/step_0000286.npy b/margin_logs/step_0000286.npy new file mode 100644 index 0000000..a9e0513 --- /dev/null +++ b/margin_logs/step_0000286.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:011f60dcb45646aa7068c3591b8a63f2bb50f6a18f6d695c939d7aeef8e738f3 +size 384 diff --git a/margin_logs/step_0000287.npy b/margin_logs/step_0000287.npy new file mode 100644 index 0000000..263d9a4 --- /dev/null +++ b/margin_logs/step_0000287.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60c8fe666dba4707301e55ef22c953fff2f3ed7e8f76f47eeca6848dea499a63 +size 384 diff --git a/margin_logs/step_0000288.npy b/margin_logs/step_0000288.npy new file mode 100644 index 0000000..84891ef --- /dev/null +++ b/margin_logs/step_0000288.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcf164d0f9ec630b4a86d23f2341f29571e283a82611177d7ab9890fc1bd4b3f +size 384 diff --git a/margin_logs/step_0000289.npy b/margin_logs/step_0000289.npy new file mode 100644 index 0000000..2f474f4 --- /dev/null +++ b/margin_logs/step_0000289.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3651eea090de0d1b2ce137b93ee45f00c55a8fa0c7c301eb990d68ed674cb9dd +size 384 diff --git a/margin_logs/step_0000290.npy b/margin_logs/step_0000290.npy new file mode 100644 index 0000000..236bdaf --- /dev/null +++ b/margin_logs/step_0000290.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24ab97c7df88bdd2b04a487b60e66ec264edfb65c7f372f0f4cdc775e3c88046 +size 384 diff --git a/margin_logs/step_0000291.npy b/margin_logs/step_0000291.npy new file mode 100644 index 0000000..3c80df6 --- /dev/null +++ b/margin_logs/step_0000291.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9935245db3dd753fd78a10b287448dce0007f060dd2b47d006b46ad35334887a +size 384 diff --git a/margin_logs/step_0000292.npy b/margin_logs/step_0000292.npy new file mode 100644 index 0000000..4696499 --- /dev/null +++ b/margin_logs/step_0000292.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96dc121e1d5fb04c8c250047db5c78e61e9682e0f6808a88c2f4100960647582 +size 384 diff --git a/margin_logs/step_0000293.npy b/margin_logs/step_0000293.npy new file mode 100644 index 0000000..93f332b --- /dev/null +++ b/margin_logs/step_0000293.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fc01fa714eecea2ebd2391df875ce3712e928b47dcc50702e5044113b8dd8ec +size 384 diff --git a/margin_logs/step_0000294.npy b/margin_logs/step_0000294.npy new file mode 100644 index 0000000..d065a79 --- /dev/null +++ b/margin_logs/step_0000294.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b02170d1be0e2c6cd45567dafddc5e43105f47d6efac6bb9e4861ae76ddbc885 +size 384 diff --git a/margin_logs/step_0000295.npy b/margin_logs/step_0000295.npy new file mode 100644 index 0000000..d9e3508 --- /dev/null +++ b/margin_logs/step_0000295.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f32991f4e9171df007c431f580ee36837a15134b9616cd2fcd880a5744755239 +size 384 diff --git a/margin_logs/step_0000296.npy b/margin_logs/step_0000296.npy new file mode 100644 index 0000000..33e7ac7 --- /dev/null +++ b/margin_logs/step_0000296.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35d57380f9da534a0e5f2817373ae8d6bd1b1025b315dbec301e874c9d544318 +size 384 diff --git a/margin_logs/step_0000297.npy b/margin_logs/step_0000297.npy new file mode 100644 index 0000000..a39b248 --- /dev/null +++ b/margin_logs/step_0000297.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc67a7068f7844ffd462b9e3e0ee75b4703dda50f99f8bdc200cd7b83646340d +size 384 diff --git a/margin_logs/step_0000298.npy b/margin_logs/step_0000298.npy new file mode 100644 index 0000000..6fa6798 --- /dev/null +++ b/margin_logs/step_0000298.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c135b6302fd12da9199743d9863fb31c0d8f150069cfaee9f11d89c88fbaf65 +size 384 diff --git a/margin_logs/step_0000299.npy b/margin_logs/step_0000299.npy new file mode 100644 index 0000000..3187ce7 --- /dev/null +++ b/margin_logs/step_0000299.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd599feea3eff198f6367017c52dcc9d887c776ae77dc8cee312fc5cdf8f60ae +size 384 diff --git a/margin_logs/step_0000300.npy b/margin_logs/step_0000300.npy new file mode 100644 index 0000000..3a984ec --- /dev/null +++ b/margin_logs/step_0000300.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21eb6887a8a6ff267d13f6ba05801051824cf1f62ff342d55d14f639411ef5da +size 384 diff --git a/margin_logs/step_0000301.npy b/margin_logs/step_0000301.npy new file mode 100644 index 0000000..5a2aaf9 --- /dev/null +++ b/margin_logs/step_0000301.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d386be2481d32a99204c4ec7cb019d1bceed6c84989738ac18b39d63a7d430be +size 384 diff --git a/margin_logs/step_0000302.npy b/margin_logs/step_0000302.npy new file mode 100644 index 0000000..d93abc9 --- /dev/null +++ b/margin_logs/step_0000302.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceb0050982bba1b312fbdb5722c226974e83aa1437982d84f12627a3a8e789ee +size 384 diff --git a/margin_logs/step_0000303.npy b/margin_logs/step_0000303.npy new file mode 100644 index 0000000..f83651b --- /dev/null +++ b/margin_logs/step_0000303.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e16aefd7adca3af404a9d0ad2e892f40dafde19746b0240c45237741ddb4719e +size 384 diff --git a/margin_logs/step_0000304.npy b/margin_logs/step_0000304.npy new file mode 100644 index 0000000..837b7e8 --- /dev/null +++ b/margin_logs/step_0000304.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5aecc44a0310bae251cf47effbcbe61e0029505afa1e3096aa3ad13c9507bf9 +size 384 diff --git a/margin_logs/step_0000305.npy b/margin_logs/step_0000305.npy new file mode 100644 index 0000000..9a0e77f --- /dev/null +++ b/margin_logs/step_0000305.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:688cdc4e39fd680bb2f2432d9d97b0bf90d8262f99b5ff887c009bb9ffa855e8 +size 384 diff --git a/margin_logs/step_0000306.npy b/margin_logs/step_0000306.npy new file mode 100644 index 0000000..8844f64 --- /dev/null +++ b/margin_logs/step_0000306.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0807c70295ec6f94b44eece67d9be5f1fcdfc7d8fe384ed797e5a1c73dab5af4 +size 384 diff --git a/margin_logs/step_0000307.npy b/margin_logs/step_0000307.npy new file mode 100644 index 0000000..5e28f5a --- /dev/null +++ b/margin_logs/step_0000307.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:358905b0e48254c3f77e89dd1ba76d9174c6d93a5863c037c87f52a158d72bbd +size 384 diff --git a/margin_logs/step_0000308.npy b/margin_logs/step_0000308.npy new file mode 100644 index 0000000..974bfe0 --- /dev/null +++ b/margin_logs/step_0000308.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b483c4994a4e72cf818743b87bc9068e5c959b0a0623ce76d66198fd6b2aa34 +size 384 diff --git a/margin_logs/step_0000309.npy b/margin_logs/step_0000309.npy new file mode 100644 index 0000000..e4eada5 --- /dev/null +++ b/margin_logs/step_0000309.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d446b737ec808a52aca2d90d5684039d41cd55a2bba32a4fa5465ac4883090c +size 384 diff --git a/margin_logs/step_0000310.npy b/margin_logs/step_0000310.npy new file mode 100644 index 0000000..10d76e2 --- /dev/null +++ b/margin_logs/step_0000310.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb4a87757c1a8d83b55ef17d390c42a2e5c2ef567055ebb76679e26a4b80057f +size 384 diff --git a/margin_logs/step_0000311.npy b/margin_logs/step_0000311.npy new file mode 100644 index 0000000..307261d --- /dev/null +++ b/margin_logs/step_0000311.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4803547cde215c9d656735b31f866c2693c7e38e47a43a29cc398874967fa78e +size 384 diff --git a/margin_logs/step_0000312.npy b/margin_logs/step_0000312.npy new file mode 100644 index 0000000..95bc7c4 --- /dev/null +++ b/margin_logs/step_0000312.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:215578518d17f25b314f851e9057c17e59e55461f287c976403fa43c4bf3e87d +size 384 diff --git a/margin_logs/step_0000313.npy b/margin_logs/step_0000313.npy new file mode 100644 index 0000000..ef80b67 --- /dev/null +++ b/margin_logs/step_0000313.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bc9aa70b3436231ae6af01517f664a2738b95d50f6abce642e35947c1c95795 +size 384 diff --git a/margin_logs/step_0000314.npy b/margin_logs/step_0000314.npy new file mode 100644 index 0000000..78e25c9 --- /dev/null +++ b/margin_logs/step_0000314.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7216320afee65538a780b2c355acc38893f3f33bfa8dd11a3e4e1f03648358bb +size 384 diff --git a/margin_logs/step_0000315.npy b/margin_logs/step_0000315.npy new file mode 100644 index 0000000..0c9c57b --- /dev/null +++ b/margin_logs/step_0000315.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6fefd7f421c9daedd7b5d1abaa68c64aea20646e265261553e149a15f58efb2 +size 384 diff --git a/margin_logs/step_0000316.npy b/margin_logs/step_0000316.npy new file mode 100644 index 0000000..b554c99 --- /dev/null +++ b/margin_logs/step_0000316.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a826309fc5304a697a7ae37c39b7762e0126ce4a1f8e758d7c73b2c6b583c4aa +size 384 diff --git a/margin_logs/step_0000317.npy b/margin_logs/step_0000317.npy new file mode 100644 index 0000000..1351dd7 --- /dev/null +++ b/margin_logs/step_0000317.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd887c1b0fecc0fb5032dcaab4579e99f9176bf3e498e92e4b3263c2152e68ab +size 384 diff --git a/margin_logs/step_0000318.npy b/margin_logs/step_0000318.npy new file mode 100644 index 0000000..f8c1ea0 --- /dev/null +++ b/margin_logs/step_0000318.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:504aad6953e2d911c415e895de1cf5dfde961caddf59bc486e554374fe35d1c5 +size 384 diff --git a/margin_logs/step_0000319.npy b/margin_logs/step_0000319.npy new file mode 100644 index 0000000..b0d7653 --- /dev/null +++ b/margin_logs/step_0000319.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35d9091a7a0431a969ab5cd90bd90442042e83ca57f2667376968786f96a45e0 +size 384 diff --git a/margin_logs/step_0000320.npy b/margin_logs/step_0000320.npy new file mode 100644 index 0000000..9e732a7 --- /dev/null +++ b/margin_logs/step_0000320.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c4fd05f06330a12abbaa6e93db774d1b1d287b79801d5c96f88b4280b4d0164 +size 384 diff --git a/margin_logs/step_0000321.npy b/margin_logs/step_0000321.npy new file mode 100644 index 0000000..362c65c --- /dev/null +++ b/margin_logs/step_0000321.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c96fa091e6cc322fa3f2e9108a57f21f4a16ee76394071a2d7eb4d94a97ae80 +size 384 diff --git a/margin_logs/step_0000322.npy b/margin_logs/step_0000322.npy new file mode 100644 index 0000000..c4484c2 --- /dev/null +++ b/margin_logs/step_0000322.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d286d0532d10def941c3e04d289e2b1a0a65fcac1935739eb186043bfd950da2 +size 384 diff --git a/margin_logs/step_0000323.npy b/margin_logs/step_0000323.npy new file mode 100644 index 0000000..94e85a0 --- /dev/null +++ b/margin_logs/step_0000323.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee63a4ff360433a729be983dc9c67e263bbecb1341c1397e17e90d9d00cd0d8b +size 384 diff --git a/margin_logs/step_0000324.npy b/margin_logs/step_0000324.npy new file mode 100644 index 0000000..37703d4 --- /dev/null +++ b/margin_logs/step_0000324.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6da5bc36fda80f8e2fa576bb759d2ae26e01723d2d1cee7ab87b3adcfe181a3f +size 384 diff --git a/margin_logs/step_0000325.npy b/margin_logs/step_0000325.npy new file mode 100644 index 0000000..2fc0e36 --- /dev/null +++ b/margin_logs/step_0000325.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faebad3341ee943c72c177a0383ea0f54c24ef1b42fe04dbb8a9bf986295300d +size 384 diff --git a/margin_logs/step_0000326.npy b/margin_logs/step_0000326.npy new file mode 100644 index 0000000..891acc4 --- /dev/null +++ b/margin_logs/step_0000326.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7b43a76e9c9e06d8d0a9d259acba6348964a8a56fca0a5d713961384b0868c8 +size 384 diff --git a/margin_logs/step_0000327.npy b/margin_logs/step_0000327.npy new file mode 100644 index 0000000..1f1d2bc --- /dev/null +++ b/margin_logs/step_0000327.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a6914c1253a121d7e577c87019ed312e47bdef6d9598062fd373a88c5cc5221 +size 384 diff --git a/margin_logs/step_0000328.npy b/margin_logs/step_0000328.npy new file mode 100644 index 0000000..c5e206b --- /dev/null +++ b/margin_logs/step_0000328.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8653be1e7dbf7b04e043821ed87939b275dfeea7e97b9d79a10cb310326cc4cb +size 384 diff --git a/margin_logs/step_0000329.npy b/margin_logs/step_0000329.npy new file mode 100644 index 0000000..fe03fd5 --- /dev/null +++ b/margin_logs/step_0000329.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32939007d7b0b7e5cba563d4a8571579cce0ebaf8defd4ea6929f67ec9de8120 +size 384 diff --git a/margin_logs/step_0000330.npy b/margin_logs/step_0000330.npy new file mode 100644 index 0000000..ac129c3 --- /dev/null +++ b/margin_logs/step_0000330.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6a9526292597f7def6057355f70945eaaecd07d9ddc7a0e07a33c1195c5db7e +size 384 diff --git a/margin_logs/step_0000331.npy b/margin_logs/step_0000331.npy new file mode 100644 index 0000000..495a96a --- /dev/null +++ b/margin_logs/step_0000331.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:243102d41eb04c9d83be1e4cd918e6f69a60bf81c3cd01b9814b3296420be5c3 +size 384 diff --git a/margin_logs/step_0000332.npy b/margin_logs/step_0000332.npy new file mode 100644 index 0000000..7ab8021 --- /dev/null +++ b/margin_logs/step_0000332.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b579007dc6984a7a2d2ca767640a3058d5c29639d7a4edd4dce33dfea380042e +size 384 diff --git a/margin_logs/step_0000333.npy b/margin_logs/step_0000333.npy new file mode 100644 index 0000000..f5032fa --- /dev/null +++ b/margin_logs/step_0000333.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:797a5d5b4c1068c372d452349e6ede5acc7ae37f3da7c13c9d7ae5f2f7b62172 +size 384 diff --git a/margin_logs/step_0000334.npy b/margin_logs/step_0000334.npy new file mode 100644 index 0000000..ee1ec0d --- /dev/null +++ b/margin_logs/step_0000334.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ab30d59e964ed7a16983d6c4d6752c7a7e37255efb1dd812422ac6336f8c7f +size 384 diff --git a/margin_logs/step_0000335.npy b/margin_logs/step_0000335.npy new file mode 100644 index 0000000..a71b605 --- /dev/null +++ b/margin_logs/step_0000335.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9514cc4956a5ad34f01ff8bd682156c40cef43ca98b77f03919453221b798218 +size 384 diff --git a/margin_logs/step_0000336.npy b/margin_logs/step_0000336.npy new file mode 100644 index 0000000..31ba1cd --- /dev/null +++ b/margin_logs/step_0000336.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06b3125c771dd61a08e83f5570f67ead91ee9989c859ca79a7bccee37efe771f +size 384 diff --git a/margin_logs/step_0000337.npy b/margin_logs/step_0000337.npy new file mode 100644 index 0000000..a1095a4 --- /dev/null +++ b/margin_logs/step_0000337.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6dc5b64bdb40c415c23c597ebae457dfd890a2f378561c5493d82953ed0892f +size 384 diff --git a/margin_logs/step_0000338.npy b/margin_logs/step_0000338.npy new file mode 100644 index 0000000..2f0cb60 --- /dev/null +++ b/margin_logs/step_0000338.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26ee31cc02e3944ad2afaffe3f7bbf56c80f4271affe249248550c2f4802bdec +size 384 diff --git a/margin_logs/step_0000339.npy b/margin_logs/step_0000339.npy new file mode 100644 index 0000000..f614bb8 --- /dev/null +++ b/margin_logs/step_0000339.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef150e83fc33aa47cbb78b1aff02738e739a229affd8dc890e3eef82ef3ccfeb +size 384 diff --git a/margin_logs/step_0000340.npy b/margin_logs/step_0000340.npy new file mode 100644 index 0000000..4d6788a --- /dev/null +++ b/margin_logs/step_0000340.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f8177000b94905dba9b56a10c8f13e1fffce21298e78b8e588cb113c7fb2638 +size 384 diff --git a/margin_logs/step_0000341.npy b/margin_logs/step_0000341.npy new file mode 100644 index 0000000..67e05ab --- /dev/null +++ b/margin_logs/step_0000341.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d9f1f95a5bb694183c387f1e46d74dd321b4c3dcb4928909a87a7f98308faf +size 384 diff --git a/margin_logs/step_0000342.npy b/margin_logs/step_0000342.npy new file mode 100644 index 0000000..8fd851b --- /dev/null +++ b/margin_logs/step_0000342.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e05ccae4f6007bbaf16eb9ba6fd1235889df8fb3406da93dd92c0cda0baed41 +size 384 diff --git a/margin_logs/step_0000343.npy b/margin_logs/step_0000343.npy new file mode 100644 index 0000000..2c0b3d3 --- /dev/null +++ b/margin_logs/step_0000343.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d34a2cd45a94f3f3e034f6a85a8f85be88f2f6ea2926625d5d6d21c5fa8ded3 +size 384 diff --git a/margin_logs/step_0000344.npy b/margin_logs/step_0000344.npy new file mode 100644 index 0000000..f73c1a5 --- /dev/null +++ b/margin_logs/step_0000344.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd332fc8985a93e784639454e8a2fc15efbcf3635cfad4240cc2c8d5a3bb91ea +size 384 diff --git a/margin_logs/step_0000345.npy b/margin_logs/step_0000345.npy new file mode 100644 index 0000000..be0f6dd --- /dev/null +++ b/margin_logs/step_0000345.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a892e6538c6976abee6ad7784262a396b65709d8bc02bde79d9233045532732 +size 384 diff --git a/margin_logs/step_0000346.npy b/margin_logs/step_0000346.npy new file mode 100644 index 0000000..2829784 --- /dev/null +++ b/margin_logs/step_0000346.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07cc75498d5665cf48ce59f1ba85c25a303e547d3cdf0f89b9129378b314064d +size 384 diff --git a/margin_logs/step_0000347.npy b/margin_logs/step_0000347.npy new file mode 100644 index 0000000..535b404 --- /dev/null +++ b/margin_logs/step_0000347.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ad834f1c94e2004e8c3234b8c71bf5932ca8ea1ba349dfdfb75eadfce40f0de +size 384 diff --git a/margin_logs/step_0000348.npy b/margin_logs/step_0000348.npy new file mode 100644 index 0000000..960393f --- /dev/null +++ b/margin_logs/step_0000348.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42e42cf85941cd48a6204ecf61e7632421d5ec3e262819898c76179487cbad34 +size 384 diff --git a/margin_logs/step_0000349.npy b/margin_logs/step_0000349.npy new file mode 100644 index 0000000..e962e48 --- /dev/null +++ b/margin_logs/step_0000349.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e00ee10cfb7d951a4eed255eeeb2591487c1abd5ac75115d7dd46dc3de48a1e5 +size 384 diff --git a/margin_logs/step_0000350.npy b/margin_logs/step_0000350.npy new file mode 100644 index 0000000..3f89956 --- /dev/null +++ b/margin_logs/step_0000350.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10057d1aa8f968459de0b9eb73f76f87d15ec7f3412db0517bb56c0b91b9dbb1 +size 384 diff --git a/margin_logs/step_0000351.npy b/margin_logs/step_0000351.npy new file mode 100644 index 0000000..9a3197e --- /dev/null +++ b/margin_logs/step_0000351.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d6d13d46c159e5790e3b9291cf751fa77b04ee7b3ddf50b6580713fe828491a +size 384 diff --git a/margin_logs/step_0000352.npy b/margin_logs/step_0000352.npy new file mode 100644 index 0000000..f51b451 --- /dev/null +++ b/margin_logs/step_0000352.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33e21ff178818b31f70fb65cdbb0c0097ff79fd8de009209d54d4c71f645b3db +size 384 diff --git a/margin_logs/step_0000353.npy b/margin_logs/step_0000353.npy new file mode 100644 index 0000000..882caec --- /dev/null +++ b/margin_logs/step_0000353.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e023f745b9ddd90c21e706787f7417714cd1a668e1cd9c7f510f06bdeb7b0d7c +size 384 diff --git a/margin_logs/step_0000354.npy b/margin_logs/step_0000354.npy new file mode 100644 index 0000000..6497941 --- /dev/null +++ b/margin_logs/step_0000354.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9df13e9bb3aea526b35e24bb271b3b63a280d44e74e7ec21b468845c577906d +size 384 diff --git a/margin_logs/step_0000355.npy b/margin_logs/step_0000355.npy new file mode 100644 index 0000000..44dcb6e --- /dev/null +++ b/margin_logs/step_0000355.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc94fadc87a4177633e469587d03fd7a873a8e1b32906eb1bee747ff43f7342 +size 384 diff --git a/margin_logs/step_0000356.npy b/margin_logs/step_0000356.npy new file mode 100644 index 0000000..c97489a --- /dev/null +++ b/margin_logs/step_0000356.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffce2dd9d756f4cc7ec4a22cb3a50ab936753ccd27defaa344890a06f5ec29f9 +size 384 diff --git a/margin_logs/step_0000357.npy b/margin_logs/step_0000357.npy new file mode 100644 index 0000000..fea1a17 --- /dev/null +++ b/margin_logs/step_0000357.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59972ea9a489562d208884f2b24d438b0f68bc9944543b81b2ab41572d013e1a +size 384 diff --git a/margin_logs/step_0000358.npy b/margin_logs/step_0000358.npy new file mode 100644 index 0000000..44a1083 --- /dev/null +++ b/margin_logs/step_0000358.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6134cfe7f495c5ddc9f1debe5db679c43fa963618850db1d9cc3b316083d7905 +size 384 diff --git a/margin_logs/step_0000359.npy b/margin_logs/step_0000359.npy new file mode 100644 index 0000000..102958e --- /dev/null +++ b/margin_logs/step_0000359.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a24998b179bed6b2886505a601ba8aab3220ad7b3dc6d8255c56992bebcee46 +size 384 diff --git a/margin_logs/step_0000360.npy b/margin_logs/step_0000360.npy new file mode 100644 index 0000000..feb3c69 --- /dev/null +++ b/margin_logs/step_0000360.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07732a3795f609ebae45d79766a5972434da9f3c22b0c8c7a735d6558ba0c94c +size 384 diff --git a/margin_logs/step_0000361.npy b/margin_logs/step_0000361.npy new file mode 100644 index 0000000..15f504c --- /dev/null +++ b/margin_logs/step_0000361.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d9e9080a9dd5ae7ac30f162605cd659399546cdd41e39e77a3ba32610560b01 +size 384 diff --git a/margin_logs/step_0000362.npy b/margin_logs/step_0000362.npy new file mode 100644 index 0000000..af7bff7 --- /dev/null +++ b/margin_logs/step_0000362.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b44964bfb6a7341bd8e02560082ab035d800b23007b603c10414b15f832dd5e1 +size 384 diff --git a/margin_logs/step_0000363.npy b/margin_logs/step_0000363.npy new file mode 100644 index 0000000..925d526 --- /dev/null +++ b/margin_logs/step_0000363.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67af6e61cdcb94dffb173d37b92962c385b923f1e0c552168acb6b0b2a0b6713 +size 384 diff --git a/margin_logs/step_0000364.npy b/margin_logs/step_0000364.npy new file mode 100644 index 0000000..d41c924 --- /dev/null +++ b/margin_logs/step_0000364.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:febe0d0ef6275d31d7a029b4aac6ce4481870238f83e18c2f4bb797300fbcb34 +size 384 diff --git a/margin_logs/step_0000365.npy b/margin_logs/step_0000365.npy new file mode 100644 index 0000000..03c1dcf --- /dev/null +++ b/margin_logs/step_0000365.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85f91c0e52d10f8b3918e89eeb46f200d80b0bd25dfca59514367bed1f069ddc +size 384 diff --git a/margin_logs/step_0000366.npy b/margin_logs/step_0000366.npy new file mode 100644 index 0000000..daa3614 --- /dev/null +++ b/margin_logs/step_0000366.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e6cb7a7489a8e571672905fcf79c1be0037cb12d24b29b6de56af5351a9c01c +size 384 diff --git a/margin_logs/step_0000367.npy b/margin_logs/step_0000367.npy new file mode 100644 index 0000000..4462979 --- /dev/null +++ b/margin_logs/step_0000367.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5537152a62153f9711b69bf3a6a154b7494e5b2b790cc3ad8cdb92492aff36cd +size 384 diff --git a/margin_logs/step_0000368.npy b/margin_logs/step_0000368.npy new file mode 100644 index 0000000..af1df27 --- /dev/null +++ b/margin_logs/step_0000368.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77bc470b10ac480e8066172d49a23497574a9da61b21cfa1dbb88eccff042b42 +size 384 diff --git a/margin_logs/step_0000369.npy b/margin_logs/step_0000369.npy new file mode 100644 index 0000000..57df756 --- /dev/null +++ b/margin_logs/step_0000369.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54de134471377daebb5d14bd5c5fb3f83905343c7f76144ebbce80d6fe696c09 +size 384 diff --git a/margin_logs/step_0000370.npy b/margin_logs/step_0000370.npy new file mode 100644 index 0000000..d387b1c --- /dev/null +++ b/margin_logs/step_0000370.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:488a5f2d11ac0b36d297c6802cb313e22f09fe95d04f5a01c1db141d99d30b92 +size 384 diff --git a/margin_logs/step_0000371.npy b/margin_logs/step_0000371.npy new file mode 100644 index 0000000..0e69c7f --- /dev/null +++ b/margin_logs/step_0000371.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cec293e0651c947fb71167fef43dc552b98efaa1986cb138c21b4fdc463db2b +size 384 diff --git a/margin_logs/step_0000372.npy b/margin_logs/step_0000372.npy new file mode 100644 index 0000000..c60012c --- /dev/null +++ b/margin_logs/step_0000372.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13452bb3ca45c19475a4b9587a887fb96e37c2137253c1e1924e4dbbc95aaee2 +size 384 diff --git a/margin_logs/step_0000373.npy b/margin_logs/step_0000373.npy new file mode 100644 index 0000000..e0e14c1 --- /dev/null +++ b/margin_logs/step_0000373.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bce4f9d4c889acef7c10c80b0b90695fc0cbb5b8127866795d5352dc7f4eea8 +size 384 diff --git a/margin_logs/step_0000374.npy b/margin_logs/step_0000374.npy new file mode 100644 index 0000000..ab58f4d --- /dev/null +++ b/margin_logs/step_0000374.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a63aa7baac9389fa0972aa2576f20b72528bc8df32220e0266a4cd5212c7257 +size 384 diff --git a/margin_logs/step_0000375.npy b/margin_logs/step_0000375.npy new file mode 100644 index 0000000..a69f81c --- /dev/null +++ b/margin_logs/step_0000375.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97e723221fbedcfb46e78c4901489fc8d8e42b68eeb5d449e5ac7e1e6f428f3b +size 384 diff --git a/margin_logs/step_0000376.npy b/margin_logs/step_0000376.npy new file mode 100644 index 0000000..58cc4eb --- /dev/null +++ b/margin_logs/step_0000376.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f42e0535094dda800d3daf524cb5dd0f4337700ee151c7c9a3f774eb2a2c910d +size 384 diff --git a/margin_logs/step_0000377.npy b/margin_logs/step_0000377.npy new file mode 100644 index 0000000..587a0f6 --- /dev/null +++ b/margin_logs/step_0000377.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:227744b188b8b1caaca2dc02cae5f3ccd21ad9f738fb119069f573af3954bd74 +size 384 diff --git a/margin_logs/step_0000378.npy b/margin_logs/step_0000378.npy new file mode 100644 index 0000000..df8c03b --- /dev/null +++ b/margin_logs/step_0000378.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c0455a3e830730a439c9824817d193a74aba498ee9aa61342b9b191d8c0123b +size 384 diff --git a/margin_logs/step_0000379.npy b/margin_logs/step_0000379.npy new file mode 100644 index 0000000..b3333ad --- /dev/null +++ b/margin_logs/step_0000379.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e314e1f9d4419898ef7af828346b956212126d3caaf9ab717fb56f1fc7ee0405 +size 384 diff --git a/margin_logs/step_0000380.npy b/margin_logs/step_0000380.npy new file mode 100644 index 0000000..1703fd3 --- /dev/null +++ b/margin_logs/step_0000380.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d041181d1b6db28733a414ae5da09507b1ac05cccb3cde3d17fb77df2b865ea +size 384 diff --git a/margin_logs/step_0000381.npy b/margin_logs/step_0000381.npy new file mode 100644 index 0000000..efca9a9 --- /dev/null +++ b/margin_logs/step_0000381.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf582fe7f2a10117e5086d22cd6e7ec566847b6d53f30f0b31710b1ed1768e65 +size 384 diff --git a/margin_logs/step_0000382.npy b/margin_logs/step_0000382.npy new file mode 100644 index 0000000..a7aaab3 --- /dev/null +++ b/margin_logs/step_0000382.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:023b7f3b11b388ddc45fdfecbd022e7050a6fa188902a08c08a7f50a30352eaa +size 384 diff --git a/margin_logs/step_0000383.npy b/margin_logs/step_0000383.npy new file mode 100644 index 0000000..2bac370 --- /dev/null +++ b/margin_logs/step_0000383.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0df4ca10ac9fc6897e91327e34337923776ee3724ea383268e2b8b7bf486b86e +size 384 diff --git a/margin_logs/step_0000384.npy b/margin_logs/step_0000384.npy new file mode 100644 index 0000000..62ef317 --- /dev/null +++ b/margin_logs/step_0000384.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44bebfa9a2d18482caeb11e15722c0889207c63345ae7c11bec5dd7967c6ef0d +size 384 diff --git a/margin_logs/step_0000385.npy b/margin_logs/step_0000385.npy new file mode 100644 index 0000000..cfe34cb --- /dev/null +++ b/margin_logs/step_0000385.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e9c52d0dab70055594b82e1a0a7ffb9bcdbca7664e9c0e3bcc10842caac6f2c +size 384 diff --git a/margin_logs/step_0000386.npy b/margin_logs/step_0000386.npy new file mode 100644 index 0000000..f3386e4 --- /dev/null +++ b/margin_logs/step_0000386.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d46e52d1d29ab031258de4387c998b68297bb292b48c2aae506a7105d79190f5 +size 384 diff --git a/margin_logs/step_0000387.npy b/margin_logs/step_0000387.npy new file mode 100644 index 0000000..203257f --- /dev/null +++ b/margin_logs/step_0000387.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:564ad37d0f089488a3f94511ff597bffc0fc6d7d1afb987e11dcef5694a99745 +size 384 diff --git a/margin_logs/step_0000388.npy b/margin_logs/step_0000388.npy new file mode 100644 index 0000000..5509a38 --- /dev/null +++ b/margin_logs/step_0000388.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ab2beb0e9e4ab99832d10b2b694644eeb3fab4c71ee1a384a6bf18515600831 +size 384 diff --git a/margin_logs/step_0000389.npy b/margin_logs/step_0000389.npy new file mode 100644 index 0000000..0f0055b --- /dev/null +++ b/margin_logs/step_0000389.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb2dceca8f4bd382bf39bdd4d54f5122292e069adcc34732daed203c801b48ba +size 384 diff --git a/margin_logs/step_0000390.npy b/margin_logs/step_0000390.npy new file mode 100644 index 0000000..b31f2ee --- /dev/null +++ b/margin_logs/step_0000390.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a2ec3f4b7fe46a043a9e0c18d385e51c82ea9d1426ce2428ddf177151038657 +size 384 diff --git a/margin_logs/step_0000391.npy b/margin_logs/step_0000391.npy new file mode 100644 index 0000000..b0e1585 --- /dev/null +++ b/margin_logs/step_0000391.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b06ab7daf07c4d2f4eca015fd6e16258da1f660995b66158504bc9a4ae6c1060 +size 384 diff --git a/margin_logs/step_0000392.npy b/margin_logs/step_0000392.npy new file mode 100644 index 0000000..fe0b193 --- /dev/null +++ b/margin_logs/step_0000392.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42157ac5ea28cde96fafb7788bce21162af39b7f9eb14bbd35ebccdd3e5f4691 +size 384 diff --git a/margin_logs/step_0000393.npy b/margin_logs/step_0000393.npy new file mode 100644 index 0000000..d0ddb0d --- /dev/null +++ b/margin_logs/step_0000393.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cf9f30b6022ef39f09f07b35214cf1d4001e384568e708e05402d80ab693fa0 +size 384 diff --git a/margin_logs/step_0000394.npy b/margin_logs/step_0000394.npy new file mode 100644 index 0000000..9056ea2 --- /dev/null +++ b/margin_logs/step_0000394.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33bd22694b12545b4c1539630be136596f8fac31056b49db25266a51ae4e1680 +size 384 diff --git a/margin_logs/step_0000395.npy b/margin_logs/step_0000395.npy new file mode 100644 index 0000000..b5c61e7 --- /dev/null +++ b/margin_logs/step_0000395.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae98f3b9a371ad74cfdb4f2370af76bb3c26bbd3dd1ec4ea1fcf1c17d3969b57 +size 384 diff --git a/margin_logs/step_0000396.npy b/margin_logs/step_0000396.npy new file mode 100644 index 0000000..e063ecb --- /dev/null +++ b/margin_logs/step_0000396.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2342ff0c2759d5f7f5936d71ec0907bda7d01143c063f501dd99198938e81c8 +size 384 diff --git a/margin_logs/step_0000397.npy b/margin_logs/step_0000397.npy new file mode 100644 index 0000000..3f4a3d4 --- /dev/null +++ b/margin_logs/step_0000397.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf868aaa6b430d55cd8937fc40214c3a0c18d85402179146316a76cf15dcbe65 +size 384 diff --git a/margin_logs/step_0000398.npy b/margin_logs/step_0000398.npy new file mode 100644 index 0000000..4b5b31d --- /dev/null +++ b/margin_logs/step_0000398.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d445d4e65febd08d4faa20b1b7a86616cc211e30b9202ca45a8cc7fdb5d77d7 +size 384 diff --git a/margin_logs/step_0000399.npy b/margin_logs/step_0000399.npy new file mode 100644 index 0000000..9081357 --- /dev/null +++ b/margin_logs/step_0000399.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:945182514e6b2028efe651c0cd19770cf7b2e4cf6f7c7a8bd525c9df24910f4d +size 384 diff --git a/margin_logs/step_0000400.npy b/margin_logs/step_0000400.npy new file mode 100644 index 0000000..66780f6 --- /dev/null +++ b/margin_logs/step_0000400.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d3e7d47f175580859a33637b319082754681e5101b4afb313986f51cda3433a +size 384 diff --git a/margin_logs/step_0000401.npy b/margin_logs/step_0000401.npy new file mode 100644 index 0000000..eab4c83 --- /dev/null +++ b/margin_logs/step_0000401.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:650f7efec987a493f5bfb512f2272ef1874d43ed27854ed7f5615255dc596ca6 +size 384 diff --git a/margin_logs/step_0000402.npy b/margin_logs/step_0000402.npy new file mode 100644 index 0000000..29ca36e --- /dev/null +++ b/margin_logs/step_0000402.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aba3c08342cba754ec280f188366e7c9b5ba10841e49d045c63fc745e7480743 +size 384 diff --git a/margin_logs/step_0000403.npy b/margin_logs/step_0000403.npy new file mode 100644 index 0000000..b74b6f7 --- /dev/null +++ b/margin_logs/step_0000403.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee797793bc0cfec7084f936132097832dab92051c810f51934d88ff8cb1ef137 +size 384 diff --git a/margin_logs/step_0000404.npy b/margin_logs/step_0000404.npy new file mode 100644 index 0000000..8b13518 --- /dev/null +++ b/margin_logs/step_0000404.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d15eaf45a57d549cb0d91e7b88dc328950192a700e23fb7b0a5255b61d00109 +size 384 diff --git a/margin_logs/step_0000405.npy b/margin_logs/step_0000405.npy new file mode 100644 index 0000000..ee5be9e --- /dev/null +++ b/margin_logs/step_0000405.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc9a43ec494b42f2d52516b8b1f283133a37cfcde8e2f298e7e1a08e311c20ff +size 384 diff --git a/margin_logs/step_0000406.npy b/margin_logs/step_0000406.npy new file mode 100644 index 0000000..d264409 --- /dev/null +++ b/margin_logs/step_0000406.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8672d95e95ba98bfaf785fdc843c5ad71d6e39398e2222efc3a1f6614e81a32f +size 384 diff --git a/margin_logs/step_0000407.npy b/margin_logs/step_0000407.npy new file mode 100644 index 0000000..80b9c86 --- /dev/null +++ b/margin_logs/step_0000407.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:206fbc5c882ec02911fe0267d64e47730457c95099235505350585e862bfe690 +size 384 diff --git a/margin_logs/step_0000408.npy b/margin_logs/step_0000408.npy new file mode 100644 index 0000000..0687e3a --- /dev/null +++ b/margin_logs/step_0000408.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d775e3b996ec601331c5472854eac0c0f272a72eea982c0bd6ac9bff09db9b69 +size 384 diff --git a/margin_logs/step_0000409.npy b/margin_logs/step_0000409.npy new file mode 100644 index 0000000..b274510 --- /dev/null +++ b/margin_logs/step_0000409.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d535c5af5db4193cb5901952662f4ae4d2eb68e83e1a3d3ca8b1257719a93b1d +size 384 diff --git a/margin_logs/step_0000410.npy b/margin_logs/step_0000410.npy new file mode 100644 index 0000000..6e5b363 --- /dev/null +++ b/margin_logs/step_0000410.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:939281398480ee1d2c954dfd278b95c11f08baf289b328e14b306741155895dc +size 384 diff --git a/margin_logs/step_0000411.npy b/margin_logs/step_0000411.npy new file mode 100644 index 0000000..897a49d --- /dev/null +++ b/margin_logs/step_0000411.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:624f65d468b8e24b4a703a5bc972a28efb99d05d609c1ac708630afde1213636 +size 384 diff --git a/margin_logs/step_0000412.npy b/margin_logs/step_0000412.npy new file mode 100644 index 0000000..baed0e8 --- /dev/null +++ b/margin_logs/step_0000412.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea582d73b5d0a815a84187702969a2cbd7c8bf65c7486cac5c5c7dad969dd42b +size 384 diff --git a/margin_logs/step_0000413.npy b/margin_logs/step_0000413.npy new file mode 100644 index 0000000..16dfd41 --- /dev/null +++ b/margin_logs/step_0000413.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfeaf543bcb39d810a094b18840b85727ee6a25e4deff2a382353241fcd9b8f3 +size 384 diff --git a/margin_logs/step_0000414.npy b/margin_logs/step_0000414.npy new file mode 100644 index 0000000..cbdd7fc --- /dev/null +++ b/margin_logs/step_0000414.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8f450335cc65e87c14c1d87bf63b6cdf67ab668fdf425a470081feadba135a3 +size 384 diff --git a/margin_logs/step_0000415.npy b/margin_logs/step_0000415.npy new file mode 100644 index 0000000..3e64e1f --- /dev/null +++ b/margin_logs/step_0000415.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ee3b1de9150bcb81bc9a1ee8451d1b2484fff4dc1816bdd3095c9b991ba0d5a +size 384 diff --git a/margin_logs/step_0000416.npy b/margin_logs/step_0000416.npy new file mode 100644 index 0000000..49a1d5b --- /dev/null +++ b/margin_logs/step_0000416.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de7009578af44cdaf60274ecdecd20e20358aa8ddebbd5913abc3d29e41b8302 +size 384 diff --git a/margin_logs/step_0000417.npy b/margin_logs/step_0000417.npy new file mode 100644 index 0000000..7da8b85 --- /dev/null +++ b/margin_logs/step_0000417.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a97f5883dfff1fea03f5fe4de72cb7f0e499bdac584a2b1c5a9dbd1744a48733 +size 384 diff --git a/margin_logs/step_0000418.npy b/margin_logs/step_0000418.npy new file mode 100644 index 0000000..3177cbd --- /dev/null +++ b/margin_logs/step_0000418.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7aab837a58bc4f0741c7c79a4c2c4e9816d36a777b801e468361d5e0d8531c07 +size 384 diff --git a/margin_logs/step_0000419.npy b/margin_logs/step_0000419.npy new file mode 100644 index 0000000..8400cdd --- /dev/null +++ b/margin_logs/step_0000419.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bbb2bfc9a741b56d77ef8030183a70a265caf00d0d1165ed9bd012c268903c7 +size 384 diff --git a/margin_logs/step_0000420.npy b/margin_logs/step_0000420.npy new file mode 100644 index 0000000..5fc9cee --- /dev/null +++ b/margin_logs/step_0000420.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf0d0e43a01aa21fe130f7ae9ca6d69fbab51bf41f61875067e5bee054deecc4 +size 384 diff --git a/margin_logs/step_0000421.npy b/margin_logs/step_0000421.npy new file mode 100644 index 0000000..51375c0 --- /dev/null +++ b/margin_logs/step_0000421.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cb823ff7be1ab4ce63f4f89a4dd2e084e57e22e635b2be6a152226468f2ee2a +size 384 diff --git a/margin_logs/step_0000422.npy b/margin_logs/step_0000422.npy new file mode 100644 index 0000000..9de9b80 --- /dev/null +++ b/margin_logs/step_0000422.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c39a7341c0b1a1d8525e423c1cb6037cf248bd553c73d7f9c91fdbe1ac5a269 +size 384 diff --git a/margin_logs/step_0000423.npy b/margin_logs/step_0000423.npy new file mode 100644 index 0000000..dfade28 --- /dev/null +++ b/margin_logs/step_0000423.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aec713b1a5b276b380d4b9be5897df228efa04e250f8067726ced260bd8f3d54 +size 384 diff --git a/margin_logs/step_0000424.npy b/margin_logs/step_0000424.npy new file mode 100644 index 0000000..251c78f --- /dev/null +++ b/margin_logs/step_0000424.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc7ae6bcdac35d0a9d9b659e8b2479f0f45d9ac73ddce36cfce4f49e40959539 +size 384 diff --git a/margin_logs/step_0000425.npy b/margin_logs/step_0000425.npy new file mode 100644 index 0000000..055b420 --- /dev/null +++ b/margin_logs/step_0000425.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c832cfd87d0dc584eb216b6aeb5d183df8402a9943935422922c24a5b5adbd07 +size 384 diff --git a/margin_logs/step_0000426.npy b/margin_logs/step_0000426.npy new file mode 100644 index 0000000..bd8960a --- /dev/null +++ b/margin_logs/step_0000426.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75f606d09acb403c49abe2062d0b85ebc17106f80d365e8872ec1348bba25c6b +size 384 diff --git a/margin_logs/step_0000427.npy b/margin_logs/step_0000427.npy new file mode 100644 index 0000000..b08e18b --- /dev/null +++ b/margin_logs/step_0000427.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eb715c168af5f71aaeb0a88815858b0b8aed8e8965a98103631ed58660e06b4 +size 384 diff --git a/margin_logs/step_0000428.npy b/margin_logs/step_0000428.npy new file mode 100644 index 0000000..5428fe6 --- /dev/null +++ b/margin_logs/step_0000428.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b0b58444a6f77cf64016df5f05dc0fa198e19cc2426b971cc272f77d36ea61e +size 384 diff --git a/margin_logs/step_0000429.npy b/margin_logs/step_0000429.npy new file mode 100644 index 0000000..30dcd57 --- /dev/null +++ b/margin_logs/step_0000429.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd9a0692c885c2388e9ad5f33b0f2ebfd2695836469450ec84138f25ffadfb41 +size 384 diff --git a/margin_logs/step_0000430.npy b/margin_logs/step_0000430.npy new file mode 100644 index 0000000..fe0d407 --- /dev/null +++ b/margin_logs/step_0000430.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03ca6063944b6bf6b7ce1fd08621bcf109125623a7fe1d5f76357c98f2369c74 +size 384 diff --git a/margin_logs/step_0000431.npy b/margin_logs/step_0000431.npy new file mode 100644 index 0000000..8c2bb05 --- /dev/null +++ b/margin_logs/step_0000431.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42c4d50f352e8741525ee058cee10c89e9d44fc74f4e15293a7b1d05c1f4f02d +size 384 diff --git a/margin_logs/step_0000432.npy b/margin_logs/step_0000432.npy new file mode 100644 index 0000000..35789cc --- /dev/null +++ b/margin_logs/step_0000432.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:556dc33b8e89952de85dd64a885587db654abd7c83bb5c8a06eccca926ec9193 +size 384 diff --git a/margin_logs/step_0000433.npy b/margin_logs/step_0000433.npy new file mode 100644 index 0000000..5efe399 --- /dev/null +++ b/margin_logs/step_0000433.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ddf560778e906afbb0815b0abb7dd7f77a8298250023414ced2766f5f97adfc +size 384 diff --git a/margin_logs/step_0000434.npy b/margin_logs/step_0000434.npy new file mode 100644 index 0000000..15311a7 --- /dev/null +++ b/margin_logs/step_0000434.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0217dfc4c494e8259f0ac1d2d1944ce4965ff0cb536ea62033d1d64cbf7c8c9c +size 384 diff --git a/margin_logs/step_0000435.npy b/margin_logs/step_0000435.npy new file mode 100644 index 0000000..c83f40d --- /dev/null +++ b/margin_logs/step_0000435.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f8bcea3e1c452a687abc638c5f2a50afcee6cade490ef8c9953366cd5e84a24 +size 384 diff --git a/margin_logs/step_0000436.npy b/margin_logs/step_0000436.npy new file mode 100644 index 0000000..66b1df8 --- /dev/null +++ b/margin_logs/step_0000436.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74294ed0a2cb2ca428eea91fbf69522a321eb0767cd670e505b28673973b79e3 +size 384 diff --git a/margin_logs/step_0000437.npy b/margin_logs/step_0000437.npy new file mode 100644 index 0000000..f346430 --- /dev/null +++ b/margin_logs/step_0000437.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2a694fe52a594efba9a4f5871da3daef387f88934999c29811b2b48b3c238f1 +size 384 diff --git a/margin_logs/step_0000438.npy b/margin_logs/step_0000438.npy new file mode 100644 index 0000000..a74be78 --- /dev/null +++ b/margin_logs/step_0000438.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1c356f7bd95dbac06482e78122bdabcd95c69583ed27b984e926ce687deb09a +size 384 diff --git a/margin_logs/step_0000439.npy b/margin_logs/step_0000439.npy new file mode 100644 index 0000000..9ee7101 --- /dev/null +++ b/margin_logs/step_0000439.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6859b26c86085c95e1c7d0f159bedbd0c0c7ec22e7c92453f21cf2e4d3ebda3b +size 384 diff --git a/margin_logs/step_0000440.npy b/margin_logs/step_0000440.npy new file mode 100644 index 0000000..f9fd693 --- /dev/null +++ b/margin_logs/step_0000440.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb5f807afdf3c4cdcae8a780be299572e053adf723f3344bda2a4e9b43d36744 +size 384 diff --git a/margin_logs/step_0000441.npy b/margin_logs/step_0000441.npy new file mode 100644 index 0000000..e3bc544 --- /dev/null +++ b/margin_logs/step_0000441.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23da92e930174432695d545f18f1d77253db0466c4ecd2e55441c1ef52fade01 +size 384 diff --git a/margin_logs/step_0000442.npy b/margin_logs/step_0000442.npy new file mode 100644 index 0000000..df3c1f6 --- /dev/null +++ b/margin_logs/step_0000442.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a49bd53c1a2cf05e5db43607fb165b66519adb1144b8c7bcce9b4fc2f9ee9d2 +size 384 diff --git a/margin_logs/step_0000443.npy b/margin_logs/step_0000443.npy new file mode 100644 index 0000000..a7c4d2e --- /dev/null +++ b/margin_logs/step_0000443.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d4bc57faefe037343c6a9c011e5963d8bf2ee168431f14032ed01c2799ecd24 +size 384 diff --git a/margin_logs/step_0000444.npy b/margin_logs/step_0000444.npy new file mode 100644 index 0000000..c15e663 --- /dev/null +++ b/margin_logs/step_0000444.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f40690456c0a324995589a5651a7e15da852a96a1f118f8609541d54bb9fca0 +size 384 diff --git a/margin_logs/step_0000445.npy b/margin_logs/step_0000445.npy new file mode 100644 index 0000000..ca7a139 --- /dev/null +++ b/margin_logs/step_0000445.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e871b63fbaf2b56e5e4623d1206fb6c44927fc7cf820e4411ee3277f9f2aaed6 +size 384 diff --git a/margin_logs/step_0000446.npy b/margin_logs/step_0000446.npy new file mode 100644 index 0000000..91435e7 --- /dev/null +++ b/margin_logs/step_0000446.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:799e20167af80bc48a14f4c3fc6d02ecc3ca7ac94220b6ef886cce009dc9d2de +size 384 diff --git a/margin_logs/step_0000447.npy b/margin_logs/step_0000447.npy new file mode 100644 index 0000000..521cd0c --- /dev/null +++ b/margin_logs/step_0000447.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:952d8475d024995097a1cc93535ec081193e7e86bb206cf2fd29fc6f15edc4c5 +size 384 diff --git a/margin_logs/step_0000448.npy b/margin_logs/step_0000448.npy new file mode 100644 index 0000000..44cad0d --- /dev/null +++ b/margin_logs/step_0000448.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02563c68af699e4eb77a7977ebefef132f4c46ca43601aa8c1cc0f0a237c4187 +size 384 diff --git a/margin_logs/step_0000449.npy b/margin_logs/step_0000449.npy new file mode 100644 index 0000000..721c0d6 --- /dev/null +++ b/margin_logs/step_0000449.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b5946bd509fc8fae9dcb6364458496c048ea127f0fdb17e1fc2893b32aa613b +size 384 diff --git a/margin_logs/step_0000450.npy b/margin_logs/step_0000450.npy new file mode 100644 index 0000000..51326fd --- /dev/null +++ b/margin_logs/step_0000450.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56521d6b74ccd78d63f71bedb2dd384e6b728a62fb75c1d4508486a0bf91624d +size 384 diff --git a/margin_logs/step_0000451.npy b/margin_logs/step_0000451.npy new file mode 100644 index 0000000..c3bb4ee --- /dev/null +++ b/margin_logs/step_0000451.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e7a7b36d0f3b5a082f8f77de958ccfaddf4430c2da78d1ced4490c3c387b835 +size 384 diff --git a/margin_logs/step_0000452.npy b/margin_logs/step_0000452.npy new file mode 100644 index 0000000..63e2091 --- /dev/null +++ b/margin_logs/step_0000452.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a8b69319ef2bc4033e3ff7593a6066c6e61e0c1cfcbf2b0bda1165776d0e8aa +size 384 diff --git a/margin_logs/step_0000453.npy b/margin_logs/step_0000453.npy new file mode 100644 index 0000000..40fce1d --- /dev/null +++ b/margin_logs/step_0000453.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:209b91007ef5cea67d57f23bddca7f785b4dfafb66a4b216a9fdb79a8dfc3848 +size 384 diff --git a/margin_logs/step_0000454.npy b/margin_logs/step_0000454.npy new file mode 100644 index 0000000..64f5d24 --- /dev/null +++ b/margin_logs/step_0000454.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44cabef84b56c9f6d403b963bae1f732584bd53f63e426a9008722f3f9ebc2e +size 384 diff --git a/margin_logs/step_0000455.npy b/margin_logs/step_0000455.npy new file mode 100644 index 0000000..111402f --- /dev/null +++ b/margin_logs/step_0000455.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc98912c63f7d5ed1e4105f1248fd684975abd52a408d431eae545946c144656 +size 384 diff --git a/margin_logs/step_0000456.npy b/margin_logs/step_0000456.npy new file mode 100644 index 0000000..e1b099a --- /dev/null +++ b/margin_logs/step_0000456.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a477fc0894629d64e7c1adc9681c43458158820ca3c165e73820058246303965 +size 384 diff --git a/margin_logs/step_0000457.npy b/margin_logs/step_0000457.npy new file mode 100644 index 0000000..39501bc --- /dev/null +++ b/margin_logs/step_0000457.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5500ec186d5f8f911ecd9689830555b5d7f621d0289d91c37070a50ee8499d2 +size 384 diff --git a/margin_logs/step_0000458.npy b/margin_logs/step_0000458.npy new file mode 100644 index 0000000..72d501c --- /dev/null +++ b/margin_logs/step_0000458.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d8803c78cc0ef62eaf15649153475b0b1a6c373d448480d9442e470f0fbf0e6 +size 384 diff --git a/margin_logs/step_0000459.npy b/margin_logs/step_0000459.npy new file mode 100644 index 0000000..fee9488 --- /dev/null +++ b/margin_logs/step_0000459.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c50a5e30d2c8a8b40b2d73b7df7f2ea626519ba1ee9ff1d2266efba58d6819b +size 384 diff --git a/margin_logs/step_0000460.npy b/margin_logs/step_0000460.npy new file mode 100644 index 0000000..f7c49b8 --- /dev/null +++ b/margin_logs/step_0000460.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d74188907451daf1abbdd2790ef4ad7f181b4dd94d7085a4261a9de7b964644f +size 384 diff --git a/margin_logs/step_0000461.npy b/margin_logs/step_0000461.npy new file mode 100644 index 0000000..297eca2 --- /dev/null +++ b/margin_logs/step_0000461.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daccd00794191fcbb237d19d5646459d9fc67d0b8278356b59115dc31c9387d5 +size 384 diff --git a/margin_logs/step_0000462.npy b/margin_logs/step_0000462.npy new file mode 100644 index 0000000..f28ec58 --- /dev/null +++ b/margin_logs/step_0000462.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c0ed7e72f376e44efc68ead2c5e87bed41ff094c2d9e699f59016f081b1cf2c +size 384 diff --git a/margin_logs/step_0000463.npy b/margin_logs/step_0000463.npy new file mode 100644 index 0000000..2ff5869 --- /dev/null +++ b/margin_logs/step_0000463.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56b4dd7311528d34393aa201eacccbd839ce9ce75efb90c2ea36bb4ba7fdd5fe +size 384 diff --git a/margin_logs/step_0000464.npy b/margin_logs/step_0000464.npy new file mode 100644 index 0000000..3a159e4 --- /dev/null +++ b/margin_logs/step_0000464.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4be3fba1d6c0feab1f723aa17b5ddb093ffc7c896d74ab87b71df21631630fe2 +size 384 diff --git a/margin_logs/step_0000465.npy b/margin_logs/step_0000465.npy new file mode 100644 index 0000000..1e25574 --- /dev/null +++ b/margin_logs/step_0000465.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b80440d8c1c1d45c04159e0a152448e35aed31d3094393b8cd69a6f5049843cc +size 384 diff --git a/margin_logs/step_0000466.npy b/margin_logs/step_0000466.npy new file mode 100644 index 0000000..a68ba7e --- /dev/null +++ b/margin_logs/step_0000466.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2e01622a3651a3de230465ec148363cc7d50c4546b27803ec22a87cd42b8ecd +size 384 diff --git a/margin_logs/step_0000467.npy b/margin_logs/step_0000467.npy new file mode 100644 index 0000000..e5df4f2 --- /dev/null +++ b/margin_logs/step_0000467.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56c2e9e49758e9e9300121745a537be0c1ea216b83b868da42c651e9ba90e8c2 +size 384 diff --git a/margin_logs/step_0000468.npy b/margin_logs/step_0000468.npy new file mode 100644 index 0000000..5306054 --- /dev/null +++ b/margin_logs/step_0000468.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbe54715fbc4e070f780cebc73e9a08727fc71f79e49d0d845ad9b86200e3fae +size 384 diff --git a/margin_logs/step_0000469.npy b/margin_logs/step_0000469.npy new file mode 100644 index 0000000..f3a374a --- /dev/null +++ b/margin_logs/step_0000469.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be1fe23236167340861bffa63571b4c5a42726564a66ed63b5f0133cb598d283 +size 384 diff --git a/margin_logs/step_0000470.npy b/margin_logs/step_0000470.npy new file mode 100644 index 0000000..29df9c5 --- /dev/null +++ b/margin_logs/step_0000470.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb62e718630e8b779d44318333fd82b83973052db987343330e9ac5c011abd21 +size 384 diff --git a/margin_logs/step_0000471.npy b/margin_logs/step_0000471.npy new file mode 100644 index 0000000..db772c3 --- /dev/null +++ b/margin_logs/step_0000471.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d27ae49d55c9877367ce699dac494be32dc894b7e128f78abb0b85bba71532f +size 384 diff --git a/margin_logs/step_0000472.npy b/margin_logs/step_0000472.npy new file mode 100644 index 0000000..c99b745 --- /dev/null +++ b/margin_logs/step_0000472.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0983de45ed65c5f7254c989185deb1d6c07b2613dd7bb0e6f81706822e00d395 +size 384 diff --git a/margin_logs/step_0000473.npy b/margin_logs/step_0000473.npy new file mode 100644 index 0000000..e50873d --- /dev/null +++ b/margin_logs/step_0000473.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed26d03dd6d92d3b8d2334721598b944debc89f2bb37f95f5bf1d629718a7489 +size 384 diff --git a/margin_logs/step_0000474.npy b/margin_logs/step_0000474.npy new file mode 100644 index 0000000..1f8cd51 --- /dev/null +++ b/margin_logs/step_0000474.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a7f84f0fe1871a47f54add91046c746b2fe464eab14371afe9a9ffa569574f9 +size 384 diff --git a/margin_logs/step_0000475.npy b/margin_logs/step_0000475.npy new file mode 100644 index 0000000..c1775e7 --- /dev/null +++ b/margin_logs/step_0000475.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7be885f6d92e2bbf908a834ec774bb7bb5cd9b3055691e4a5c331e4908d737b8 +size 384 diff --git a/margin_logs/step_0000476.npy b/margin_logs/step_0000476.npy new file mode 100644 index 0000000..26566e9 --- /dev/null +++ b/margin_logs/step_0000476.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d19edb45c70357c80d6d6265afb8ee7284cd67e3c155d9f326e2d3b4092d4c6 +size 384 diff --git a/margin_logs/step_0000477.npy b/margin_logs/step_0000477.npy new file mode 100644 index 0000000..dc9884f --- /dev/null +++ b/margin_logs/step_0000477.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3607cce71a60b1884fb036e43c7202f2c30594086cc4915241881d1cb41c327a +size 384 diff --git a/margin_logs/step_0000478.npy b/margin_logs/step_0000478.npy new file mode 100644 index 0000000..5a91716 --- /dev/null +++ b/margin_logs/step_0000478.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69f3b92c74dad893075e873347384c93bdb7f3afc74409915c60c9214683d169 +size 384 diff --git a/margin_logs/step_0000479.npy b/margin_logs/step_0000479.npy new file mode 100644 index 0000000..6359fa1 --- /dev/null +++ b/margin_logs/step_0000479.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3825b4b622e0fd796536da12d5628a01046dfad1dbc4f98aa9827687bc0fdacf +size 384 diff --git a/margin_logs/step_0000480.npy b/margin_logs/step_0000480.npy new file mode 100644 index 0000000..73922ff --- /dev/null +++ b/margin_logs/step_0000480.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaafbfed26c970701240351e9c935b21713ba7fe4f791140fb86b8f693c77098 +size 384 diff --git a/margin_logs/step_0000481.npy b/margin_logs/step_0000481.npy new file mode 100644 index 0000000..0543639 --- /dev/null +++ b/margin_logs/step_0000481.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9e10c6bd7ad0cb0f1946fbaf83cd675c9add3d5ea920a8efec989c49cc0e943 +size 384 diff --git a/margin_logs/step_0000482.npy b/margin_logs/step_0000482.npy new file mode 100644 index 0000000..3162de5 --- /dev/null +++ b/margin_logs/step_0000482.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c1450b3cd5ea77268f509a0a458673296b4f89c2d88eede04f4cae0e97eb5aa +size 384 diff --git a/margin_logs/step_0000483.npy b/margin_logs/step_0000483.npy new file mode 100644 index 0000000..95bc1fb --- /dev/null +++ b/margin_logs/step_0000483.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c853ebcc514174cfe3696426e387816b2ddf30558d535958a199d8cc526f1a4e +size 384 diff --git a/margin_logs/step_0000484.npy b/margin_logs/step_0000484.npy new file mode 100644 index 0000000..d37a400 --- /dev/null +++ b/margin_logs/step_0000484.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9cd6f63306344ba852db2336ce51f966f8b81a3e1d3cfc342d95906374fe6c3 +size 384 diff --git a/margin_logs/step_0000485.npy b/margin_logs/step_0000485.npy new file mode 100644 index 0000000..9e8f35b --- /dev/null +++ b/margin_logs/step_0000485.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e9d14f3bb61aaeaf984f1cb2903944caeb4d6017f77c3509de79f0563599166 +size 384 diff --git a/margin_logs/step_0000486.npy b/margin_logs/step_0000486.npy new file mode 100644 index 0000000..2fb7c66 --- /dev/null +++ b/margin_logs/step_0000486.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2976c3c5b44da258fca2673821c0ba0e040950622b3fd725359ccb3ee0ffc7e +size 384 diff --git a/margin_logs/step_0000487.npy b/margin_logs/step_0000487.npy new file mode 100644 index 0000000..64c5c9b --- /dev/null +++ b/margin_logs/step_0000487.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c1ed63955670c4554a1e3b6dd0bee4768dfdff6bf8b865a1a36e172c40e91de +size 384 diff --git a/margin_logs/step_0000488.npy b/margin_logs/step_0000488.npy new file mode 100644 index 0000000..02b0caa --- /dev/null +++ b/margin_logs/step_0000488.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3160231a80fbce7660843d56c17151cd319401f33252bea4b495d65247d82b2f +size 384 diff --git a/margin_logs/step_0000489.npy b/margin_logs/step_0000489.npy new file mode 100644 index 0000000..d21de44 --- /dev/null +++ b/margin_logs/step_0000489.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d39060d4f3f1fae65f8ed7aa8ea03db129d035d2eb7d727f517386ad4486167 +size 384 diff --git a/margin_logs/step_0000490.npy b/margin_logs/step_0000490.npy new file mode 100644 index 0000000..b0d5d31 --- /dev/null +++ b/margin_logs/step_0000490.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73bb390f02f96219d34af8fb0c0df93b1361981b0e266af20281839210b32f35 +size 384 diff --git a/margin_logs/step_0000491.npy b/margin_logs/step_0000491.npy new file mode 100644 index 0000000..5811508 --- /dev/null +++ b/margin_logs/step_0000491.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d32eeaf4d46d32e1128e7e5914729f1ffe69905407c1fac7b30c17df1f3a585 +size 384 diff --git a/margin_logs/step_0000492.npy b/margin_logs/step_0000492.npy new file mode 100644 index 0000000..68303fe --- /dev/null +++ b/margin_logs/step_0000492.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:539af2cd540acb6822c519220c6b7d787daf3bdd56eac29050c559355f9c35ce +size 384 diff --git a/margin_logs/step_0000493.npy b/margin_logs/step_0000493.npy new file mode 100644 index 0000000..cb6ec57 --- /dev/null +++ b/margin_logs/step_0000493.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4764b9b4413ad3900eba970593565c4b6556f9fc2ab97b2c3ab20e44de25e84c +size 384 diff --git a/margin_logs/step_0000494.npy b/margin_logs/step_0000494.npy new file mode 100644 index 0000000..7106023 --- /dev/null +++ b/margin_logs/step_0000494.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ed4cacfaaec4bf5af17b7583d908ac80376d4610bd8657d5fb39a5f30344569 +size 384 diff --git a/margin_logs/step_0000495.npy b/margin_logs/step_0000495.npy new file mode 100644 index 0000000..9e0d85b --- /dev/null +++ b/margin_logs/step_0000495.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49af16532e28057d6bdcea032a02b9cb674530ab56e23453449d47c36747ebc8 +size 384 diff --git a/margin_logs/step_0000496.npy b/margin_logs/step_0000496.npy new file mode 100644 index 0000000..f2b2b50 --- /dev/null +++ b/margin_logs/step_0000496.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3a3589a5f9479b695f0ea28063fb0c9a8f87f152f1b60d37fc93b31b329815b +size 384 diff --git a/margin_logs/step_0000497.npy b/margin_logs/step_0000497.npy new file mode 100644 index 0000000..ed36814 --- /dev/null +++ b/margin_logs/step_0000497.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3293f1ae86dc922048468d0a777cf60b08b49d0f3e7577859519067b3f03668 +size 384 diff --git a/margin_logs/step_0000498.npy b/margin_logs/step_0000498.npy new file mode 100644 index 0000000..df20857 --- /dev/null +++ b/margin_logs/step_0000498.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc87ac8db8749232bf696140d03d01b1a734aad9040c8067478763ed0ff977cc +size 384 diff --git a/margin_logs/step_0000499.npy b/margin_logs/step_0000499.npy new file mode 100644 index 0000000..1a93e0c --- /dev/null +++ b/margin_logs/step_0000499.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ace0c84f7200d42283ffa4be3faad052ba8835a7a90f985c3e66eed06408178b +size 384 diff --git a/margin_logs/step_0000500.npy b/margin_logs/step_0000500.npy new file mode 100644 index 0000000..d7ee44f --- /dev/null +++ b/margin_logs/step_0000500.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f409f791d8da6ba94ff0e3a24b9304ec38d49cbbe8cfff3f51f881ca028abf6a +size 384 diff --git a/margin_logs/step_0000501.npy b/margin_logs/step_0000501.npy new file mode 100644 index 0000000..498e012 --- /dev/null +++ b/margin_logs/step_0000501.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ee9612bbc0c5048473799950aa2b6da5676aa02774da5229b9b8a7c31f4459f +size 384 diff --git a/margin_logs/step_0000502.npy b/margin_logs/step_0000502.npy new file mode 100644 index 0000000..90e08ef --- /dev/null +++ b/margin_logs/step_0000502.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3481e8c3f8c4e2e61ea889901edd9190a1f90757f3e843601783521e4d40816 +size 384 diff --git a/margin_logs/step_0000503.npy b/margin_logs/step_0000503.npy new file mode 100644 index 0000000..4f764cf --- /dev/null +++ b/margin_logs/step_0000503.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00567b0033cbec1e3660eadef67e4ac23702bf6e95263bfb47cb0f05b2202d08 +size 384 diff --git a/margin_logs/step_0000504.npy b/margin_logs/step_0000504.npy new file mode 100644 index 0000000..0b2ad2e --- /dev/null +++ b/margin_logs/step_0000504.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:122c08e7b3de7a9b11c4e259b438f5a5f9394218d118dd95e1157c3b93605684 +size 384 diff --git a/margin_logs/step_0000505.npy b/margin_logs/step_0000505.npy new file mode 100644 index 0000000..e236cea --- /dev/null +++ b/margin_logs/step_0000505.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e6e20978effd82d61c608a35eec0097426cdf258ca55342af68e93cfbeb9f61 +size 384 diff --git a/margin_logs/step_0000506.npy b/margin_logs/step_0000506.npy new file mode 100644 index 0000000..c8e6cd7 --- /dev/null +++ b/margin_logs/step_0000506.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e38b50e44597d8c2810fe54afeef08485013edd3d3127b1c0f36d38c1939a17c +size 384 diff --git a/margin_logs/step_0000507.npy b/margin_logs/step_0000507.npy new file mode 100644 index 0000000..077183a --- /dev/null +++ b/margin_logs/step_0000507.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:937000bb823d01186354682e866a04f314abfa95e605f63f6d7e7c066e9a5e95 +size 384 diff --git a/margin_logs/step_0000508.npy b/margin_logs/step_0000508.npy new file mode 100644 index 0000000..f30482e --- /dev/null +++ b/margin_logs/step_0000508.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbee932b07fc0cc830923f7456bdb8b0749469763059bdb63337eb365669f965 +size 384 diff --git a/margin_logs/step_0000509.npy b/margin_logs/step_0000509.npy new file mode 100644 index 0000000..cf6fa19 --- /dev/null +++ b/margin_logs/step_0000509.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab79acaaabee3828e3ad4e44092e844d6903c4d8effc5cc7ad0fa56cac5d6107 +size 384 diff --git a/margin_logs/step_0000510.npy b/margin_logs/step_0000510.npy new file mode 100644 index 0000000..970937c --- /dev/null +++ b/margin_logs/step_0000510.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31b36e030cd9d851f5fe1ac4172c81d489dd700db5935b01c264659ecfcb86c6 +size 384 diff --git a/margin_logs/step_0000511.npy b/margin_logs/step_0000511.npy new file mode 100644 index 0000000..03cdb30 --- /dev/null +++ b/margin_logs/step_0000511.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a21587ff81906be1f34dc84c7819b85cab8b2b432b0a1663f99f73326ca9541a +size 384 diff --git a/margin_logs/step_0000512.npy b/margin_logs/step_0000512.npy new file mode 100644 index 0000000..72abfee --- /dev/null +++ b/margin_logs/step_0000512.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43c92d0a6a742f7d3e73e170097db93b9dd00e9747a55974ee6b75f174c6c7e6 +size 384 diff --git a/margin_logs/step_0000513.npy b/margin_logs/step_0000513.npy new file mode 100644 index 0000000..7f51dcb --- /dev/null +++ b/margin_logs/step_0000513.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6e8058e801b0d75e191af979ab5981d7311ce1c7f9211e309ccf185df7b41e6 +size 384 diff --git a/margin_logs/step_0000514.npy b/margin_logs/step_0000514.npy new file mode 100644 index 0000000..940bc14 --- /dev/null +++ b/margin_logs/step_0000514.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8ed5013d25112699ee696784132b10f6c3bfdac6ade10851a705c6f14349037 +size 384 diff --git a/margin_logs/step_0000515.npy b/margin_logs/step_0000515.npy new file mode 100644 index 0000000..44112aa --- /dev/null +++ b/margin_logs/step_0000515.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dd87a3a74d7fdc10d4786c2b75a3eccddf824d722072be0b33de5a704df69a6 +size 384 diff --git a/margin_logs/step_0000516.npy b/margin_logs/step_0000516.npy new file mode 100644 index 0000000..0fd7c44 --- /dev/null +++ b/margin_logs/step_0000516.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:788e72c3723dcea702a23dbf6d361672cbf56845b96d5275a1c453943151f864 +size 384 diff --git a/margin_logs/step_0000517.npy b/margin_logs/step_0000517.npy new file mode 100644 index 0000000..118e684 --- /dev/null +++ b/margin_logs/step_0000517.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec7ac9872680d5bc5cdf9894fe7b7245acb3b51588c4d88cb8ffb3d5a5795a4c +size 384 diff --git a/margin_logs/step_0000518.npy b/margin_logs/step_0000518.npy new file mode 100644 index 0000000..325e80a --- /dev/null +++ b/margin_logs/step_0000518.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a897d997cca2a5604d75f80a841a34473131d3453a1925099547fa5cf9722597 +size 384 diff --git a/margin_logs/step_0000519.npy b/margin_logs/step_0000519.npy new file mode 100644 index 0000000..44d71cc --- /dev/null +++ b/margin_logs/step_0000519.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:593f58d6e3af3474e7569b483d45c5ff38e78bda3218d183730895cba391ce73 +size 384 diff --git a/margin_logs/step_0000520.npy b/margin_logs/step_0000520.npy new file mode 100644 index 0000000..86a325c --- /dev/null +++ b/margin_logs/step_0000520.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68d985e470c0cce21ed184a39134b550f8c0450287c342c8806aeb007b50bc4c +size 384 diff --git a/margin_logs/step_0000521.npy b/margin_logs/step_0000521.npy new file mode 100644 index 0000000..ca39849 --- /dev/null +++ b/margin_logs/step_0000521.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af27fd5969c9865c6e008a2d55a66f0ee10852d4e50bc0a6ca753d5c28bf213b +size 384 diff --git a/margin_logs/step_0000522.npy b/margin_logs/step_0000522.npy new file mode 100644 index 0000000..adc22ea --- /dev/null +++ b/margin_logs/step_0000522.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a905c48aab61edce513e5aa8beacb7ce2bb9ae1a1fc38715dea03bf209360c3f +size 384 diff --git a/margin_logs/step_0000523.npy b/margin_logs/step_0000523.npy new file mode 100644 index 0000000..7179a20 --- /dev/null +++ b/margin_logs/step_0000523.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7000cf1b250cb7b637ecf78a4531b8eac1d86a8822c066606ebea274ae335aa4 +size 384 diff --git a/margin_logs/step_0000524.npy b/margin_logs/step_0000524.npy new file mode 100644 index 0000000..04931ac --- /dev/null +++ b/margin_logs/step_0000524.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f39aa4826a3796904bce3d6f3085d86d538214df3e9c26517aeb8b686aace39 +size 384 diff --git a/margin_logs/step_0000525.npy b/margin_logs/step_0000525.npy new file mode 100644 index 0000000..e400b00 --- /dev/null +++ b/margin_logs/step_0000525.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c81bc1354a288c37f5ecc2b04e7359ea91cccf67afedfc3dabfe3b08d688c63 +size 384 diff --git a/margin_logs/step_0000526.npy b/margin_logs/step_0000526.npy new file mode 100644 index 0000000..1f4f038 --- /dev/null +++ b/margin_logs/step_0000526.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8983876adc99c0baba75ec15780f617f36f013d732d68ba245ed76def905d8ef +size 384 diff --git a/margin_logs/step_0000527.npy b/margin_logs/step_0000527.npy new file mode 100644 index 0000000..9e5c8ea --- /dev/null +++ b/margin_logs/step_0000527.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7052df9fbfedb3724637e037a345104ad0eaca71f55bdc6006bb82740c6e7d2 +size 384 diff --git a/margin_logs/step_0000528.npy b/margin_logs/step_0000528.npy new file mode 100644 index 0000000..12cfd76 --- /dev/null +++ b/margin_logs/step_0000528.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05b279d6a448a78cd8b576a9462edde339b0e44feff33471908d24d4753021e4 +size 384 diff --git a/margin_logs/step_0000529.npy b/margin_logs/step_0000529.npy new file mode 100644 index 0000000..a9d1c61 --- /dev/null +++ b/margin_logs/step_0000529.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d2642782603bbd73e5dccd5796684572a9ceb19d34f622eb228b1f8c0bf0cf +size 384 diff --git a/margin_logs/step_0000530.npy b/margin_logs/step_0000530.npy new file mode 100644 index 0000000..61f3da5 --- /dev/null +++ b/margin_logs/step_0000530.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c61672ba68e881564dca72aafcfc070b5db6b55a50ba3e7f863f862927ff506 +size 384 diff --git a/margin_logs/step_0000531.npy b/margin_logs/step_0000531.npy new file mode 100644 index 0000000..464e1a6 --- /dev/null +++ b/margin_logs/step_0000531.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4838b9d870589cb275e5e8dd5bb7b437b5534653ec15e82ae7661a8aeed0427 +size 384 diff --git a/margin_logs/step_0000532.npy b/margin_logs/step_0000532.npy new file mode 100644 index 0000000..652de4c --- /dev/null +++ b/margin_logs/step_0000532.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b1ccee8ed9bb905288d79276cc6c61971dea62fa9abae6505b356ccd1f0c6eb +size 384 diff --git a/margin_logs/step_0000533.npy b/margin_logs/step_0000533.npy new file mode 100644 index 0000000..db52114 --- /dev/null +++ b/margin_logs/step_0000533.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:063fc11797c7b8d69b996f465c8693c84ad77d765fc06a94a38fd80b85454018 +size 384 diff --git a/margin_logs/step_0000534.npy b/margin_logs/step_0000534.npy new file mode 100644 index 0000000..5549053 --- /dev/null +++ b/margin_logs/step_0000534.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54f17860449f53767d7713d432c870aa4b18ef325a7965bda3f36c89bf328160 +size 384 diff --git a/margin_logs/step_0000535.npy b/margin_logs/step_0000535.npy new file mode 100644 index 0000000..d07197f --- /dev/null +++ b/margin_logs/step_0000535.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2931392f4b01a869bec26b93e9a55f0e6a387fe0744e5520e8530787c7924a3 +size 384 diff --git a/margin_logs/step_0000536.npy b/margin_logs/step_0000536.npy new file mode 100644 index 0000000..f6a9b47 --- /dev/null +++ b/margin_logs/step_0000536.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:740e768824e9810ce447b57e6ae926a7ae47a07c73330e46869797e3306768b4 +size 384 diff --git a/margin_logs/step_0000537.npy b/margin_logs/step_0000537.npy new file mode 100644 index 0000000..b2e8e4d --- /dev/null +++ b/margin_logs/step_0000537.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f24a2928a18c7c73a6756db93115dd870c4c62186a4d8181b4ea7247ca2f9a4 +size 384 diff --git a/margin_logs/step_0000538.npy b/margin_logs/step_0000538.npy new file mode 100644 index 0000000..2e18034 --- /dev/null +++ b/margin_logs/step_0000538.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:516153447376f44d3636564d673eaf4038e65a3daf21d8ab07258a84fffb9eeb +size 384 diff --git a/margin_logs/step_0000539.npy b/margin_logs/step_0000539.npy new file mode 100644 index 0000000..7678557 --- /dev/null +++ b/margin_logs/step_0000539.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6157daec9d8ace8b777a232090fe2e313fcd59c7991476a098fdb7ec91a4e49a +size 384 diff --git a/margin_logs/step_0000540.npy b/margin_logs/step_0000540.npy new file mode 100644 index 0000000..f542545 --- /dev/null +++ b/margin_logs/step_0000540.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63dd09fa36e3a31722eacce4b41da5341e52cabe36f0919744cd4312a555dabc +size 384 diff --git a/margin_logs/step_0000541.npy b/margin_logs/step_0000541.npy new file mode 100644 index 0000000..aa3f074 --- /dev/null +++ b/margin_logs/step_0000541.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc801f8ed8dc2286e11914ffcad588bdaf4a096724ea4f62805087f143156889 +size 384 diff --git a/margin_logs/step_0000542.npy b/margin_logs/step_0000542.npy new file mode 100644 index 0000000..528fd74 --- /dev/null +++ b/margin_logs/step_0000542.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e865d0f27153022f7419e6f18b733f145996c6998e8f16a25eb3558ff26eb576 +size 384 diff --git a/margin_logs/step_0000543.npy b/margin_logs/step_0000543.npy new file mode 100644 index 0000000..334aaec --- /dev/null +++ b/margin_logs/step_0000543.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72981b5b57c1eb90cc4c52b31deba00071e42142686ea59682549d53a1a93141 +size 384 diff --git a/margin_logs/step_0000544.npy b/margin_logs/step_0000544.npy new file mode 100644 index 0000000..dc42a43 --- /dev/null +++ b/margin_logs/step_0000544.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e820d755f9259c97c24d3e8071eac62c2cdcc7458c2b6bdbb5b95573047742f +size 384 diff --git a/margin_logs/step_0000545.npy b/margin_logs/step_0000545.npy new file mode 100644 index 0000000..e41d1c7 --- /dev/null +++ b/margin_logs/step_0000545.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ddffadebcf1e85cf7415397d616c280b2e44f2e3dcafb0c72432594d460eb5b +size 384 diff --git a/margin_logs/step_0000546.npy b/margin_logs/step_0000546.npy new file mode 100644 index 0000000..9639b53 --- /dev/null +++ b/margin_logs/step_0000546.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c9eb8811969164b104a829e4a8fb4cc280b6f557d17951f7a10333dcc71e0e6 +size 384 diff --git a/margin_logs/step_0000547.npy b/margin_logs/step_0000547.npy new file mode 100644 index 0000000..13379ac --- /dev/null +++ b/margin_logs/step_0000547.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e304453e3bd8a96e13dc258d71661682f8f8876823c6a940b4226c124e5f5a28 +size 384 diff --git a/margin_logs/step_0000548.npy b/margin_logs/step_0000548.npy new file mode 100644 index 0000000..93b24a8 --- /dev/null +++ b/margin_logs/step_0000548.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c120b3a12ae310714cb8ed59deedefaa3b31d0fd5f35d31c92cb80a510732ef +size 384 diff --git a/margin_logs/step_0000549.npy b/margin_logs/step_0000549.npy new file mode 100644 index 0000000..e2d1835 --- /dev/null +++ b/margin_logs/step_0000549.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:505cfa8347cec36ba76dbc7516aadc53c5e01134628130bb7c1793c8ed4dc32d +size 384 diff --git a/margin_logs/step_0000550.npy b/margin_logs/step_0000550.npy new file mode 100644 index 0000000..b633f6d --- /dev/null +++ b/margin_logs/step_0000550.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5f138b12f3ac208cb120406ddfa1eb7323c5879685212fcf05c02b5c88e904d +size 384 diff --git a/margin_logs/step_0000551.npy b/margin_logs/step_0000551.npy new file mode 100644 index 0000000..b29b925 --- /dev/null +++ b/margin_logs/step_0000551.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d7fe35e670783747207d9fd1840a4c90ec4bf0110f6561835c25b1990acae2f +size 384 diff --git a/margin_logs/step_0000552.npy b/margin_logs/step_0000552.npy new file mode 100644 index 0000000..40327e4 --- /dev/null +++ b/margin_logs/step_0000552.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:449735a7aea741ca16ea4be1f70ed3c4efde3d0cf2157c50f52043ffb34b0b4d +size 384 diff --git a/margin_logs/step_0000553.npy b/margin_logs/step_0000553.npy new file mode 100644 index 0000000..b06186c --- /dev/null +++ b/margin_logs/step_0000553.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3716e7f30f4342ee6e21025cbc8fe382ff070dd228354309b993a5f6a20ee35 +size 384 diff --git a/margin_logs/step_0000554.npy b/margin_logs/step_0000554.npy new file mode 100644 index 0000000..6b5d9a7 --- /dev/null +++ b/margin_logs/step_0000554.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44dfd3ee2fb1cc3cc08cd6fd1d4ee4960e47bdb61186a6f5090b46e9ccc66f34 +size 384 diff --git a/margin_logs/step_0000555.npy b/margin_logs/step_0000555.npy new file mode 100644 index 0000000..95d3910 --- /dev/null +++ b/margin_logs/step_0000555.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89ec9077e700c86db23b199b07520f18e270704154e0a3717e0e1f56d1b560d0 +size 384 diff --git a/margin_logs/step_0000556.npy b/margin_logs/step_0000556.npy new file mode 100644 index 0000000..844b5fb --- /dev/null +++ b/margin_logs/step_0000556.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71e91f39403b774766cb435c71afde252771636c9c5a36c9b1e8d8bded39e0a9 +size 384 diff --git a/margin_logs/step_0000557.npy b/margin_logs/step_0000557.npy new file mode 100644 index 0000000..166ddd5 --- /dev/null +++ b/margin_logs/step_0000557.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6f77d5ef1e061f695ee58831a70cf35235d3e8560f2fffc0eb0ecfd7d4b0c93 +size 384 diff --git a/margin_logs/step_0000558.npy b/margin_logs/step_0000558.npy new file mode 100644 index 0000000..4a7e5b2 --- /dev/null +++ b/margin_logs/step_0000558.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e3e4e04f2bbf1c7ea3485ec877b07334c3f9f62bdadc405bb10e039a0181ed0 +size 384 diff --git a/margin_logs/step_0000559.npy b/margin_logs/step_0000559.npy new file mode 100644 index 0000000..3a22fa1 --- /dev/null +++ b/margin_logs/step_0000559.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a7faf0358987e73b1096cad0d68039325ead66a881e9786cd65202369a2538d +size 384 diff --git a/margin_logs/step_0000560.npy b/margin_logs/step_0000560.npy new file mode 100644 index 0000000..0be54e6 --- /dev/null +++ b/margin_logs/step_0000560.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:322e3c60b142f3a49f63ab56e9b050dfe27cc9c458a66dcc365b0b756b47f221 +size 384 diff --git a/margin_logs/step_0000561.npy b/margin_logs/step_0000561.npy new file mode 100644 index 0000000..6c10bb1 --- /dev/null +++ b/margin_logs/step_0000561.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fbf2f039455b8d85af513f5ed826011c10372a098b8d8d3b881d6924acdfc02 +size 384 diff --git a/margin_logs/step_0000562.npy b/margin_logs/step_0000562.npy new file mode 100644 index 0000000..c0a3963 --- /dev/null +++ b/margin_logs/step_0000562.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f966ebb8fa4788cbab261f7099fe80aec330174e8871d19c56be323eb9e7b279 +size 384 diff --git a/margin_logs/step_0000563.npy b/margin_logs/step_0000563.npy new file mode 100644 index 0000000..a5196d8 --- /dev/null +++ b/margin_logs/step_0000563.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bde4a185649a667060ec6d6562685eb72b7c807f6bbee85230985ed432e7485 +size 384 diff --git a/margin_logs/step_0000564.npy b/margin_logs/step_0000564.npy new file mode 100644 index 0000000..2ee82fb --- /dev/null +++ b/margin_logs/step_0000564.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c580afb26deebf0f5aec64c0a7b682b38709ada04736e3add9a9905909de01b +size 384 diff --git a/margin_logs/step_0000565.npy b/margin_logs/step_0000565.npy new file mode 100644 index 0000000..274515a --- /dev/null +++ b/margin_logs/step_0000565.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c162c43796d9954be310209c466e25c1e4ac885c933208b15184f98c217d6dd +size 384 diff --git a/margin_logs/step_0000566.npy b/margin_logs/step_0000566.npy new file mode 100644 index 0000000..1fddc9c --- /dev/null +++ b/margin_logs/step_0000566.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9b1ddd245f8a4993eb61bf8a89f0295d99e3665fffa53c06dd758081d7bc3c3 +size 384 diff --git a/margin_logs/step_0000567.npy b/margin_logs/step_0000567.npy new file mode 100644 index 0000000..4b3265d --- /dev/null +++ b/margin_logs/step_0000567.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7672dffc2450b5e077be0731106dd04dad0ab7b2b3ba68d4edeccace2c67c329 +size 384 diff --git a/margin_logs/step_0000568.npy b/margin_logs/step_0000568.npy new file mode 100644 index 0000000..56fc881 --- /dev/null +++ b/margin_logs/step_0000568.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bec3528115722b138f1ddf3526c2e3909126a260555e17e03b208810a99af81c +size 384 diff --git a/margin_logs/step_0000569.npy b/margin_logs/step_0000569.npy new file mode 100644 index 0000000..af50977 --- /dev/null +++ b/margin_logs/step_0000569.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d965b472416a1dfc243aeb21683ddd4322431ec15540eed60b3760c5250777d +size 384 diff --git a/margin_logs/step_0000570.npy b/margin_logs/step_0000570.npy new file mode 100644 index 0000000..c45bc1f --- /dev/null +++ b/margin_logs/step_0000570.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49b128ead7fd7f097211705233c4b58922f1040b6847f111a82289fcc5bf9d71 +size 384 diff --git a/margin_logs/step_0000571.npy b/margin_logs/step_0000571.npy new file mode 100644 index 0000000..7130be3 --- /dev/null +++ b/margin_logs/step_0000571.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c272af56131d8d8cd4bd24faffe6bcb3b7836cbc8e871f81c52baf6ef1e779d +size 384 diff --git a/margin_logs/step_0000572.npy b/margin_logs/step_0000572.npy new file mode 100644 index 0000000..1d16989 --- /dev/null +++ b/margin_logs/step_0000572.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9af67beee872a8e9bc11e088be9509d386aac5af58bcb7a48b53405c60f0fa53 +size 384 diff --git a/margin_logs/step_0000573.npy b/margin_logs/step_0000573.npy new file mode 100644 index 0000000..31157ec --- /dev/null +++ b/margin_logs/step_0000573.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a94c8747471b82ffba10676778ea4adcb86dc19e4a22a0f6f27a8c6372976ea +size 384 diff --git a/margin_logs/step_0000574.npy b/margin_logs/step_0000574.npy new file mode 100644 index 0000000..aee5d25 --- /dev/null +++ b/margin_logs/step_0000574.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48e1b4f1b7420c0c06775bfe7f08ec64bb9a405897ce4f78836d610f94776413 +size 384 diff --git a/margin_logs/step_0000575.npy b/margin_logs/step_0000575.npy new file mode 100644 index 0000000..ef74838 --- /dev/null +++ b/margin_logs/step_0000575.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18660de208a0772d7579d2d6675718daf3b682eb84ebc027ad273919f5248246 +size 384 diff --git a/margin_logs/step_0000576.npy b/margin_logs/step_0000576.npy new file mode 100644 index 0000000..d812359 --- /dev/null +++ b/margin_logs/step_0000576.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1d33c6328202c9dcd907ad9a6a846e533d7fd34d83147ddfb3d1d1fa2acb4c8 +size 384 diff --git a/margin_logs/step_0000577.npy b/margin_logs/step_0000577.npy new file mode 100644 index 0000000..2edee9e --- /dev/null +++ b/margin_logs/step_0000577.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1e96d7fa460c58d919d90e75a233c5547d3cdc59b072fc021b6c307cd9eee06 +size 384 diff --git a/margin_logs/step_0000578.npy b/margin_logs/step_0000578.npy new file mode 100644 index 0000000..38376b4 --- /dev/null +++ b/margin_logs/step_0000578.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d3ad8008665e89340e670d130d8fddbfddb10043bfcf535f6f02dc5af0fd4e3 +size 384 diff --git a/margin_logs/step_0000579.npy b/margin_logs/step_0000579.npy new file mode 100644 index 0000000..80bb3cb --- /dev/null +++ b/margin_logs/step_0000579.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99a07f0ede8a7f1bdde1e2ec5faa71d6235ba49f6fd0b51a7a71093337f810b1 +size 384 diff --git a/margin_logs/step_0000580.npy b/margin_logs/step_0000580.npy new file mode 100644 index 0000000..e979c87 --- /dev/null +++ b/margin_logs/step_0000580.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fca7b64de183f9ee49529a7f125d696ba7f2f09d15a8d36e3f1512b6d03a8f03 +size 384 diff --git a/margin_logs/step_0000581.npy b/margin_logs/step_0000581.npy new file mode 100644 index 0000000..5468d73 --- /dev/null +++ b/margin_logs/step_0000581.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a40c8faed955e1a9b6e1d55af8fbc921a15b02ffb876f6ed3fa65e777d9edb3 +size 384 diff --git a/margin_logs/step_0000582.npy b/margin_logs/step_0000582.npy new file mode 100644 index 0000000..fc9733c --- /dev/null +++ b/margin_logs/step_0000582.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34deacd2507f4c520709cebdbd601addc3c4ceb2f7ddba58794ab5e12cb11d9c +size 384 diff --git a/margin_logs/step_0000583.npy b/margin_logs/step_0000583.npy new file mode 100644 index 0000000..1fe6143 --- /dev/null +++ b/margin_logs/step_0000583.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bd67568395a68e297701c202d8137eba2c21bb510d8f4aa9a48fedad81f011b +size 384 diff --git a/margin_logs/step_0000584.npy b/margin_logs/step_0000584.npy new file mode 100644 index 0000000..3bd8f3e --- /dev/null +++ b/margin_logs/step_0000584.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2df5d2652415bc02cb885c5cb84d9e5349d3e5f2efff2c3f5165be8e30b47e9c +size 384 diff --git a/margin_logs/step_0000585.npy b/margin_logs/step_0000585.npy new file mode 100644 index 0000000..e86ed72 --- /dev/null +++ b/margin_logs/step_0000585.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71c106d99ef327534e1bfb17e0ee2af6df757671de2f48b844fc8dc81b387c2e +size 384 diff --git a/margin_logs/step_0000586.npy b/margin_logs/step_0000586.npy new file mode 100644 index 0000000..2cb936b --- /dev/null +++ b/margin_logs/step_0000586.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c8e4984a8026e54003e27692904e48574015b4a5b470a90675552838dd7fa39 +size 384 diff --git a/margin_logs/step_0000587.npy b/margin_logs/step_0000587.npy new file mode 100644 index 0000000..4a6ab0e --- /dev/null +++ b/margin_logs/step_0000587.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f781cf4a2a3fd3e47a939ad7078ebd2fcf035c5a810fc7c4e659efe64b2283b2 +size 384 diff --git a/margin_logs/step_0000588.npy b/margin_logs/step_0000588.npy new file mode 100644 index 0000000..88e5170 --- /dev/null +++ b/margin_logs/step_0000588.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ac0446f013eaa9d11ae72737fd2ca8936bac08442e8a4de68267f9994ce04fc +size 384 diff --git a/margin_logs/step_0000589.npy b/margin_logs/step_0000589.npy new file mode 100644 index 0000000..35bebe1 --- /dev/null +++ b/margin_logs/step_0000589.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37cd907389bd1625560ab0e578d81fd3beb9dc881a8c28c820f51dc762cc8362 +size 384 diff --git a/margin_logs/step_0000590.npy b/margin_logs/step_0000590.npy new file mode 100644 index 0000000..aec1f8a --- /dev/null +++ b/margin_logs/step_0000590.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd318040017d40f849bdb3ef8ae39f93bee2354f1489863fa97ea3d06605f04d +size 384 diff --git a/margin_logs/step_0000591.npy b/margin_logs/step_0000591.npy new file mode 100644 index 0000000..059b1c2 --- /dev/null +++ b/margin_logs/step_0000591.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5069cb9fff710f10d7aa984033ec85335f08a72a5b95459564da8120b463229d +size 384 diff --git a/margin_logs/step_0000592.npy b/margin_logs/step_0000592.npy new file mode 100644 index 0000000..ab79441 --- /dev/null +++ b/margin_logs/step_0000592.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bfaf6c2f833de0e174735a3d763846799a13a06fcad0e39f06457a903c023f7 +size 384 diff --git a/margin_logs/step_0000593.npy b/margin_logs/step_0000593.npy new file mode 100644 index 0000000..abdfed7 --- /dev/null +++ b/margin_logs/step_0000593.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1efa125c642e95d4791b5e323166c6086f36368bebb1460a145128e64decd467 +size 384 diff --git a/margin_logs/step_0000594.npy b/margin_logs/step_0000594.npy new file mode 100644 index 0000000..b4fd3cf --- /dev/null +++ b/margin_logs/step_0000594.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f5577a1a6bb60265142e335d177fa15113e309da3d55ec8f251a99ae67e3ea8 +size 384 diff --git a/margin_logs/step_0000595.npy b/margin_logs/step_0000595.npy new file mode 100644 index 0000000..24d289c --- /dev/null +++ b/margin_logs/step_0000595.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6d46720078a351763c7a98d7d2e042cbf5e0548ba7fcd611ae7d7a44642d293 +size 384 diff --git a/margin_logs/step_0000596.npy b/margin_logs/step_0000596.npy new file mode 100644 index 0000000..774cf91 --- /dev/null +++ b/margin_logs/step_0000596.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51b4ec1b3d5140b5f45bc4a80dd88676637398ec87a0e47659caf83cae28fc08 +size 384 diff --git a/margin_logs/step_0000597.npy b/margin_logs/step_0000597.npy new file mode 100644 index 0000000..1873cf0 --- /dev/null +++ b/margin_logs/step_0000597.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea062daba51c29e9256697c4d2e49f1460399faea9bdaee3130a94c6d0451a15 +size 384 diff --git a/margin_logs/step_0000598.npy b/margin_logs/step_0000598.npy new file mode 100644 index 0000000..b70507b --- /dev/null +++ b/margin_logs/step_0000598.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:895fee578e19ebe3da3bb2ecbe33828133430392684ca22f138ebf4144c00e2a +size 384 diff --git a/margin_logs/step_0000599.npy b/margin_logs/step_0000599.npy new file mode 100644 index 0000000..119fcc9 --- /dev/null +++ b/margin_logs/step_0000599.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8797be72bacefbfeeb9b4926896625bb8dfbc9264bd10cf8858f23637d1a7ae1 +size 384 diff --git a/margin_logs/step_0000600.npy b/margin_logs/step_0000600.npy new file mode 100644 index 0000000..6ff02b1 --- /dev/null +++ b/margin_logs/step_0000600.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a490bfe9e90b8611bcbbf5bdf1fa8caf7ebe0cc64a89dd66ddf3150813de7d3d +size 384 diff --git a/margin_logs/step_0000601.npy b/margin_logs/step_0000601.npy new file mode 100644 index 0000000..f0abc83 --- /dev/null +++ b/margin_logs/step_0000601.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:710255e551dc7c8182a700352b52daa1ae622bc96bb31a7139c893210121690a +size 384 diff --git a/margin_logs/step_0000602.npy b/margin_logs/step_0000602.npy new file mode 100644 index 0000000..8e0c4fb --- /dev/null +++ b/margin_logs/step_0000602.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8021475a5e9493f2c0be5bc760f24020b9c6a9e04ed00f56557dd8933e4a68ac +size 384 diff --git a/margin_logs/step_0000603.npy b/margin_logs/step_0000603.npy new file mode 100644 index 0000000..dd5ff19 --- /dev/null +++ b/margin_logs/step_0000603.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:020a2d6fb880d377d638c3d25526ffa03a49c1fbaa8652919c985cb549f1babd +size 384 diff --git a/margin_logs/step_0000604.npy b/margin_logs/step_0000604.npy new file mode 100644 index 0000000..611c285 --- /dev/null +++ b/margin_logs/step_0000604.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4669ce2b52b56961f6b7fd6538374d6753adb2dadc3cdbbbcfdbfded530bc5d2 +size 384 diff --git a/margin_logs/step_0000605.npy b/margin_logs/step_0000605.npy new file mode 100644 index 0000000..b1a12a0 --- /dev/null +++ b/margin_logs/step_0000605.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c617d0eebb05c4ece61308a46ab5198b489fd608b6a3d60689c3c9dfad900d01 +size 384 diff --git a/margin_logs/step_0000606.npy b/margin_logs/step_0000606.npy new file mode 100644 index 0000000..26cf489 --- /dev/null +++ b/margin_logs/step_0000606.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68c3af45e711a9d43b1576cca63b26fdacfd4f160afa5e849fdb8111b3c7707a +size 384 diff --git a/margin_logs/step_0000607.npy b/margin_logs/step_0000607.npy new file mode 100644 index 0000000..e254151 --- /dev/null +++ b/margin_logs/step_0000607.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6545300778016beb617b1c864b23948f52ee2cfa34c21a7ea3099c2961c41ba0 +size 384 diff --git a/margin_logs/step_0000608.npy b/margin_logs/step_0000608.npy new file mode 100644 index 0000000..f301acd --- /dev/null +++ b/margin_logs/step_0000608.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2a2dc4d26ba44cd75947702e732344021dcce6178e3e5dd8e5d64bcf79cf35f +size 384 diff --git a/margin_logs/step_0000609.npy b/margin_logs/step_0000609.npy new file mode 100644 index 0000000..7ff3c59 --- /dev/null +++ b/margin_logs/step_0000609.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:471a1c64b4189bbd55eca3320720f2d97c388348062ab69225b9ee9a786c1183 +size 384 diff --git a/margin_logs/step_0000610.npy b/margin_logs/step_0000610.npy new file mode 100644 index 0000000..41464e9 --- /dev/null +++ b/margin_logs/step_0000610.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7a85a99f2df85247ab0c994e587e0ea47f9c4cd5b159cc02ebe3dfd6380fb7b +size 384 diff --git a/margin_logs/step_0000611.npy b/margin_logs/step_0000611.npy new file mode 100644 index 0000000..0111de0 --- /dev/null +++ b/margin_logs/step_0000611.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aa1d58444407e2cd72034339161817d8cdd5c304a50c1f0265070b79c42e447 +size 384 diff --git a/margin_logs/step_0000612.npy b/margin_logs/step_0000612.npy new file mode 100644 index 0000000..8c7bb4a --- /dev/null +++ b/margin_logs/step_0000612.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69c5c07ff3a824ab9ba8f0cac90c160caa59091eab87ac6095abf4eed21258af +size 384 diff --git a/margin_logs/step_0000613.npy b/margin_logs/step_0000613.npy new file mode 100644 index 0000000..84fc3d9 --- /dev/null +++ b/margin_logs/step_0000613.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc6e26342e2ad593b89d5de2be35d4b8e2c928704bf5acb414b49507ae561b5e +size 384 diff --git a/margin_logs/step_0000614.npy b/margin_logs/step_0000614.npy new file mode 100644 index 0000000..06e6bf2 --- /dev/null +++ b/margin_logs/step_0000614.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b25bf3918abff8098851821d579a19e60f989ea7054592e30aa47fc94e0a458d +size 384 diff --git a/margin_logs/step_0000615.npy b/margin_logs/step_0000615.npy new file mode 100644 index 0000000..471c63b --- /dev/null +++ b/margin_logs/step_0000615.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56bbc774791de70232aa50c04c541d8df2e60065748e885ac7b2cf790b6d2be0 +size 384 diff --git a/margin_logs/step_0000616.npy b/margin_logs/step_0000616.npy new file mode 100644 index 0000000..2d245d5 --- /dev/null +++ b/margin_logs/step_0000616.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9913b8854c4c1639cbe04663f43fcf6a46d0d7dc0f92b56fe118fa68215bb6d +size 384 diff --git a/margin_logs/step_0000617.npy b/margin_logs/step_0000617.npy new file mode 100644 index 0000000..6f81bfd --- /dev/null +++ b/margin_logs/step_0000617.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c62f6ea4e96a80ea7de532aef5383f25f709a212fbb4b320d162fe59afb3dc27 +size 384 diff --git a/margin_logs/step_0000618.npy b/margin_logs/step_0000618.npy new file mode 100644 index 0000000..51dd6a6 --- /dev/null +++ b/margin_logs/step_0000618.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3a116d28e5e8da007e766bc702f2f3077431a0f1935fb76a2572f3f55e84c00 +size 384 diff --git a/margin_logs/step_0000619.npy b/margin_logs/step_0000619.npy new file mode 100644 index 0000000..9ea6f73 --- /dev/null +++ b/margin_logs/step_0000619.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:420e0828caa0fef828ade6534575e43e3f827ca312cc2ba41e8927062dfa89d3 +size 384 diff --git a/margin_logs/step_0000620.npy b/margin_logs/step_0000620.npy new file mode 100644 index 0000000..ea53df9 --- /dev/null +++ b/margin_logs/step_0000620.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18af0da1ff67396567a2265eb7b2772af9190752cafe239fabc26157bbfd8c08 +size 384 diff --git a/margin_logs/step_0000621.npy b/margin_logs/step_0000621.npy new file mode 100644 index 0000000..42bc5b0 --- /dev/null +++ b/margin_logs/step_0000621.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf7281433212f719d50ea7576ce77502e03d3fce02824b2fa44ef2cedabbcaab +size 384 diff --git a/margin_logs/step_0000622.npy b/margin_logs/step_0000622.npy new file mode 100644 index 0000000..5d31fc1 --- /dev/null +++ b/margin_logs/step_0000622.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fce94258c6e7e63d54c45682ec7217eabdf6b337664fbd44c495acd94184c24 +size 384 diff --git a/margin_logs/step_0000623.npy b/margin_logs/step_0000623.npy new file mode 100644 index 0000000..c5670ab --- /dev/null +++ b/margin_logs/step_0000623.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cffe1b8b7b10d4eb3b941b08185b885368aad92f2e5afc58ff72d8d0c27acf08 +size 384 diff --git a/margin_logs/step_0000624.npy b/margin_logs/step_0000624.npy new file mode 100644 index 0000000..93f2745 --- /dev/null +++ b/margin_logs/step_0000624.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d27ecf6c4693d699018ad81cea47adabbdc0209a413ee3bef7012cd8613fd9a2 +size 384 diff --git a/margin_logs/step_0000625.npy b/margin_logs/step_0000625.npy new file mode 100644 index 0000000..2fbc633 --- /dev/null +++ b/margin_logs/step_0000625.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b4d8b71477291e9ae3aa60a10d617765c39125724569c541c7adaf91ececbbf +size 384 diff --git a/margin_logs/step_0000626.npy b/margin_logs/step_0000626.npy new file mode 100644 index 0000000..37fb686 --- /dev/null +++ b/margin_logs/step_0000626.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c517ec2e6deff112f8f4f564aea8eb0707c78566f18816e8b7396f0cc2fd937 +size 384 diff --git a/margin_logs/step_0000627.npy b/margin_logs/step_0000627.npy new file mode 100644 index 0000000..138aa83 --- /dev/null +++ b/margin_logs/step_0000627.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:550377b4eb826c54e3ec8697e1d814f8cb149e07cc930542ecf336d1356b4161 +size 384 diff --git a/margin_logs/step_0000628.npy b/margin_logs/step_0000628.npy new file mode 100644 index 0000000..792ac52 --- /dev/null +++ b/margin_logs/step_0000628.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a994ce3755b9156b6dbd76ff52bbbed035568feb57f12b4ae75680db00298041 +size 384 diff --git a/margin_logs/step_0000629.npy b/margin_logs/step_0000629.npy new file mode 100644 index 0000000..ae08cc4 --- /dev/null +++ b/margin_logs/step_0000629.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8ffd861888b85bfec5ae93c3655a5821da03eff305f5605d50c0816ca312494 +size 384 diff --git a/margin_logs/step_0000630.npy b/margin_logs/step_0000630.npy new file mode 100644 index 0000000..12c3e15 --- /dev/null +++ b/margin_logs/step_0000630.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce709109c557889fa97a2a98a09e192f773a8520187328505b8cf5fb513b2484 +size 384 diff --git a/margin_logs/step_0000631.npy b/margin_logs/step_0000631.npy new file mode 100644 index 0000000..784ed40 --- /dev/null +++ b/margin_logs/step_0000631.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:445bb4a9226dba92e50d11edbc6980ed3fca06fe7864b55605e7e73847415fd9 +size 384 diff --git a/margin_logs/step_0000632.npy b/margin_logs/step_0000632.npy new file mode 100644 index 0000000..cb3e83f --- /dev/null +++ b/margin_logs/step_0000632.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7638ea3a0d289dc205c7ae56f54ef7f438ee6a070e464da039afebd830d8b0e +size 384 diff --git a/margin_logs/step_0000633.npy b/margin_logs/step_0000633.npy new file mode 100644 index 0000000..9366da5 --- /dev/null +++ b/margin_logs/step_0000633.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:706d8cb02195e49d5062c863d3412da8c1768d2e6fae97ea33e18103b8230fb6 +size 384 diff --git a/margin_logs/step_0000634.npy b/margin_logs/step_0000634.npy new file mode 100644 index 0000000..dea1593 --- /dev/null +++ b/margin_logs/step_0000634.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a1c9dd565e55b06aaa6be18199657b173cd052c854d891fbf1e24bcefca6c41 +size 384 diff --git a/margin_logs/step_0000635.npy b/margin_logs/step_0000635.npy new file mode 100644 index 0000000..0f8af67 --- /dev/null +++ b/margin_logs/step_0000635.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d91cbbfdf6322c26decd59fb30de9e6a76c549b0732d9d2e0785d59c3e4c246e +size 384 diff --git a/margin_logs/step_0000636.npy b/margin_logs/step_0000636.npy new file mode 100644 index 0000000..3e6591a --- /dev/null +++ b/margin_logs/step_0000636.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f952f7732b43dbfb3885ef139251a3d4ef34f7c205ede9e044a1fab1296e294a +size 384 diff --git a/margin_logs/step_0000637.npy b/margin_logs/step_0000637.npy new file mode 100644 index 0000000..408a9ef --- /dev/null +++ b/margin_logs/step_0000637.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12a22d5ceb9b036ad39c92888e07eb925a715f75449d65f3c44734b9feb57ec0 +size 384 diff --git a/margin_logs/step_0000638.npy b/margin_logs/step_0000638.npy new file mode 100644 index 0000000..0e88f4a --- /dev/null +++ b/margin_logs/step_0000638.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbb25daaf5c752c2a13bda3605096a5857a05caa253f1a8f41f5dcb0bbca046a +size 384 diff --git a/margin_logs/step_0000639.npy b/margin_logs/step_0000639.npy new file mode 100644 index 0000000..4bb77b6 --- /dev/null +++ b/margin_logs/step_0000639.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abaa84879f807d765ae6083c46fe14b443c727116fa49fd388d8ec4d689e6242 +size 384 diff --git a/margin_logs/step_0000640.npy b/margin_logs/step_0000640.npy new file mode 100644 index 0000000..50f598c --- /dev/null +++ b/margin_logs/step_0000640.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:777e367aa8f8cfab3cd10cc6c4c4445d7d17ff32512247b8eaa3915539592c42 +size 384 diff --git a/margin_logs/step_0000641.npy b/margin_logs/step_0000641.npy new file mode 100644 index 0000000..6957125 --- /dev/null +++ b/margin_logs/step_0000641.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3f3fb1f7d5a04b5da76b9fd7b4882466273f0381abbf77b1fc65613da6cc1ee +size 384 diff --git a/margin_logs/step_0000642.npy b/margin_logs/step_0000642.npy new file mode 100644 index 0000000..66a8103 --- /dev/null +++ b/margin_logs/step_0000642.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12d28139c3cf4a551b2b19f19fe5e0feb20ca206fb7c6d0234a799f3b4630dcd +size 384 diff --git a/margin_logs/step_0000643.npy b/margin_logs/step_0000643.npy new file mode 100644 index 0000000..98425af --- /dev/null +++ b/margin_logs/step_0000643.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aac6f73034ce166e506c19bb7753d564dd6515ee25c4f136117e9ca2a5f24321 +size 384 diff --git a/margin_logs/step_0000644.npy b/margin_logs/step_0000644.npy new file mode 100644 index 0000000..8dc2230 --- /dev/null +++ b/margin_logs/step_0000644.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff6ac228432a9571549cfff5a99e22ef04a3a6a7cc7c25374bf898d72299d2f2 +size 384 diff --git a/margin_logs/step_0000645.npy b/margin_logs/step_0000645.npy new file mode 100644 index 0000000..fd54c57 --- /dev/null +++ b/margin_logs/step_0000645.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c012548766ae5deddc24091a7c472622a127868cd75793d0230957a1bb683277 +size 384 diff --git a/margin_logs/step_0000646.npy b/margin_logs/step_0000646.npy new file mode 100644 index 0000000..f1a937a --- /dev/null +++ b/margin_logs/step_0000646.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87c0184bc8d298b3b08ae89df702d1199eaf17abe8a81e6726e674d2c3169145 +size 384 diff --git a/margin_logs/step_0000647.npy b/margin_logs/step_0000647.npy new file mode 100644 index 0000000..c3a8b03 --- /dev/null +++ b/margin_logs/step_0000647.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:093781e46df02f5387a7f3b66570271d1707826d3ac74bc6937a16e761279571 +size 384 diff --git a/margin_logs/step_0000648.npy b/margin_logs/step_0000648.npy new file mode 100644 index 0000000..cd19dcb --- /dev/null +++ b/margin_logs/step_0000648.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f0936f16e42c02ae3d8074b28667616a64a95562850e89094d457215ecbf006 +size 384 diff --git a/margin_logs/step_0000649.npy b/margin_logs/step_0000649.npy new file mode 100644 index 0000000..c02979e --- /dev/null +++ b/margin_logs/step_0000649.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04774d94f03dfccd821affe9deff106f90bc4f041f14cc6d66980c6276513757 +size 384 diff --git a/margin_logs/step_0000650.npy b/margin_logs/step_0000650.npy new file mode 100644 index 0000000..19275e6 --- /dev/null +++ b/margin_logs/step_0000650.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02338d5c9dbc1b228bca7707bc67808135cce1d612461e4d87935e6d62cedda3 +size 384 diff --git a/margin_logs/step_0000651.npy b/margin_logs/step_0000651.npy new file mode 100644 index 0000000..edf7693 --- /dev/null +++ b/margin_logs/step_0000651.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a1600667718e1dddc1cd0c9f1622062a160d0d8c101559cc1b7959bd9566fe7 +size 384 diff --git a/margin_logs/step_0000652.npy b/margin_logs/step_0000652.npy new file mode 100644 index 0000000..3639bee --- /dev/null +++ b/margin_logs/step_0000652.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2de278bd5ca3cf7312625574331399fab572a20ecec12ef94074b7e46150446 +size 384 diff --git a/margin_logs/step_0000653.npy b/margin_logs/step_0000653.npy new file mode 100644 index 0000000..f65b7d2 --- /dev/null +++ b/margin_logs/step_0000653.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f402f25bbb62b9c3048a87be9d7555455294f739f25095a55c81f887202c3fde +size 384 diff --git a/margin_logs/step_0000654.npy b/margin_logs/step_0000654.npy new file mode 100644 index 0000000..cae51b6 --- /dev/null +++ b/margin_logs/step_0000654.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b53fb4479d64a077e022d825a03ae524cd0da1890ee0ff8fcb117126075a2541 +size 384 diff --git a/margin_logs/step_0000655.npy b/margin_logs/step_0000655.npy new file mode 100644 index 0000000..8ee48ec --- /dev/null +++ b/margin_logs/step_0000655.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf18a5dc2557a29246997d0936b47a4fa1a2c6f2262600dcd4c6f805a908dcd6 +size 384 diff --git a/margin_logs/step_0000656.npy b/margin_logs/step_0000656.npy new file mode 100644 index 0000000..f3fc2f0 --- /dev/null +++ b/margin_logs/step_0000656.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c0d7e6852c2b3bbe80b59b9f1b5eb62383db8dddb5652539686bdbc14e2b42a +size 384 diff --git a/margin_logs/step_0000657.npy b/margin_logs/step_0000657.npy new file mode 100644 index 0000000..762af13 --- /dev/null +++ b/margin_logs/step_0000657.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9f1eb3b222dffc96a88f6f133ee875f78ee7d6c2b72948975652f56c4495418 +size 384 diff --git a/margin_logs/step_0000658.npy b/margin_logs/step_0000658.npy new file mode 100644 index 0000000..dfd7a92 --- /dev/null +++ b/margin_logs/step_0000658.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b42849235ddc9d705b0931305f087109b285b1cde4ae9189c680df25bac28b84 +size 384 diff --git a/margin_logs/step_0000659.npy b/margin_logs/step_0000659.npy new file mode 100644 index 0000000..cec3e9a --- /dev/null +++ b/margin_logs/step_0000659.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f69b625fda4e5e121238f414ca43d5c2e856fa1e9ff95e1d6ce38a3c038cca4 +size 384 diff --git a/margin_logs/step_0000660.npy b/margin_logs/step_0000660.npy new file mode 100644 index 0000000..bc9b522 --- /dev/null +++ b/margin_logs/step_0000660.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a1eb86b4f965eb01bfdebe2be2cd29a32a5e84d34188b4402e0d6db3491fa7e +size 384 diff --git a/margin_logs/step_0000661.npy b/margin_logs/step_0000661.npy new file mode 100644 index 0000000..9fc1c90 --- /dev/null +++ b/margin_logs/step_0000661.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f4d8cf8a8cad2f71ea5c8695c371f2048852db0e96bc226ee96f323b7f53f89 +size 384 diff --git a/model-00001-of-00007.safetensors b/model-00001-of-00007.safetensors new file mode 100644 index 0000000..331afb0 --- /dev/null +++ b/model-00001-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07ac2bbce7a52b91de507826e1c6468713071c5ab88ccb2a7df46ab5c4490fc7 +size 4886466168 diff --git a/model-00002-of-00007.safetensors b/model-00002-of-00007.safetensors new file mode 100644 index 0000000..655ec3e --- /dev/null +++ b/model-00002-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:388f0ac70a43599aaee4b5d2ac1cb1deb0c58591d80dc836df34b444d0e02c6f +size 4832007448 diff --git a/model-00003-of-00007.safetensors b/model-00003-of-00007.safetensors new file mode 100644 index 0000000..7783322 --- /dev/null +++ b/model-00003-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e512d3eb05b10e1b6d97c87c6e8bba9c6e3a9eb408b85a6b3073aae09dbf1f22 +size 4999813112 diff --git a/model-00004-of-00007.safetensors b/model-00004-of-00007.safetensors new file mode 100644 index 0000000..b8d86d2 --- /dev/null +++ b/model-00004-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:902f270ad9cf1abbf63fa4dfb1b0495d7e9e813f071dc5fd516da32fb72a6917 +size 4999813128 diff --git a/model-00005-of-00007.safetensors b/model-00005-of-00007.safetensors new file mode 100644 index 0000000..8e00ae9 --- /dev/null +++ b/model-00005-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:242623f90c02514141a42b5ba6b976948282a0fe2ebd3c43df76485403ae13bc +size 4832007496 diff --git a/model-00006-of-00007.safetensors b/model-00006-of-00007.safetensors new file mode 100644 index 0000000..e10a949 --- /dev/null +++ b/model-00006-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:642b90db82fc7d559128209ad9364463cbdd2615fcef309a8d003166a7b7565c +size 4999813120 diff --git a/model-00007-of-00007.safetensors b/model-00007-of-00007.safetensors new file mode 100644 index 0000000..bc18de7 --- /dev/null +++ b/model-00007-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a12832ea05b67d85574bbeea0372d18e7ac9463833dac69d58699d231003b22f +size 2571158184 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..0985084 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,298 @@ +{ + "metadata": { + "total_size": 32121044992 + }, + "weight_map": { + "lm_head.weight": "model-00007-of-00007.safetensors", + "model.embed_tokens.weight": "model-00001-of-00007.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.10.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.15.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.20.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.21.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.26.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.3.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.30.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.input_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.4.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.norm.weight": "model-00007-of-00007.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..e5b39b6 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..86a3394 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..8c6916a --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 2048, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..221be7d --- /dev/null +++ b/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 0.999244142101285, + "total_flos": 0.0, + "train_loss": 1.1443162902220294, + "train_runtime": 1702.0491, + "train_samples": 42336, + "train_samples_per_second": 24.874, + "train_steps_per_second": 0.388 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..5ce8def --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,9990 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.999244142101285, + "eval_steps": 300, + "global_step": 661, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0015117157974300832, + "grad_norm": 2.8220584392547607, + "learning_rate": 0.0, + "logits/chosen": 0.15524622797966003, + "logits/rejected": 0.17812994122505188, + "logps/chosen": -72.87983703613281, + "logps/ref_chosen": -72.91278839111328, + "logps/ref_rejected": -47.538291931152344, + "logps/rejected": -47.55305480957031, + "loss": 1.3863, + "margin_dpo/margin_mean": 0.04771292209625244, + "margin_dpo/margin_std": 0.24055811762809753, + "step": 1 + }, + { + "epoch": 0.0030234315948601664, + "grad_norm": 2.786863088607788, + "learning_rate": 7.462686567164179e-09, + "logits/chosen": 0.13265366852283478, + "logits/rejected": 0.06028885394334793, + "logps/chosen": -47.69560241699219, + "logps/ref_chosen": -47.742698669433594, + "logps/ref_rejected": -54.36896896362305, + "logps/rejected": -54.41385269165039, + "loss": 1.3859, + "margin_dpo/margin_mean": 0.09197819232940674, + "margin_dpo/margin_std": 0.19671888649463654, + "step": 2 + }, + { + "epoch": 0.0045351473922902496, + "grad_norm": 3.1143651008605957, + "learning_rate": 1.4925373134328357e-08, + "logits/chosen": 0.12916997075080872, + "logits/rejected": 0.0313495397567749, + "logps/chosen": -57.6026611328125, + "logps/ref_chosen": -57.59052276611328, + "logps/ref_rejected": -123.58332824707031, + "logps/rejected": -123.56676483154297, + "loss": 1.3866, + "margin_dpo/margin_mean": -0.028696417808532715, + "margin_dpo/margin_std": 0.30107247829437256, + "step": 3 + }, + { + "epoch": 0.006046863189720333, + "grad_norm": 3.436974287033081, + "learning_rate": 2.2388059701492534e-08, + "logits/chosen": 0.11005942523479462, + "logits/rejected": 0.08743590116500854, + "logps/chosen": -72.13690185546875, + "logps/ref_chosen": -72.2152328491211, + "logps/ref_rejected": -72.17367553710938, + "logps/rejected": -72.15191650390625, + "loss": 1.3864, + "margin_dpo/margin_mean": 0.05657494068145752, + "margin_dpo/margin_std": 0.35384583473205566, + "step": 4 + }, + { + "epoch": 0.007558578987150416, + "grad_norm": 2.9546422958374023, + "learning_rate": 2.9850746268656714e-08, + "logits/chosen": 0.1004139631986618, + "logits/rejected": 0.015552863478660583, + "logps/chosen": -56.2198486328125, + "logps/ref_chosen": -56.174278259277344, + "logps/ref_rejected": -111.51732635498047, + "logps/rejected": -111.60870361328125, + "loss": 1.3863, + "margin_dpo/margin_mean": 0.04580581188201904, + "margin_dpo/margin_std": 0.2617151737213135, + "step": 5 + }, + { + "epoch": 0.009070294784580499, + "grad_norm": 2.960731267929077, + "learning_rate": 3.731343283582089e-08, + "logits/chosen": 0.09757982194423676, + "logits/rejected": 0.021553121507167816, + "logps/chosen": -33.81324005126953, + "logps/ref_chosen": -33.68452453613281, + "logps/ref_rejected": -102.40830993652344, + "logps/rejected": -102.35445404052734, + "loss": 1.387, + "margin_dpo/margin_mean": -0.1825714111328125, + "margin_dpo/margin_std": 0.29179731011390686, + "step": 6 + }, + { + "epoch": 0.010582010582010581, + "grad_norm": 2.716484546661377, + "learning_rate": 4.477611940298507e-08, + "logits/chosen": -0.05511504039168358, + "logits/rejected": -0.10139390826225281, + "logps/chosen": -36.465389251708984, + "logps/ref_chosen": -36.51213073730469, + "logps/ref_rejected": -55.43703842163086, + "logps/rejected": -55.4085578918457, + "loss": 1.3864, + "margin_dpo/margin_mean": 0.01826620101928711, + "margin_dpo/margin_std": 0.24058791995048523, + "step": 7 + }, + { + "epoch": 0.012093726379440665, + "grad_norm": 2.8784360885620117, + "learning_rate": 5.223880597014925e-08, + "logits/chosen": 0.09966567903757095, + "logits/rejected": 0.06772118806838989, + "logps/chosen": -91.59811401367188, + "logps/ref_chosen": -91.63668823242188, + "logps/ref_rejected": -115.22460174560547, + "logps/rejected": -115.18936920166016, + "loss": 1.3864, + "margin_dpo/margin_mean": 0.0033463239669799805, + "margin_dpo/margin_std": 0.3163236379623413, + "step": 8 + }, + { + "epoch": 0.013605442176870748, + "grad_norm": 3.1453640460968018, + "learning_rate": 5.970149253731343e-08, + "logits/chosen": 0.08192189782857895, + "logits/rejected": 0.07255452871322632, + "logps/chosen": -91.2304916381836, + "logps/ref_chosen": -91.08434295654297, + "logps/ref_rejected": -78.1643295288086, + "logps/rejected": -78.20406341552734, + "loss": 1.3863, + "margin_dpo/margin_mean": -0.10640597343444824, + "margin_dpo/margin_std": 0.37841445207595825, + "step": 9 + }, + { + "epoch": 0.015117157974300832, + "grad_norm": 2.9692389965057373, + "learning_rate": 6.71641791044776e-08, + "logits/chosen": 0.1450178623199463, + "logits/rejected": 0.08948463946580887, + "logps/chosen": -67.3464126586914, + "logps/ref_chosen": -67.3323974609375, + "logps/ref_rejected": -115.37638854980469, + "logps/rejected": -115.22101593017578, + "loss": 1.3866, + "margin_dpo/margin_mean": -0.16937530040740967, + "margin_dpo/margin_std": 0.2835836708545685, + "step": 10 + }, + { + "epoch": 0.016628873771730914, + "grad_norm": 2.9334216117858887, + "learning_rate": 7.462686567164178e-08, + "logits/chosen": 0.06959305703639984, + "logits/rejected": 0.031854622066020966, + "logps/chosen": -48.15754699707031, + "logps/ref_chosen": -48.158531188964844, + "logps/ref_rejected": -66.28083038330078, + "logps/rejected": -66.29977416992188, + "loss": 1.3861, + "margin_dpo/margin_mean": 0.01993393898010254, + "margin_dpo/margin_std": 0.2706565260887146, + "step": 11 + }, + { + "epoch": 0.018140589569160998, + "grad_norm": 2.821261167526245, + "learning_rate": 8.208955223880596e-08, + "logits/chosen": -0.059022121131420135, + "logits/rejected": -0.055310823023319244, + "logps/chosen": -68.31194305419922, + "logps/ref_chosen": -68.27070617675781, + "logps/ref_rejected": -66.15010070800781, + "logps/rejected": -66.14984130859375, + "loss": 1.3862, + "margin_dpo/margin_mean": -0.04150247573852539, + "margin_dpo/margin_std": 0.30395328998565674, + "step": 12 + }, + { + "epoch": 0.019652305366591082, + "grad_norm": 2.898451089859009, + "learning_rate": 8.955223880597014e-08, + "logits/chosen": 0.07420723140239716, + "logits/rejected": -0.013622896745800972, + "logps/chosen": -55.22724151611328, + "logps/ref_chosen": -55.27293395996094, + "logps/ref_rejected": -116.98251342773438, + "logps/rejected": -116.99977111816406, + "loss": 1.386, + "margin_dpo/margin_mean": 0.06296110153198242, + "margin_dpo/margin_std": 0.41959983110427856, + "step": 13 + }, + { + "epoch": 0.021164021164021163, + "grad_norm": 2.963911533355713, + "learning_rate": 9.701492537313432e-08, + "logits/chosen": 0.1168874204158783, + "logits/rejected": 0.0010670926421880722, + "logps/chosen": -54.75418472290039, + "logps/ref_chosen": -54.73517608642578, + "logps/ref_rejected": -79.82144165039062, + "logps/rejected": -79.80023193359375, + "loss": 1.3859, + "margin_dpo/margin_mean": -0.0402069091796875, + "margin_dpo/margin_std": 0.21686886250972748, + "step": 14 + }, + { + "epoch": 0.022675736961451247, + "grad_norm": 3.3150715827941895, + "learning_rate": 1.044776119402985e-07, + "logits/chosen": 0.09902211278676987, + "logits/rejected": 0.0021891482174396515, + "logps/chosen": -47.44508361816406, + "logps/ref_chosen": -47.35077667236328, + "logps/ref_rejected": -98.93363189697266, + "logps/rejected": -98.7593994140625, + "loss": 1.3867, + "margin_dpo/margin_mean": -0.26853013038635254, + "margin_dpo/margin_std": 0.3253557085990906, + "step": 15 + }, + { + "epoch": 0.02418745275888133, + "grad_norm": 2.685232639312744, + "learning_rate": 1.1194029850746268e-07, + "logits/chosen": 0.13864967226982117, + "logits/rejected": 0.09606489539146423, + "logps/chosen": -37.010860443115234, + "logps/ref_chosen": -36.986331939697266, + "logps/ref_rejected": -55.56471633911133, + "logps/rejected": -55.6058464050293, + "loss": 1.3861, + "margin_dpo/margin_mean": 0.016601920127868652, + "margin_dpo/margin_std": 0.2442459762096405, + "step": 16 + }, + { + "epoch": 0.025699168556311415, + "grad_norm": 3.2944958209991455, + "learning_rate": 1.1940298507462686e-07, + "logits/chosen": 0.06346956640481949, + "logits/rejected": 0.028996460139751434, + "logps/chosen": -53.554481506347656, + "logps/ref_chosen": -53.60247802734375, + "logps/ref_rejected": -77.51017761230469, + "logps/rejected": -77.53253936767578, + "loss": 1.386, + "margin_dpo/margin_mean": 0.07034468650817871, + "margin_dpo/margin_std": 0.30959659814834595, + "step": 17 + }, + { + "epoch": 0.027210884353741496, + "grad_norm": 2.7822513580322266, + "learning_rate": 1.2686567164179106e-07, + "logits/chosen": 0.08662936091423035, + "logits/rejected": 0.05197212100028992, + "logps/chosen": -43.53667449951172, + "logps/ref_chosen": -43.651275634765625, + "logps/ref_rejected": -70.06555938720703, + "logps/rejected": -70.07490539550781, + "loss": 1.3857, + "margin_dpo/margin_mean": 0.12395095825195312, + "margin_dpo/margin_std": 0.20322147011756897, + "step": 18 + }, + { + "epoch": 0.02872260015117158, + "grad_norm": 3.082880735397339, + "learning_rate": 1.343283582089552e-07, + "logits/chosen": 0.09144282341003418, + "logits/rejected": 0.04711594060063362, + "logps/chosen": -59.105987548828125, + "logps/ref_chosen": -59.121559143066406, + "logps/ref_rejected": -95.91956329345703, + "logps/rejected": -95.91903686523438, + "loss": 1.386, + "margin_dpo/margin_mean": 0.015046358108520508, + "margin_dpo/margin_std": 0.3638499677181244, + "step": 19 + }, + { + "epoch": 0.030234315948601664, + "grad_norm": 2.926210880279541, + "learning_rate": 1.4179104477611938e-07, + "logits/chosen": 0.12322017550468445, + "logits/rejected": 0.061798982322216034, + "logps/chosen": -51.08586120605469, + "logps/ref_chosen": -51.031883239746094, + "logps/ref_rejected": -96.53536224365234, + "logps/rejected": -96.57363891601562, + "loss": 1.3865, + "margin_dpo/margin_mean": -0.015711426734924316, + "margin_dpo/margin_std": 0.19928568601608276, + "step": 20 + }, + { + "epoch": 0.031746031746031744, + "grad_norm": 2.9375104904174805, + "learning_rate": 1.4925373134328355e-07, + "logits/chosen": -0.0577910915017128, + "logits/rejected": -0.00928708165884018, + "logps/chosen": -76.64593505859375, + "logps/ref_chosen": -76.7261962890625, + "logps/ref_rejected": -53.63877868652344, + "logps/rejected": -53.602439880371094, + "loss": 1.3867, + "margin_dpo/margin_mean": 0.0439220666885376, + "margin_dpo/margin_std": 0.23736098408699036, + "step": 21 + }, + { + "epoch": 0.03325774754346183, + "grad_norm": 2.896641969680786, + "learning_rate": 1.5671641791044775e-07, + "logits/chosen": -0.03439096361398697, + "logits/rejected": -0.11815059185028076, + "logps/chosen": -36.69358444213867, + "logps/ref_chosen": -36.67463302612305, + "logps/ref_rejected": -58.37761688232422, + "logps/rejected": -58.41578674316406, + "loss": 1.3866, + "margin_dpo/margin_mean": 0.019217491149902344, + "margin_dpo/margin_std": 0.21051761507987976, + "step": 22 + }, + { + "epoch": 0.03476946334089191, + "grad_norm": 3.29414963722229, + "learning_rate": 1.6417910447761193e-07, + "logits/chosen": 0.2472844123840332, + "logits/rejected": 0.1739306002855301, + "logps/chosen": -88.78631591796875, + "logps/ref_chosen": -88.72676849365234, + "logps/ref_rejected": -151.3695068359375, + "logps/rejected": -151.45941162109375, + "loss": 1.3865, + "margin_dpo/margin_mean": 0.030363917350769043, + "margin_dpo/margin_std": 0.18649427592754364, + "step": 23 + }, + { + "epoch": 0.036281179138321996, + "grad_norm": 2.626323938369751, + "learning_rate": 1.716417910447761e-07, + "logits/chosen": 0.10805254429578781, + "logits/rejected": 0.052521951496601105, + "logps/chosen": -46.81217575073242, + "logps/ref_chosen": -46.81597137451172, + "logps/ref_rejected": -77.96964263916016, + "logps/rejected": -78.01282501220703, + "loss": 1.3866, + "margin_dpo/margin_mean": 0.04697418212890625, + "margin_dpo/margin_std": 0.1974717229604721, + "step": 24 + }, + { + "epoch": 0.03779289493575208, + "grad_norm": 2.9760308265686035, + "learning_rate": 1.7910447761194027e-07, + "logits/chosen": 0.027542775496840477, + "logits/rejected": -0.008236430585384369, + "logps/chosen": -66.06083679199219, + "logps/ref_chosen": -66.00652313232422, + "logps/ref_rejected": -94.2716293334961, + "logps/rejected": -94.29405212402344, + "loss": 1.3861, + "margin_dpo/margin_mean": -0.031876444816589355, + "margin_dpo/margin_std": 0.2860802412033081, + "step": 25 + }, + { + "epoch": 0.039304610733182165, + "grad_norm": 2.988525152206421, + "learning_rate": 1.8656716417910447e-07, + "logits/chosen": 0.16524234414100647, + "logits/rejected": 0.14268483221530914, + "logps/chosen": -67.2355728149414, + "logps/ref_chosen": -67.12877655029297, + "logps/ref_rejected": -115.98387145996094, + "logps/rejected": -116.02112579345703, + "loss": 1.3867, + "margin_dpo/margin_mean": -0.06954813003540039, + "margin_dpo/margin_std": 0.4333837032318115, + "step": 26 + }, + { + "epoch": 0.04081632653061224, + "grad_norm": 2.903107166290283, + "learning_rate": 1.9402985074626865e-07, + "logits/chosen": 0.1587177813053131, + "logits/rejected": 0.14436712861061096, + "logps/chosen": -45.9591178894043, + "logps/ref_chosen": -45.86199188232422, + "logps/ref_rejected": -56.50844955444336, + "logps/rejected": -56.52033615112305, + "loss": 1.3864, + "margin_dpo/margin_mean": -0.08524084091186523, + "margin_dpo/margin_std": 0.2513440251350403, + "step": 27 + }, + { + "epoch": 0.042328042328042326, + "grad_norm": 2.764413833618164, + "learning_rate": 2.0149253731343282e-07, + "logits/chosen": -0.02600286155939102, + "logits/rejected": -0.024466849863529205, + "logps/chosen": -100.1274642944336, + "logps/ref_chosen": -100.07420349121094, + "logps/ref_rejected": -98.97936248779297, + "logps/rejected": -98.93499755859375, + "loss": 1.3864, + "margin_dpo/margin_mean": -0.09762585163116455, + "margin_dpo/margin_std": 0.4535467326641083, + "step": 28 + }, + { + "epoch": 0.04383975812547241, + "grad_norm": 3.2600700855255127, + "learning_rate": 2.08955223880597e-07, + "logits/chosen": 0.09348219633102417, + "logits/rejected": 0.09450555592775345, + "logps/chosen": -77.15126037597656, + "logps/ref_chosen": -77.19854736328125, + "logps/ref_rejected": -73.5864028930664, + "logps/rejected": -73.67513275146484, + "loss": 1.3857, + "margin_dpo/margin_mean": 0.13602125644683838, + "margin_dpo/margin_std": 0.40513163805007935, + "step": 29 + }, + { + "epoch": 0.045351473922902494, + "grad_norm": 3.0433361530303955, + "learning_rate": 2.1641791044776117e-07, + "logits/chosen": 0.15750174224376678, + "logits/rejected": 0.1133815348148346, + "logps/chosen": -59.61478805541992, + "logps/ref_chosen": -59.58109664916992, + "logps/ref_rejected": -81.88029479980469, + "logps/rejected": -81.91694641113281, + "loss": 1.3854, + "margin_dpo/margin_mean": 0.002964496612548828, + "margin_dpo/margin_std": 0.1755043864250183, + "step": 30 + }, + { + "epoch": 0.04686318972033258, + "grad_norm": 3.4319612979888916, + "learning_rate": 2.2388059701492537e-07, + "logits/chosen": 0.1796724498271942, + "logits/rejected": 0.1043396145105362, + "logps/chosen": -69.11215209960938, + "logps/ref_chosen": -69.12844848632812, + "logps/ref_rejected": -135.9136505126953, + "logps/rejected": -136.04110717773438, + "loss": 1.3858, + "margin_dpo/margin_mean": 0.14375102519989014, + "margin_dpo/margin_std": 0.2275710105895996, + "step": 31 + }, + { + "epoch": 0.04837490551776266, + "grad_norm": 3.0030133724212646, + "learning_rate": 2.3134328358208954e-07, + "logits/chosen": 0.08424107730388641, + "logits/rejected": 0.03786729276180267, + "logps/chosen": -79.7166748046875, + "logps/ref_chosen": -79.69361877441406, + "logps/ref_rejected": -119.91200256347656, + "logps/rejected": -119.93031311035156, + "loss": 1.3867, + "margin_dpo/margin_mean": -0.004748940467834473, + "margin_dpo/margin_std": 0.19554254412651062, + "step": 32 + }, + { + "epoch": 0.049886621315192746, + "grad_norm": 3.1022226810455322, + "learning_rate": 2.388059701492537e-07, + "logits/chosen": 0.05021132156252861, + "logits/rejected": 0.02479320764541626, + "logps/chosen": -55.22189712524414, + "logps/ref_chosen": -55.10992431640625, + "logps/ref_rejected": -82.52067565917969, + "logps/rejected": -82.57884216308594, + "loss": 1.3863, + "margin_dpo/margin_mean": -0.05379873514175415, + "margin_dpo/margin_std": 0.37445366382598877, + "step": 33 + }, + { + "epoch": 0.05139833711262283, + "grad_norm": 2.8541648387908936, + "learning_rate": 2.4626865671641786e-07, + "logits/chosen": 0.057025909423828125, + "logits/rejected": -0.0010375156998634338, + "logps/chosen": -52.59141540527344, + "logps/ref_chosen": -52.49810028076172, + "logps/ref_rejected": -74.43905639648438, + "logps/rejected": -74.50889587402344, + "loss": 1.3854, + "margin_dpo/margin_mean": -0.02347743511199951, + "margin_dpo/margin_std": 0.2468748390674591, + "step": 34 + }, + { + "epoch": 0.05291005291005291, + "grad_norm": 2.7856571674346924, + "learning_rate": 2.537313432835821e-07, + "logits/chosen": -0.023874841630458832, + "logits/rejected": -0.03130623698234558, + "logps/chosen": -67.71501159667969, + "logps/ref_chosen": -67.55126953125, + "logps/ref_rejected": -55.26748275756836, + "logps/rejected": -55.42961502075195, + "loss": 1.3864, + "margin_dpo/margin_mean": -0.0016133785247802734, + "margin_dpo/margin_std": 0.33188915252685547, + "step": 35 + }, + { + "epoch": 0.05442176870748299, + "grad_norm": 2.8720510005950928, + "learning_rate": 2.611940298507462e-07, + "logits/chosen": 0.16390232741832733, + "logits/rejected": 0.08551906049251556, + "logps/chosen": -52.384647369384766, + "logps/ref_chosen": -52.303955078125, + "logps/ref_rejected": -88.28939819335938, + "logps/rejected": -88.45569610595703, + "loss": 1.3856, + "margin_dpo/margin_mean": 0.085601806640625, + "margin_dpo/margin_std": 0.35991254448890686, + "step": 36 + }, + { + "epoch": 0.055933484504913075, + "grad_norm": 3.7995986938476562, + "learning_rate": 2.686567164179104e-07, + "logits/chosen": 0.06787224858999252, + "logits/rejected": 0.008850198239088058, + "logps/chosen": -60.5541877746582, + "logps/ref_chosen": -60.51344680786133, + "logps/ref_rejected": -103.91683959960938, + "logps/rejected": -104.0807113647461, + "loss": 1.3853, + "margin_dpo/margin_mean": 0.12312948703765869, + "margin_dpo/margin_std": 0.36983656883239746, + "step": 37 + }, + { + "epoch": 0.05744520030234316, + "grad_norm": 2.977374792098999, + "learning_rate": 2.761194029850746e-07, + "logits/chosen": 0.05500081554055214, + "logits/rejected": 0.019983187317848206, + "logps/chosen": -50.67215347290039, + "logps/ref_chosen": -50.62315368652344, + "logps/ref_rejected": -60.629703521728516, + "logps/rejected": -60.82992935180664, + "loss": 1.3861, + "margin_dpo/margin_mean": 0.15122318267822266, + "margin_dpo/margin_std": 0.3122575581073761, + "step": 38 + }, + { + "epoch": 0.05895691609977324, + "grad_norm": 2.931081771850586, + "learning_rate": 2.8358208955223876e-07, + "logits/chosen": 0.2395174652338028, + "logits/rejected": 0.20490288734436035, + "logps/chosen": -45.89991760253906, + "logps/ref_chosen": -45.8764533996582, + "logps/ref_rejected": -69.75498962402344, + "logps/rejected": -69.87886047363281, + "loss": 1.3858, + "margin_dpo/margin_mean": 0.10041189193725586, + "margin_dpo/margin_std": 0.28835514187812805, + "step": 39 + }, + { + "epoch": 0.06046863189720333, + "grad_norm": 3.1318254470825195, + "learning_rate": 2.9104477611940296e-07, + "logits/chosen": 0.18963474035263062, + "logits/rejected": 0.16616499423980713, + "logps/chosen": -39.96923828125, + "logps/ref_chosen": -39.950111389160156, + "logps/ref_rejected": -62.60026931762695, + "logps/rejected": -62.79283142089844, + "loss": 1.3859, + "margin_dpo/margin_mean": 0.1734316349029541, + "margin_dpo/margin_std": 0.2163560539484024, + "step": 40 + }, + { + "epoch": 0.06198034769463341, + "grad_norm": 3.0570766925811768, + "learning_rate": 2.985074626865671e-07, + "logits/chosen": -0.12535785138607025, + "logits/rejected": -0.12432264536619186, + "logps/chosen": -65.45966339111328, + "logps/ref_chosen": -65.39937591552734, + "logps/ref_rejected": -83.19218444824219, + "logps/rejected": -83.37960815429688, + "loss": 1.3852, + "margin_dpo/margin_mean": 0.1271369457244873, + "margin_dpo/margin_std": 0.2651064991950989, + "step": 41 + }, + { + "epoch": 0.06349206349206349, + "grad_norm": 2.934258460998535, + "learning_rate": 3.059701492537313e-07, + "logits/chosen": 0.06298904120922089, + "logits/rejected": 0.05617032200098038, + "logps/chosen": -61.81056594848633, + "logps/ref_chosen": -61.71239471435547, + "logps/ref_rejected": -81.27043151855469, + "logps/rejected": -81.32711029052734, + "loss": 1.3864, + "margin_dpo/margin_mean": -0.04149121046066284, + "margin_dpo/margin_std": 0.3146214485168457, + "step": 42 + }, + { + "epoch": 0.06500377928949358, + "grad_norm": 3.4698517322540283, + "learning_rate": 3.134328358208955e-07, + "logits/chosen": 0.029873725026845932, + "logits/rejected": -0.042266130447387695, + "logps/chosen": -68.39386749267578, + "logps/ref_chosen": -68.25798797607422, + "logps/ref_rejected": -94.93944549560547, + "logps/rejected": -95.09349060058594, + "loss": 1.3861, + "margin_dpo/margin_mean": 0.018149971961975098, + "margin_dpo/margin_std": 0.2860589325428009, + "step": 43 + }, + { + "epoch": 0.06651549508692366, + "grad_norm": 3.261988639831543, + "learning_rate": 3.2089552238805965e-07, + "logits/chosen": 0.059243109077215195, + "logits/rejected": 0.07599075883626938, + "logps/chosen": -76.56920623779297, + "logps/ref_chosen": -76.42689514160156, + "logps/ref_rejected": -84.94781494140625, + "logps/rejected": -84.98534393310547, + "loss": 1.3858, + "margin_dpo/margin_mean": -0.10478079319000244, + "margin_dpo/margin_std": 0.2691563367843628, + "step": 44 + }, + { + "epoch": 0.06802721088435375, + "grad_norm": 3.200819492340088, + "learning_rate": 3.2835820895522385e-07, + "logits/chosen": 0.09944377094507217, + "logits/rejected": 0.05152427405118942, + "logps/chosen": -74.33448791503906, + "logps/ref_chosen": -74.13058471679688, + "logps/ref_rejected": -82.69816589355469, + "logps/rejected": -82.89010620117188, + "loss": 1.385, + "margin_dpo/margin_mean": -0.011955618858337402, + "margin_dpo/margin_std": 0.3313651978969574, + "step": 45 + }, + { + "epoch": 0.06953892668178382, + "grad_norm": 3.1459238529205322, + "learning_rate": 3.3582089552238805e-07, + "logits/chosen": 0.17175917327404022, + "logits/rejected": 0.15970298647880554, + "logps/chosen": -96.9361343383789, + "logps/ref_chosen": -96.75468444824219, + "logps/ref_rejected": -82.88079833984375, + "logps/rejected": -83.0982666015625, + "loss": 1.3848, + "margin_dpo/margin_mean": 0.03600466251373291, + "margin_dpo/margin_std": 0.3888518512248993, + "step": 46 + }, + { + "epoch": 0.0710506424792139, + "grad_norm": 2.8698384761810303, + "learning_rate": 3.432835820895522e-07, + "logits/chosen": 0.1564871072769165, + "logits/rejected": 0.1589561551809311, + "logps/chosen": -53.687896728515625, + "logps/ref_chosen": -53.5174560546875, + "logps/ref_rejected": -55.22039794921875, + "logps/rejected": -55.589107513427734, + "loss": 1.385, + "margin_dpo/margin_mean": 0.19826769828796387, + "margin_dpo/margin_std": 0.3298158049583435, + "step": 47 + }, + { + "epoch": 0.07256235827664399, + "grad_norm": 2.6883156299591064, + "learning_rate": 3.507462686567164e-07, + "logits/chosen": 0.17292676866054535, + "logits/rejected": 0.13669899106025696, + "logps/chosen": -62.340145111083984, + "logps/ref_chosen": -62.185054779052734, + "logps/ref_rejected": -74.84376525878906, + "logps/rejected": -75.03015899658203, + "loss": 1.386, + "margin_dpo/margin_mean": 0.03130638599395752, + "margin_dpo/margin_std": 0.34968793392181396, + "step": 48 + }, + { + "epoch": 0.07407407407407407, + "grad_norm": 2.750591993331909, + "learning_rate": 3.5820895522388055e-07, + "logits/chosen": 0.057202327996492386, + "logits/rejected": 0.037843670696020126, + "logps/chosen": -37.986602783203125, + "logps/ref_chosen": -37.77415084838867, + "logps/ref_rejected": -51.56128692626953, + "logps/rejected": -51.90427780151367, + "loss": 1.3855, + "margin_dpo/margin_mean": 0.13054275512695312, + "margin_dpo/margin_std": 0.2379816472530365, + "step": 49 + }, + { + "epoch": 0.07558578987150416, + "grad_norm": 2.820672035217285, + "learning_rate": 3.6567164179104475e-07, + "logits/chosen": 0.10407137125730515, + "logits/rejected": 0.08040489256381989, + "logps/chosen": -60.96312713623047, + "logps/ref_chosen": -60.664947509765625, + "logps/ref_rejected": -89.31661987304688, + "logps/rejected": -89.6517333984375, + "loss": 1.3852, + "margin_dpo/margin_mean": 0.03693675994873047, + "margin_dpo/margin_std": 0.25188928842544556, + "step": 50 + }, + { + "epoch": 0.07709750566893424, + "grad_norm": 3.1633975505828857, + "learning_rate": 3.7313432835820895e-07, + "logits/chosen": 0.11444868892431259, + "logits/rejected": 0.04659546911716461, + "logps/chosen": -48.322227478027344, + "logps/ref_chosen": -48.012168884277344, + "logps/ref_rejected": -68.38160705566406, + "logps/rejected": -68.88851928710938, + "loss": 1.3847, + "margin_dpo/margin_mean": 0.19685041904449463, + "margin_dpo/margin_std": 0.40103837847709656, + "step": 51 + }, + { + "epoch": 0.07860922146636433, + "grad_norm": 3.325279951095581, + "learning_rate": 3.805970149253731e-07, + "logits/chosen": 0.08999551832675934, + "logits/rejected": 0.05174541473388672, + "logps/chosen": -69.64974975585938, + "logps/ref_chosen": -69.34451293945312, + "logps/ref_rejected": -115.13761901855469, + "logps/rejected": -115.90428924560547, + "loss": 1.3839, + "margin_dpo/margin_mean": 0.46144330501556396, + "margin_dpo/margin_std": 0.610642671585083, + "step": 52 + }, + { + "epoch": 0.0801209372637944, + "grad_norm": 2.7831149101257324, + "learning_rate": 3.880597014925373e-07, + "logits/chosen": 0.05799449607729912, + "logits/rejected": 0.057163748890161514, + "logps/chosen": -61.22389602661133, + "logps/ref_chosen": -60.885520935058594, + "logps/ref_rejected": -51.6776237487793, + "logps/rejected": -52.1979866027832, + "loss": 1.3842, + "margin_dpo/margin_mean": 0.18198823928833008, + "margin_dpo/margin_std": 0.3276767432689667, + "step": 53 + }, + { + "epoch": 0.08163265306122448, + "grad_norm": 2.6667864322662354, + "learning_rate": 3.9552238805970144e-07, + "logits/chosen": -0.010790073312819004, + "logits/rejected": -0.014523019082844257, + "logps/chosen": -43.61071014404297, + "logps/ref_chosen": -43.147682189941406, + "logps/ref_rejected": -48.63517761230469, + "logps/rejected": -49.14527130126953, + "loss": 1.3849, + "margin_dpo/margin_mean": 0.047069668769836426, + "margin_dpo/margin_std": 0.5451761484146118, + "step": 54 + }, + { + "epoch": 0.08314436885865457, + "grad_norm": 3.2038674354553223, + "learning_rate": 4.0298507462686564e-07, + "logits/chosen": 0.09067589789628983, + "logits/rejected": -0.015168176032602787, + "logps/chosen": -45.248573303222656, + "logps/ref_chosen": -44.79475402832031, + "logps/ref_rejected": -87.80093383789062, + "logps/rejected": -88.53862762451172, + "loss": 1.3832, + "margin_dpo/margin_mean": 0.28387510776519775, + "margin_dpo/margin_std": 0.6332917809486389, + "step": 55 + }, + { + "epoch": 0.08465608465608465, + "grad_norm": 2.775723934173584, + "learning_rate": 4.1044776119402984e-07, + "logits/chosen": 0.09276323020458221, + "logits/rejected": 0.11390136182308197, + "logps/chosen": -71.15162658691406, + "logps/ref_chosen": -70.83395385742188, + "logps/ref_rejected": -66.29704284667969, + "logps/rejected": -66.77851867675781, + "loss": 1.3843, + "margin_dpo/margin_mean": 0.16381430625915527, + "margin_dpo/margin_std": 0.6528092622756958, + "step": 56 + }, + { + "epoch": 0.08616780045351474, + "grad_norm": 3.1573398113250732, + "learning_rate": 4.17910447761194e-07, + "logits/chosen": 0.13995778560638428, + "logits/rejected": 0.10124865919351578, + "logps/chosen": -55.33793640136719, + "logps/ref_chosen": -54.77841567993164, + "logps/ref_rejected": -97.11053466796875, + "logps/rejected": -97.8675537109375, + "loss": 1.3835, + "margin_dpo/margin_mean": 0.19750165939331055, + "margin_dpo/margin_std": 0.8898135423660278, + "step": 57 + }, + { + "epoch": 0.08767951625094482, + "grad_norm": 3.0929791927337646, + "learning_rate": 4.253731343283582e-07, + "logits/chosen": -0.024497557431459427, + "logits/rejected": -0.11392132192850113, + "logps/chosen": -59.87010955810547, + "logps/ref_chosen": -59.280887603759766, + "logps/ref_rejected": -82.80585479736328, + "logps/rejected": -83.43619537353516, + "loss": 1.3839, + "margin_dpo/margin_mean": 0.041118621826171875, + "margin_dpo/margin_std": 0.6544058322906494, + "step": 58 + }, + { + "epoch": 0.08919123204837491, + "grad_norm": 2.7556982040405273, + "learning_rate": 4.3283582089552234e-07, + "logits/chosen": 0.17279072105884552, + "logits/rejected": 0.04841926693916321, + "logps/chosen": -38.3438720703125, + "logps/ref_chosen": -37.76800537109375, + "logps/ref_rejected": -84.53325653076172, + "logps/rejected": -85.44366455078125, + "loss": 1.3833, + "margin_dpo/margin_mean": 0.33453965187072754, + "margin_dpo/margin_std": 0.6480045318603516, + "step": 59 + }, + { + "epoch": 0.09070294784580499, + "grad_norm": 2.9187021255493164, + "learning_rate": 4.4029850746268654e-07, + "logits/chosen": 0.2775009274482727, + "logits/rejected": 0.2738117575645447, + "logps/chosen": -67.29905700683594, + "logps/ref_chosen": -66.45524597167969, + "logps/ref_rejected": -67.77998352050781, + "logps/rejected": -68.42279052734375, + "loss": 1.3855, + "margin_dpo/margin_mean": -0.20100653171539307, + "margin_dpo/margin_std": 1.0322511196136475, + "step": 60 + }, + { + "epoch": 0.09221466364323508, + "grad_norm": 2.7786881923675537, + "learning_rate": 4.4776119402985074e-07, + "logits/chosen": 0.02796616032719612, + "logits/rejected": -0.045561283826828, + "logps/chosen": -41.18583679199219, + "logps/ref_chosen": -40.7296257019043, + "logps/ref_rejected": -82.87712097167969, + "logps/rejected": -83.8257827758789, + "loss": 1.3858, + "margin_dpo/margin_mean": 0.4924490451812744, + "margin_dpo/margin_std": 0.6613912582397461, + "step": 61 + }, + { + "epoch": 0.09372637944066516, + "grad_norm": 2.8831284046173096, + "learning_rate": 4.552238805970149e-07, + "logits/chosen": 0.16551688313484192, + "logits/rejected": 0.05868356674909592, + "logps/chosen": -47.318138122558594, + "logps/ref_chosen": -46.39446258544922, + "logps/ref_rejected": -79.28895568847656, + "logps/rejected": -80.39089965820312, + "loss": 1.3838, + "margin_dpo/margin_mean": 0.17827367782592773, + "margin_dpo/margin_std": 1.0472949743270874, + "step": 62 + }, + { + "epoch": 0.09523809523809523, + "grad_norm": 3.3160440921783447, + "learning_rate": 4.626865671641791e-07, + "logits/chosen": 0.2583298087120056, + "logits/rejected": 0.2626573443412781, + "logps/chosen": -75.17607116699219, + "logps/ref_chosen": -74.44918823242188, + "logps/ref_rejected": -115.08148193359375, + "logps/rejected": -116.2177734375, + "loss": 1.3806, + "margin_dpo/margin_mean": 0.4094170331954956, + "margin_dpo/margin_std": 1.2669267654418945, + "step": 63 + }, + { + "epoch": 0.09674981103552532, + "grad_norm": 2.5979647636413574, + "learning_rate": 4.701492537313433e-07, + "logits/chosen": 0.0404229536652565, + "logits/rejected": -0.03692961111664772, + "logps/chosen": -42.32487869262695, + "logps/ref_chosen": -41.524444580078125, + "logps/ref_rejected": -76.30519104003906, + "logps/rejected": -77.62030029296875, + "loss": 1.3855, + "margin_dpo/margin_mean": 0.5146619081497192, + "margin_dpo/margin_std": 0.8373426198959351, + "step": 64 + }, + { + "epoch": 0.0982615268329554, + "grad_norm": 3.180335760116577, + "learning_rate": 4.776119402985074e-07, + "logits/chosen": 0.22089830040931702, + "logits/rejected": 0.2287295162677765, + "logps/chosen": -79.51335144042969, + "logps/ref_chosen": -78.54818725585938, + "logps/ref_rejected": -58.596473693847656, + "logps/rejected": -59.96930694580078, + "loss": 1.3814, + "margin_dpo/margin_mean": 0.4076697826385498, + "margin_dpo/margin_std": 0.8789803981781006, + "step": 65 + }, + { + "epoch": 0.09977324263038549, + "grad_norm": 3.0080127716064453, + "learning_rate": 4.850746268656717e-07, + "logits/chosen": 0.15983106195926666, + "logits/rejected": 0.14227060973644257, + "logps/chosen": -69.20197296142578, + "logps/ref_chosen": -68.18994903564453, + "logps/ref_rejected": -83.489013671875, + "logps/rejected": -85.03985595703125, + "loss": 1.3826, + "margin_dpo/margin_mean": 0.5388225317001343, + "margin_dpo/margin_std": 1.0032914876937866, + "step": 66 + }, + { + "epoch": 0.10128495842781557, + "grad_norm": 2.978635311126709, + "learning_rate": 4.925373134328357e-07, + "logits/chosen": 0.13758046925067902, + "logits/rejected": 0.084464430809021, + "logps/chosen": -63.52684020996094, + "logps/ref_chosen": -62.41529083251953, + "logps/ref_rejected": -53.900169372558594, + "logps/rejected": -54.98922348022461, + "loss": 1.3834, + "margin_dpo/margin_mean": -0.02249324321746826, + "margin_dpo/margin_std": 1.1306034326553345, + "step": 67 + }, + { + "epoch": 0.10279667422524566, + "grad_norm": 3.2404768466949463, + "learning_rate": 5e-07, + "logits/chosen": 0.06684955954551697, + "logits/rejected": -0.005514336749911308, + "logps/chosen": -81.26565551757812, + "logps/ref_chosen": -80.14068603515625, + "logps/ref_rejected": -116.50318145751953, + "logps/rejected": -118.35859680175781, + "loss": 1.3832, + "margin_dpo/margin_mean": 0.7304507493972778, + "margin_dpo/margin_std": 1.5464614629745483, + "step": 68 + }, + { + "epoch": 0.10430839002267574, + "grad_norm": 3.3028857707977295, + "learning_rate": 4.999965034812934e-07, + "logits/chosen": 0.16428935527801514, + "logits/rejected": 0.11686080694198608, + "logps/chosen": -54.37907409667969, + "logps/ref_chosen": -53.415428161621094, + "logps/ref_rejected": -71.89765930175781, + "logps/rejected": -73.68708801269531, + "loss": 1.3792, + "margin_dpo/margin_mean": 0.8257811069488525, + "margin_dpo/margin_std": 0.9166597127914429, + "step": 69 + }, + { + "epoch": 0.10582010582010581, + "grad_norm": 2.9263834953308105, + "learning_rate": 4.999860140229787e-07, + "logits/chosen": 0.006922289729118347, + "logits/rejected": 0.01605862006545067, + "logps/chosen": -72.73168182373047, + "logps/ref_chosen": -71.148193359375, + "logps/ref_rejected": -67.78597259521484, + "logps/rejected": -69.44383239746094, + "loss": 1.383, + "margin_dpo/margin_mean": 0.07435917854309082, + "margin_dpo/margin_std": 1.4182093143463135, + "step": 70 + }, + { + "epoch": 0.1073318216175359, + "grad_norm": 2.8662917613983154, + "learning_rate": 4.999685319184688e-07, + "logits/chosen": 0.1492251604795456, + "logits/rejected": 0.13701248168945312, + "logps/chosen": -46.276390075683594, + "logps/ref_chosen": -44.75934982299805, + "logps/ref_rejected": -65.2507553100586, + "logps/rejected": -66.79752349853516, + "loss": 1.3832, + "margin_dpo/margin_mean": 0.029730796813964844, + "margin_dpo/margin_std": 1.3828539848327637, + "step": 71 + }, + { + "epoch": 0.10884353741496598, + "grad_norm": 3.228018045425415, + "learning_rate": 4.999440576567755e-07, + "logits/chosen": 0.09348595142364502, + "logits/rejected": 0.025297988206148148, + "logps/chosen": -48.47857666015625, + "logps/ref_chosen": -47.3697395324707, + "logps/ref_rejected": -52.99060821533203, + "logps/rejected": -54.79874801635742, + "loss": 1.3786, + "margin_dpo/margin_mean": 0.699303388595581, + "margin_dpo/margin_std": 1.9094823598861694, + "step": 72 + }, + { + "epoch": 0.11035525321239607, + "grad_norm": 3.229674816131592, + "learning_rate": 4.999125919224965e-07, + "logits/chosen": 0.10412274301052094, + "logits/rejected": 0.11492769420146942, + "logps/chosen": -58.23365783691406, + "logps/ref_chosen": -56.49576187133789, + "logps/ref_rejected": -49.6622200012207, + "logps/rejected": -50.660560607910156, + "loss": 1.3857, + "margin_dpo/margin_mean": -0.7395575046539307, + "margin_dpo/margin_std": 1.3145250082015991, + "step": 73 + }, + { + "epoch": 0.11186696900982615, + "grad_norm": 2.963764190673828, + "learning_rate": 4.998741355957963e-07, + "logits/chosen": 0.2109803855419159, + "logits/rejected": 0.14343787729740143, + "logps/chosen": -58.918479919433594, + "logps/ref_chosen": -57.7161750793457, + "logps/ref_rejected": -114.44114685058594, + "logps/rejected": -117.02381896972656, + "loss": 1.3789, + "margin_dpo/margin_mean": 1.3803461790084839, + "margin_dpo/margin_std": 1.7276947498321533, + "step": 74 + }, + { + "epoch": 0.11337868480725624, + "grad_norm": 2.831622362136841, + "learning_rate": 4.998286897523808e-07, + "logits/chosen": 0.19602981209754944, + "logits/rejected": 0.17468248307704926, + "logps/chosen": -41.735137939453125, + "logps/ref_chosen": -39.99764633178711, + "logps/ref_rejected": -56.469932556152344, + "logps/rejected": -58.855743408203125, + "loss": 1.3781, + "margin_dpo/margin_mean": 0.6483190059661865, + "margin_dpo/margin_std": 1.347118616104126, + "step": 75 + }, + { + "epoch": 0.11489040060468632, + "grad_norm": 2.9740262031555176, + "learning_rate": 4.997762556634679e-07, + "logits/chosen": -0.004973419010639191, + "logits/rejected": -0.06459110975265503, + "logps/chosen": -60.27781677246094, + "logps/ref_chosen": -57.66736602783203, + "logps/ref_rejected": -80.56336212158203, + "logps/rejected": -83.19686889648438, + "loss": 1.3828, + "margin_dpo/margin_mean": 0.023052692413330078, + "margin_dpo/margin_std": 2.44602632522583, + "step": 76 + }, + { + "epoch": 0.1164021164021164, + "grad_norm": 3.190723419189453, + "learning_rate": 4.99716834795752e-07, + "logits/chosen": -0.021925870329141617, + "logits/rejected": -0.0063882917165756226, + "logps/chosen": -55.694488525390625, + "logps/ref_chosen": -53.56401824951172, + "logps/ref_rejected": -48.90995407104492, + "logps/rejected": -51.173919677734375, + "loss": 1.3771, + "margin_dpo/margin_mean": 0.13348984718322754, + "margin_dpo/margin_std": 1.8197870254516602, + "step": 77 + }, + { + "epoch": 0.11791383219954649, + "grad_norm": 2.9408459663391113, + "learning_rate": 4.996504288113623e-07, + "logits/chosen": 0.07129096984863281, + "logits/rejected": 0.04697857052087784, + "logps/chosen": -74.45344543457031, + "logps/ref_chosen": -72.49877166748047, + "logps/ref_rejected": -85.70948791503906, + "logps/rejected": -88.24615478515625, + "loss": 1.3815, + "margin_dpo/margin_mean": 0.5819820165634155, + "margin_dpo/margin_std": 1.5051074028015137, + "step": 78 + }, + { + "epoch": 0.11942554799697656, + "grad_norm": 3.75710391998291, + "learning_rate": 4.995770395678171e-07, + "logits/chosen": 0.2161446511745453, + "logits/rejected": 0.2010088711977005, + "logps/chosen": -57.58828353881836, + "logps/ref_chosen": -55.347450256347656, + "logps/ref_rejected": -58.748321533203125, + "logps/rejected": -61.51164245605469, + "loss": 1.3723, + "margin_dpo/margin_mean": 0.522484540939331, + "margin_dpo/margin_std": 2.684553384780884, + "step": 79 + }, + { + "epoch": 0.12093726379440665, + "grad_norm": 2.9566855430603027, + "learning_rate": 4.994966691179711e-07, + "logits/chosen": 0.16256621479988098, + "logits/rejected": 0.08070458471775055, + "logps/chosen": -61.3144416809082, + "logps/ref_chosen": -58.95909118652344, + "logps/ref_rejected": -62.06755065917969, + "logps/rejected": -65.10287475585938, + "loss": 1.3796, + "margin_dpo/margin_mean": 0.6799747943878174, + "margin_dpo/margin_std": 2.2472503185272217, + "step": 80 + }, + { + "epoch": 0.12244897959183673, + "grad_norm": 3.1515696048736572, + "learning_rate": 4.994093197099587e-07, + "logits/chosen": 0.10985112190246582, + "logits/rejected": 0.07357762008905411, + "logps/chosen": -60.51456069946289, + "logps/ref_chosen": -57.94086456298828, + "logps/ref_rejected": -64.90560913085938, + "logps/rejected": -68.10665893554688, + "loss": 1.3767, + "margin_dpo/margin_mean": 0.6273548603057861, + "margin_dpo/margin_std": 2.312195301055908, + "step": 81 + }, + { + "epoch": 0.12396069538926682, + "grad_norm": 2.9304919242858887, + "learning_rate": 4.993149937871306e-07, + "logits/chosen": 0.1599178910255432, + "logits/rejected": 0.06249549984931946, + "logps/chosen": -27.858848571777344, + "logps/ref_chosen": -26.338790893554688, + "logps/ref_rejected": -56.80085754394531, + "logps/rejected": -59.76110076904297, + "loss": 1.374, + "margin_dpo/margin_mean": 1.4401792287826538, + "margin_dpo/margin_std": 1.2950589656829834, + "step": 82 + }, + { + "epoch": 0.1254724111866969, + "grad_norm": 3.3920578956604004, + "learning_rate": 4.992136939879856e-07, + "logits/chosen": 0.07286302000284195, + "logits/rejected": -0.008843222633004189, + "logps/chosen": -58.036651611328125, + "logps/ref_chosen": -55.43024444580078, + "logps/ref_rejected": -90.0384521484375, + "logps/rejected": -94.02841186523438, + "loss": 1.3722, + "margin_dpo/margin_mean": 1.3835418224334717, + "margin_dpo/margin_std": 2.0829572677612305, + "step": 83 + }, + { + "epoch": 0.12698412698412698, + "grad_norm": 3.293107271194458, + "learning_rate": 4.991054231460969e-07, + "logits/chosen": 0.14062954485416412, + "logits/rejected": 0.09686338156461716, + "logps/chosen": -64.60865020751953, + "logps/ref_chosen": -60.98677444458008, + "logps/ref_rejected": -61.2043342590332, + "logps/rejected": -66.28961944580078, + "loss": 1.3749, + "margin_dpo/margin_mean": 1.463415503501892, + "margin_dpo/margin_std": 2.3290019035339355, + "step": 84 + }, + { + "epoch": 0.12849584278155707, + "grad_norm": 3.0449564456939697, + "learning_rate": 4.989901842900325e-07, + "logits/chosen": 0.2767505645751953, + "logits/rejected": 0.2392296940088272, + "logps/chosen": -58.43796157836914, + "logps/ref_chosen": -55.2398681640625, + "logps/ref_rejected": -69.06980895996094, + "logps/rejected": -73.96517181396484, + "loss": 1.3711, + "margin_dpo/margin_mean": 1.6972711086273193, + "margin_dpo/margin_std": 1.7987269163131714, + "step": 85 + }, + { + "epoch": 0.13000755857898716, + "grad_norm": 3.007859468460083, + "learning_rate": 4.988679806432711e-07, + "logits/chosen": 0.16140137612819672, + "logits/rejected": 0.1533210277557373, + "logps/chosen": -64.31861114501953, + "logps/ref_chosen": -60.13792419433594, + "logps/ref_rejected": -66.80750274658203, + "logps/rejected": -72.00105285644531, + "loss": 1.3752, + "margin_dpo/margin_mean": 1.0128706693649292, + "margin_dpo/margin_std": 3.6858959197998047, + "step": 86 + }, + { + "epoch": 0.13151927437641722, + "grad_norm": 3.371891975402832, + "learning_rate": 4.987388156241114e-07, + "logits/chosen": 0.1272026002407074, + "logits/rejected": 0.08599858731031418, + "logps/chosen": -95.75196075439453, + "logps/ref_chosen": -90.86170959472656, + "logps/ref_rejected": -81.98894500732422, + "logps/rejected": -88.12923431396484, + "loss": 1.3721, + "margin_dpo/margin_mean": 1.2500503063201904, + "margin_dpo/margin_std": 4.517343997955322, + "step": 87 + }, + { + "epoch": 0.1330309901738473, + "grad_norm": 3.1865837574005127, + "learning_rate": 4.986026928455767e-07, + "logits/chosen": 0.10692833364009857, + "logits/rejected": 0.08406249433755875, + "logps/chosen": -47.30534362792969, + "logps/ref_chosen": -44.642494201660156, + "logps/ref_rejected": -57.57598114013672, + "logps/rejected": -62.85344314575195, + "loss": 1.3767, + "margin_dpo/margin_mean": 2.6146082878112793, + "margin_dpo/margin_std": 3.4296021461486816, + "step": 88 + }, + { + "epoch": 0.1345427059712774, + "grad_norm": 3.5698328018188477, + "learning_rate": 4.984596161153135e-07, + "logits/chosen": 0.23458774387836456, + "logits/rejected": 0.11213146895170212, + "logps/chosen": -47.105751037597656, + "logps/ref_chosen": -43.498695373535156, + "logps/ref_rejected": -72.5770034790039, + "logps/rejected": -78.28662109375, + "loss": 1.3671, + "margin_dpo/margin_mean": 2.10256290435791, + "margin_dpo/margin_std": 2.2028491497039795, + "step": 89 + }, + { + "epoch": 0.1360544217687075, + "grad_norm": 3.3276965618133545, + "learning_rate": 4.983095894354857e-07, + "logits/chosen": 0.24859435856342316, + "logits/rejected": 0.14004702866077423, + "logps/chosen": -35.86720275878906, + "logps/ref_chosen": -32.05683517456055, + "logps/ref_rejected": -76.27911376953125, + "logps/rejected": -82.3067626953125, + "loss": 1.3688, + "margin_dpo/margin_mean": 2.2172858715057373, + "margin_dpo/margin_std": 4.022238731384277, + "step": 90 + }, + { + "epoch": 0.13756613756613756, + "grad_norm": 2.8259410858154297, + "learning_rate": 4.98152617002662e-07, + "logits/chosen": 0.2829027473926544, + "logits/rejected": 0.1933433711528778, + "logps/chosen": -42.66006088256836, + "logps/ref_chosen": -38.95655822753906, + "logps/ref_rejected": -85.30648803710938, + "logps/rejected": -92.27479553222656, + "loss": 1.3726, + "margin_dpo/margin_mean": 3.264805793762207, + "margin_dpo/margin_std": 5.691169738769531, + "step": 91 + }, + { + "epoch": 0.13907785336356765, + "grad_norm": 3.3671655654907227, + "learning_rate": 4.979887032076988e-07, + "logits/chosen": 0.1655869483947754, + "logits/rejected": 0.14279474318027496, + "logps/chosen": -50.55265808105469, + "logps/ref_chosen": -46.43939208984375, + "logps/ref_rejected": -56.59052276611328, + "logps/rejected": -61.2132568359375, + "loss": 1.3639, + "margin_dpo/margin_mean": 0.5094633102416992, + "margin_dpo/margin_std": 2.5982000827789307, + "step": 92 + }, + { + "epoch": 0.14058956916099774, + "grad_norm": 2.7980592250823975, + "learning_rate": 4.978178526356172e-07, + "logits/chosen": 0.23437252640724182, + "logits/rejected": 0.18408940732479095, + "logps/chosen": -47.498390197753906, + "logps/ref_chosen": -42.59188461303711, + "logps/ref_rejected": -66.06549072265625, + "logps/rejected": -73.67166900634766, + "loss": 1.3733, + "margin_dpo/margin_mean": 2.699659824371338, + "margin_dpo/margin_std": 6.147436141967773, + "step": 93 + }, + { + "epoch": 0.1421012849584278, + "grad_norm": 3.8317432403564453, + "learning_rate": 4.976400700654751e-07, + "logits/chosen": 0.23454198241233826, + "logits/rejected": 0.24940252304077148, + "logps/chosen": -82.16134643554688, + "logps/ref_chosen": -77.08745574951172, + "logps/ref_rejected": -72.75309753417969, + "logps/rejected": -78.77753448486328, + "loss": 1.3598, + "margin_dpo/margin_mean": 0.9505494832992554, + "margin_dpo/margin_std": 5.345444679260254, + "step": 94 + }, + { + "epoch": 0.1436130007558579, + "grad_norm": 3.494490146636963, + "learning_rate": 4.974553604702332e-07, + "logits/chosen": 0.13477879762649536, + "logits/rejected": 0.0803680494427681, + "logps/chosen": -63.49205017089844, + "logps/ref_chosen": -57.205665588378906, + "logps/ref_rejected": -101.0076904296875, + "logps/rejected": -108.45451354980469, + "loss": 1.3646, + "margin_dpo/margin_mean": 1.1604300737380981, + "margin_dpo/margin_std": 4.039283752441406, + "step": 95 + }, + { + "epoch": 0.14512471655328799, + "grad_norm": 3.4159934520721436, + "learning_rate": 4.972637290166157e-07, + "logits/chosen": 0.27271753549575806, + "logits/rejected": 0.200592041015625, + "logps/chosen": -78.8208999633789, + "logps/ref_chosen": -71.87321472167969, + "logps/ref_rejected": -108.4333267211914, + "logps/rejected": -119.43417358398438, + "loss": 1.3618, + "margin_dpo/margin_mean": 4.053152084350586, + "margin_dpo/margin_std": 8.04190731048584, + "step": 96 + }, + { + "epoch": 0.14663643235071808, + "grad_norm": 3.486067056655884, + "learning_rate": 4.970651810649666e-07, + "logits/chosen": 0.15996894240379333, + "logits/rejected": 0.2043858766555786, + "logps/chosen": -104.0246810913086, + "logps/ref_chosen": -93.0615463256836, + "logps/ref_rejected": -54.65837097167969, + "logps/rejected": -61.65705871582031, + "loss": 1.3753, + "margin_dpo/margin_mean": -3.9644370079040527, + "margin_dpo/margin_std": 11.941303253173828, + "step": 97 + }, + { + "epoch": 0.14814814814814814, + "grad_norm": 2.9447083473205566, + "learning_rate": 4.968597221690985e-07, + "logits/chosen": 0.19196490943431854, + "logits/rejected": 0.1588142365217209, + "logps/chosen": -72.50187683105469, + "logps/ref_chosen": -64.01171875, + "logps/ref_rejected": -64.54795837402344, + "logps/rejected": -73.04808044433594, + "loss": 1.3791, + "margin_dpo/margin_mean": 0.009970307350158691, + "margin_dpo/margin_std": 4.951154708862305, + "step": 98 + }, + { + "epoch": 0.14965986394557823, + "grad_norm": 3.1145100593566895, + "learning_rate": 4.966473580761389e-07, + "logits/chosen": 0.3419758081436157, + "logits/rejected": 0.2835521697998047, + "logps/chosen": -59.71137619018555, + "logps/ref_chosen": -53.621009826660156, + "logps/ref_rejected": -93.46940612792969, + "logps/rejected": -103.66671752929688, + "loss": 1.3786, + "margin_dpo/margin_mean": 4.106945991516113, + "margin_dpo/margin_std": 5.849977016448975, + "step": 99 + }, + { + "epoch": 0.15117157974300832, + "grad_norm": 3.7920093536376953, + "learning_rate": 4.964280947263676e-07, + "logits/chosen": 0.24154677987098694, + "logits/rejected": 0.23493698239326477, + "logps/chosen": -102.99555969238281, + "logps/ref_chosen": -93.66744995117188, + "logps/ref_rejected": -110.8492431640625, + "logps/rejected": -124.81897735595703, + "loss": 1.3656, + "margin_dpo/margin_mean": 4.6416401863098145, + "margin_dpo/margin_std": 7.766916275024414, + "step": 100 + }, + { + "epoch": 0.15268329554043839, + "grad_norm": 3.0823545455932617, + "learning_rate": 4.96201938253052e-07, + "logits/chosen": 0.09690912812948227, + "logits/rejected": 0.05974133312702179, + "logps/chosen": -71.51644897460938, + "logps/ref_chosen": -63.71492004394531, + "logps/ref_rejected": -83.56236267089844, + "logps/rejected": -93.69319152832031, + "loss": 1.3574, + "margin_dpo/margin_mean": 2.32930326461792, + "margin_dpo/margin_std": 8.121504783630371, + "step": 101 + }, + { + "epoch": 0.15419501133786848, + "grad_norm": 3.003969430923462, + "learning_rate": 4.959688949822748e-07, + "logits/chosen": 0.11170051246881485, + "logits/rejected": 0.12498050928115845, + "logps/chosen": -73.76181030273438, + "logps/ref_chosen": -65.03767395019531, + "logps/ref_rejected": -65.62431335449219, + "logps/rejected": -73.43655395507812, + "loss": 1.381, + "margin_dpo/margin_mean": -0.9118953943252563, + "margin_dpo/margin_std": 7.858981132507324, + "step": 102 + }, + { + "epoch": 0.15570672713529857, + "grad_norm": 3.363142251968384, + "learning_rate": 4.957289714327572e-07, + "logits/chosen": 0.28459155559539795, + "logits/rejected": 0.2856888771057129, + "logps/chosen": -62.49891662597656, + "logps/ref_chosen": -54.93040466308594, + "logps/ref_rejected": -56.902076721191406, + "logps/rejected": -65.07192993164062, + "loss": 1.3643, + "margin_dpo/margin_mean": 0.6013284921646118, + "margin_dpo/margin_std": 7.1304521560668945, + "step": 103 + }, + { + "epoch": 0.15721844293272866, + "grad_norm": 4.440778732299805, + "learning_rate": 4.954821743156767e-07, + "logits/chosen": 0.28602921962738037, + "logits/rejected": 0.20815476775169373, + "logps/chosen": -76.05664825439453, + "logps/ref_chosen": -66.83149719238281, + "logps/ref_rejected": -114.09371185302734, + "logps/rejected": -128.50567626953125, + "loss": 1.3449, + "margin_dpo/margin_mean": 5.186819553375244, + "margin_dpo/margin_std": 18.05282974243164, + "step": 104 + }, + { + "epoch": 0.15873015873015872, + "grad_norm": 3.9823782444000244, + "learning_rate": 4.952285105344791e-07, + "logits/chosen": 0.2622066140174866, + "logits/rejected": 0.18628236651420593, + "logps/chosen": -52.09508514404297, + "logps/ref_chosen": -46.05514144897461, + "logps/ref_rejected": -78.72990417480469, + "logps/rejected": -89.09690856933594, + "loss": 1.3504, + "margin_dpo/margin_mean": 4.327066898345947, + "margin_dpo/margin_std": 8.973100662231445, + "step": 105 + }, + { + "epoch": 0.1602418745275888, + "grad_norm": 3.0525312423706055, + "learning_rate": 4.949679871846857e-07, + "logits/chosen": 0.2560195326805115, + "logits/rejected": 0.21072283387184143, + "logps/chosen": -62.558570861816406, + "logps/ref_chosen": -53.9271240234375, + "logps/ref_rejected": -66.69943237304688, + "logps/rejected": -78.46056365966797, + "loss": 1.3676, + "margin_dpo/margin_mean": 3.129687786102295, + "margin_dpo/margin_std": 10.326120376586914, + "step": 106 + }, + { + "epoch": 0.1617535903250189, + "grad_norm": 3.6129863262176514, + "learning_rate": 4.947006115536947e-07, + "logits/chosen": 0.20093579590320587, + "logits/rejected": 0.26784881949424744, + "logps/chosen": -103.26329040527344, + "logps/ref_chosen": -87.38455200195312, + "logps/ref_rejected": -76.76750183105469, + "logps/rejected": -89.45541381835938, + "loss": 1.3811, + "margin_dpo/margin_mean": -3.1908369064331055, + "margin_dpo/margin_std": 17.2308292388916, + "step": 107 + }, + { + "epoch": 0.16326530612244897, + "grad_norm": 3.4911181926727295, + "learning_rate": 4.944263911205772e-07, + "logits/chosen": 0.2522239685058594, + "logits/rejected": 0.1837356686592102, + "logps/chosen": -72.13520812988281, + "logps/ref_chosen": -62.109657287597656, + "logps/ref_rejected": -91.77952575683594, + "logps/rejected": -107.07212829589844, + "loss": 1.3568, + "margin_dpo/margin_mean": 5.267055034637451, + "margin_dpo/margin_std": 8.78154182434082, + "step": 108 + }, + { + "epoch": 0.16477702191987906, + "grad_norm": 4.055303573608398, + "learning_rate": 4.941453335558681e-07, + "logits/chosen": 0.3957793116569519, + "logits/rejected": 0.35673829913139343, + "logps/chosen": -84.13023376464844, + "logps/ref_chosen": -70.22321319580078, + "logps/ref_rejected": -103.89926147460938, + "logps/rejected": -124.2558364868164, + "loss": 1.3295, + "margin_dpo/margin_mean": 6.449550151824951, + "margin_dpo/margin_std": 12.290249824523926, + "step": 109 + }, + { + "epoch": 0.16628873771730915, + "grad_norm": 4.451986312866211, + "learning_rate": 4.938574467213517e-07, + "logits/chosen": 0.2378203570842743, + "logits/rejected": 0.25508666038513184, + "logps/chosen": -85.72879028320312, + "logps/ref_chosen": -72.66490173339844, + "logps/ref_rejected": -62.63153076171875, + "logps/rejected": -74.57170867919922, + "loss": 1.4061, + "margin_dpo/margin_mean": -1.1237014532089233, + "margin_dpo/margin_std": 12.696800231933594, + "step": 110 + }, + { + "epoch": 0.16780045351473924, + "grad_norm": 3.417356014251709, + "learning_rate": 4.935627386698418e-07, + "logits/chosen": 0.26538121700286865, + "logits/rejected": 0.27719810605049133, + "logps/chosen": -81.70519256591797, + "logps/ref_chosen": -67.52632141113281, + "logps/ref_rejected": -73.84911346435547, + "logps/rejected": -89.34820556640625, + "loss": 1.3481, + "margin_dpo/margin_mean": 1.3202223777770996, + "margin_dpo/margin_std": 9.906134605407715, + "step": 111 + }, + { + "epoch": 0.1693121693121693, + "grad_norm": 4.943251132965088, + "learning_rate": 4.932612176449559e-07, + "logits/chosen": 0.19840675592422485, + "logits/rejected": 0.1570388376712799, + "logps/chosen": -72.82176971435547, + "logps/ref_chosen": -61.945491790771484, + "logps/ref_rejected": -113.05886840820312, + "logps/rejected": -132.51878356933594, + "loss": 1.3318, + "margin_dpo/margin_mean": 8.58362865447998, + "margin_dpo/margin_std": 13.595785140991211, + "step": 112 + }, + { + "epoch": 0.1708238851095994, + "grad_norm": 3.653705358505249, + "learning_rate": 4.929528920808854e-07, + "logits/chosen": 0.18068620562553406, + "logits/rejected": 0.22447502613067627, + "logps/chosen": -83.29192352294922, + "logps/ref_chosen": -68.68717193603516, + "logps/ref_rejected": -49.824913024902344, + "logps/rejected": -59.991519927978516, + "loss": 1.3697, + "margin_dpo/margin_mean": -4.438149452209473, + "margin_dpo/margin_std": 8.201181411743164, + "step": 113 + }, + { + "epoch": 0.17233560090702948, + "grad_norm": 3.5117268562316895, + "learning_rate": 4.92637770602159e-07, + "logits/chosen": 0.37020614743232727, + "logits/rejected": 0.3371107578277588, + "logps/chosen": -83.77205657958984, + "logps/ref_chosen": -68.70469665527344, + "logps/ref_rejected": -66.02621459960938, + "logps/rejected": -80.38362121582031, + "loss": 1.3549, + "margin_dpo/margin_mean": -0.7099564075469971, + "margin_dpo/margin_std": 13.317527770996094, + "step": 114 + }, + { + "epoch": 0.17384731670445955, + "grad_norm": 3.4344635009765625, + "learning_rate": 4.923158620234019e-07, + "logits/chosen": 0.5059663653373718, + "logits/rejected": 0.3934090733528137, + "logps/chosen": -78.51239776611328, + "logps/ref_chosen": -64.35462951660156, + "logps/ref_rejected": -91.54823303222656, + "logps/rejected": -109.61679077148438, + "loss": 1.3504, + "margin_dpo/margin_mean": 3.910794496536255, + "margin_dpo/margin_std": 20.801626205444336, + "step": 115 + }, + { + "epoch": 0.17535903250188964, + "grad_norm": 4.414751052856445, + "learning_rate": 4.91987175349089e-07, + "logits/chosen": 0.35310861468315125, + "logits/rejected": 0.28566908836364746, + "logps/chosen": -62.23815155029297, + "logps/ref_chosen": -49.481990814208984, + "logps/ref_rejected": -81.90135192871094, + "logps/rejected": -104.6424560546875, + "loss": 1.3141, + "margin_dpo/margin_mean": 9.984952926635742, + "margin_dpo/margin_std": 16.990009307861328, + "step": 116 + }, + { + "epoch": 0.17687074829931973, + "grad_norm": 3.1478612422943115, + "learning_rate": 4.916517197732933e-07, + "logits/chosen": 0.4462072551250458, + "logits/rejected": 0.3813575208187103, + "logps/chosen": -64.82511901855469, + "logps/ref_chosen": -52.36582946777344, + "logps/ref_rejected": -76.98123931884766, + "logps/rejected": -93.8857192993164, + "loss": 1.3637, + "margin_dpo/margin_mean": 4.445184707641602, + "margin_dpo/margin_std": 15.84908676147461, + "step": 117 + }, + { + "epoch": 0.17838246409674982, + "grad_norm": 4.316784381866455, + "learning_rate": 4.913095046794281e-07, + "logits/chosen": 0.33763909339904785, + "logits/rejected": 0.33866265416145325, + "logps/chosen": -76.38975524902344, + "logps/ref_chosen": -59.73781204223633, + "logps/ref_rejected": -71.17877197265625, + "logps/rejected": -88.17243194580078, + "loss": 1.3327, + "margin_dpo/margin_mean": 0.3417208194732666, + "margin_dpo/margin_std": 10.12466049194336, + "step": 118 + }, + { + "epoch": 0.17989417989417988, + "grad_norm": 3.382061719894409, + "learning_rate": 4.909605396399855e-07, + "logits/chosen": 0.42505258321762085, + "logits/rejected": 0.4622851014137268, + "logps/chosen": -75.50498962402344, + "logps/ref_chosen": -57.297969818115234, + "logps/ref_rejected": -43.269874572753906, + "logps/rejected": -58.09909439086914, + "loss": 1.3609, + "margin_dpo/margin_mean": -3.377796173095703, + "margin_dpo/margin_std": 17.354841232299805, + "step": 119 + }, + { + "epoch": 0.18140589569160998, + "grad_norm": 4.64827823638916, + "learning_rate": 4.906048344162676e-07, + "logits/chosen": 0.3838917016983032, + "logits/rejected": 0.2518424391746521, + "logps/chosen": -73.36843872070312, + "logps/ref_chosen": -59.898441314697266, + "logps/ref_rejected": -103.18621826171875, + "logps/rejected": -131.10989379882812, + "loss": 1.3166, + "margin_dpo/margin_mean": 14.453689575195312, + "margin_dpo/margin_std": 17.748037338256836, + "step": 120 + }, + { + "epoch": 0.18291761148904007, + "grad_norm": 5.509910583496094, + "learning_rate": 4.902423989581143e-07, + "logits/chosen": 0.4782486855983734, + "logits/rejected": 0.3805123567581177, + "logps/chosen": -88.04644775390625, + "logps/ref_chosen": -67.99308776855469, + "logps/ref_rejected": -128.11260986328125, + "logps/rejected": -163.5806427001953, + "loss": 1.2951, + "margin_dpo/margin_mean": 15.414678573608398, + "margin_dpo/margin_std": 29.943111419677734, + "step": 121 + }, + { + "epoch": 0.18442932728647016, + "grad_norm": 3.7154958248138428, + "learning_rate": 4.898732434036243e-07, + "logits/chosen": 0.4762170612812042, + "logits/rejected": 0.41412898898124695, + "logps/chosen": -74.78926086425781, + "logps/ref_chosen": -57.45248031616211, + "logps/ref_rejected": -94.9098892211914, + "logps/rejected": -132.783447265625, + "loss": 1.3373, + "margin_dpo/margin_mean": 20.53677749633789, + "margin_dpo/margin_std": 39.82550811767578, + "step": 122 + }, + { + "epoch": 0.18594104308390022, + "grad_norm": 3.9479894638061523, + "learning_rate": 4.894973780788722e-07, + "logits/chosen": 0.4904557466506958, + "logits/rejected": 0.3759152293205261, + "logps/chosen": -68.86778259277344, + "logps/ref_chosen": -51.397483825683594, + "logps/ref_rejected": -106.05813598632812, + "logps/rejected": -142.84266662597656, + "loss": 1.3303, + "margin_dpo/margin_mean": 19.314233779907227, + "margin_dpo/margin_std": 27.63502311706543, + "step": 123 + }, + { + "epoch": 0.1874527588813303, + "grad_norm": 4.315618515014648, + "learning_rate": 4.89114813497619e-07, + "logits/chosen": 0.5175679922103882, + "logits/rejected": 0.3383052349090576, + "logps/chosen": -46.36642837524414, + "logps/ref_chosen": -35.62065887451172, + "logps/ref_rejected": -100.73350524902344, + "logps/rejected": -135.20120239257812, + "loss": 1.3198, + "margin_dpo/margin_mean": 23.721927642822266, + "margin_dpo/margin_std": 20.40395736694336, + "step": 124 + }, + { + "epoch": 0.1889644746787604, + "grad_norm": 4.568764686584473, + "learning_rate": 4.887255603610184e-07, + "logits/chosen": 0.5989629030227661, + "logits/rejected": 0.4955187439918518, + "logps/chosen": -93.56169891357422, + "logps/ref_chosen": -66.91831970214844, + "logps/ref_rejected": -116.68222045898438, + "logps/rejected": -157.46566772460938, + "loss": 1.313, + "margin_dpo/margin_mean": 14.140068054199219, + "margin_dpo/margin_std": 47.2589225769043, + "step": 125 + }, + { + "epoch": 0.19047619047619047, + "grad_norm": 4.645914077758789, + "learning_rate": 4.883296295573176e-07, + "logits/chosen": 0.3819458484649658, + "logits/rejected": 0.3799610137939453, + "logps/chosen": -78.845703125, + "logps/ref_chosen": -60.64362335205078, + "logps/ref_rejected": -54.69144821166992, + "logps/rejected": -76.06550598144531, + "loss": 1.3929, + "margin_dpo/margin_mean": 3.171980381011963, + "margin_dpo/margin_std": 13.540816307067871, + "step": 126 + }, + { + "epoch": 0.19198790627362056, + "grad_norm": 3.9225544929504395, + "learning_rate": 4.87927032161552e-07, + "logits/chosen": 0.5223321914672852, + "logits/rejected": 0.4658096730709076, + "logps/chosen": -100.17329406738281, + "logps/ref_chosen": -73.2366943359375, + "logps/ref_rejected": -83.54694366455078, + "logps/rejected": -119.79974365234375, + "loss": 1.3401, + "margin_dpo/margin_mean": 9.31619930267334, + "margin_dpo/margin_std": 25.015779495239258, + "step": 127 + }, + { + "epoch": 0.19349962207105065, + "grad_norm": 4.997095584869385, + "learning_rate": 4.875177794352363e-07, + "logits/chosen": 0.4383442997932434, + "logits/rejected": 0.4075517952442169, + "logps/chosen": -108.09526062011719, + "logps/ref_chosen": -81.03706359863281, + "logps/ref_rejected": -95.60237121582031, + "logps/rejected": -140.10910034179688, + "loss": 1.2741, + "margin_dpo/margin_mean": 17.448543548583984, + "margin_dpo/margin_std": 43.10020446777344, + "step": 128 + }, + { + "epoch": 0.19501133786848074, + "grad_norm": 5.618246078491211, + "learning_rate": 4.871018828260491e-07, + "logits/chosen": 0.44885939359664917, + "logits/rejected": 0.41066890954971313, + "logps/chosen": -70.06056213378906, + "logps/ref_chosen": -46.31350326538086, + "logps/ref_rejected": -68.42625427246094, + "logps/rejected": -98.83761596679688, + "loss": 1.3926, + "margin_dpo/margin_mean": 6.664305686950684, + "margin_dpo/margin_std": 23.976375579833984, + "step": 129 + }, + { + "epoch": 0.1965230536659108, + "grad_norm": 5.429229259490967, + "learning_rate": 4.866793539675126e-07, + "logits/chosen": 0.4905567169189453, + "logits/rejected": 0.4603291153907776, + "logps/chosen": -101.65276336669922, + "logps/ref_chosen": -65.72907257080078, + "logps/ref_rejected": -97.77519226074219, + "logps/rejected": -144.22227478027344, + "loss": 1.2914, + "margin_dpo/margin_mean": 10.523391723632812, + "margin_dpo/margin_std": 27.65914535522461, + "step": 130 + }, + { + "epoch": 0.1980347694633409, + "grad_norm": 7.263116359710693, + "learning_rate": 4.86250204678667e-07, + "logits/chosen": 0.46883371472358704, + "logits/rejected": 0.37517213821411133, + "logps/chosen": -45.65996551513672, + "logps/ref_chosen": -30.02720069885254, + "logps/ref_rejected": -52.36793518066406, + "logps/rejected": -78.58646392822266, + "loss": 1.2509, + "margin_dpo/margin_mean": 10.585772514343262, + "margin_dpo/margin_std": 22.57959747314453, + "step": 131 + }, + { + "epoch": 0.19954648526077098, + "grad_norm": 4.200234413146973, + "learning_rate": 4.858144469637408e-07, + "logits/chosen": 0.5417824387550354, + "logits/rejected": 0.5621850490570068, + "logps/chosen": -76.25199127197266, + "logps/ref_chosen": -51.91057586669922, + "logps/ref_rejected": -42.87281799316406, + "logps/rejected": -67.77296447753906, + "loss": 1.3117, + "margin_dpo/margin_mean": 0.5587306022644043, + "margin_dpo/margin_std": 15.402095794677734, + "step": 132 + }, + { + "epoch": 0.20105820105820105, + "grad_norm": 6.736537933349609, + "learning_rate": 4.853720930118138e-07, + "logits/chosen": 0.5202792882919312, + "logits/rejected": 0.49684813618659973, + "logps/chosen": -79.01284790039062, + "logps/ref_chosen": -51.23572540283203, + "logps/ref_rejected": -57.740684509277344, + "logps/rejected": -93.11215209960938, + "loss": 1.3884, + "margin_dpo/margin_mean": 7.594354152679443, + "margin_dpo/margin_std": 35.9656867980957, + "step": 133 + }, + { + "epoch": 0.20256991685563114, + "grad_norm": 4.357104301452637, + "learning_rate": 4.849231551964771e-07, + "logits/chosen": 0.6357393264770508, + "logits/rejected": 0.6053987741470337, + "logps/chosen": -93.45689392089844, + "logps/ref_chosen": -60.42839813232422, + "logps/ref_rejected": -74.99949645996094, + "logps/rejected": -121.59634399414062, + "loss": 1.281, + "margin_dpo/margin_mean": 13.568346977233887, + "margin_dpo/margin_std": 36.36805725097656, + "step": 134 + }, + { + "epoch": 0.20408163265306123, + "grad_norm": 4.28125524520874, + "learning_rate": 4.844676460754862e-07, + "logits/chosen": 0.5447328090667725, + "logits/rejected": 0.5454561710357666, + "logps/chosen": -93.66383361816406, + "logps/ref_chosen": -57.496219635009766, + "logps/ref_rejected": -65.18019104003906, + "logps/rejected": -101.57806396484375, + "loss": 1.324, + "margin_dpo/margin_mean": 0.23026132583618164, + "margin_dpo/margin_std": 23.302963256835938, + "step": 135 + }, + { + "epoch": 0.20559334845049132, + "grad_norm": 5.157009124755859, + "learning_rate": 4.840055783904106e-07, + "logits/chosen": 0.4646771550178528, + "logits/rejected": 0.49990448355674744, + "logps/chosen": -127.57366180419922, + "logps/ref_chosen": -86.13214111328125, + "logps/ref_rejected": -57.09363555908203, + "logps/rejected": -86.46631622314453, + "loss": 1.2754, + "margin_dpo/margin_mean": -12.068828582763672, + "margin_dpo/margin_std": 32.699989318847656, + "step": 136 + }, + { + "epoch": 0.20710506424792138, + "grad_norm": 5.023794174194336, + "learning_rate": 4.835369650662767e-07, + "logits/chosen": 0.4657016396522522, + "logits/rejected": 0.41886240243911743, + "logps/chosen": -117.60987091064453, + "logps/ref_chosen": -85.38725280761719, + "logps/ref_rejected": -74.20018005371094, + "logps/rejected": -122.04615020751953, + "loss": 1.321, + "margin_dpo/margin_mean": 15.623347282409668, + "margin_dpo/margin_std": 53.613494873046875, + "step": 137 + }, + { + "epoch": 0.20861678004535147, + "grad_norm": 6.945674896240234, + "learning_rate": 4.830618192112065e-07, + "logits/chosen": 0.47126084566116333, + "logits/rejected": 0.48690515756607056, + "logps/chosen": -163.68258666992188, + "logps/ref_chosen": -91.57633972167969, + "logps/ref_rejected": -72.2829360961914, + "logps/rejected": -130.02001953125, + "loss": 1.3501, + "margin_dpo/margin_mean": -14.36915397644043, + "margin_dpo/margin_std": 60.06218338012695, + "step": 138 + }, + { + "epoch": 0.21012849584278157, + "grad_norm": 6.725894927978516, + "learning_rate": 4.825801541160509e-07, + "logits/chosen": 0.4866417348384857, + "logits/rejected": 0.49573272466659546, + "logps/chosen": -97.94691467285156, + "logps/ref_chosen": -61.32575225830078, + "logps/ref_rejected": -58.40146255493164, + "logps/rejected": -101.65697479248047, + "loss": 1.3529, + "margin_dpo/margin_mean": 6.634352684020996, + "margin_dpo/margin_std": 25.386695861816406, + "step": 139 + }, + { + "epoch": 0.21164021164021163, + "grad_norm": 6.632240295410156, + "learning_rate": 4.820919832540181e-07, + "logits/chosen": 0.4285508096218109, + "logits/rejected": 0.4244239926338196, + "logps/chosen": -144.7003631591797, + "logps/ref_chosen": -90.57447814941406, + "logps/ref_rejected": -99.98605346679688, + "logps/rejected": -158.67343139648438, + "loss": 1.3232, + "margin_dpo/margin_mean": 4.56149959564209, + "margin_dpo/margin_std": 71.53721618652344, + "step": 140 + }, + { + "epoch": 0.21315192743764172, + "grad_norm": 5.187021255493164, + "learning_rate": 4.815973202802966e-07, + "logits/chosen": 0.5863088965415955, + "logits/rejected": 0.5247952938079834, + "logps/chosen": -98.68556213378906, + "logps/ref_chosen": -48.589813232421875, + "logps/ref_rejected": -86.63417053222656, + "logps/rejected": -149.1506805419922, + "loss": 1.299, + "margin_dpo/margin_mean": 12.420760154724121, + "margin_dpo/margin_std": 37.57579040527344, + "step": 141 + }, + { + "epoch": 0.2146636432350718, + "grad_norm": 4.731565475463867, + "learning_rate": 4.810961790316729e-07, + "logits/chosen": 0.5422487258911133, + "logits/rejected": 0.5291632413864136, + "logps/chosen": -82.97702026367188, + "logps/ref_chosen": -52.91022491455078, + "logps/ref_rejected": -69.94438934326172, + "logps/rejected": -105.55659484863281, + "loss": 1.3097, + "margin_dpo/margin_mean": 5.545400142669678, + "margin_dpo/margin_std": 24.14703941345215, + "step": 142 + }, + { + "epoch": 0.2161753590325019, + "grad_norm": 7.777275085449219, + "learning_rate": 4.805885735261454e-07, + "logits/chosen": 0.6047331690788269, + "logits/rejected": 0.5495315194129944, + "logps/chosen": -65.96603393554688, + "logps/ref_chosen": -41.020355224609375, + "logps/ref_rejected": -76.39324951171875, + "logps/rejected": -121.12960815429688, + "loss": 1.399, + "margin_dpo/margin_mean": 19.79067611694336, + "margin_dpo/margin_std": 37.09278106689453, + "step": 143 + }, + { + "epoch": 0.21768707482993196, + "grad_norm": 7.344333171844482, + "learning_rate": 4.800745179625307e-07, + "logits/chosen": 0.5492057204246521, + "logits/rejected": 0.5295698046684265, + "logps/chosen": -72.70046997070312, + "logps/ref_chosen": -42.882530212402344, + "logps/ref_rejected": -55.65336608886719, + "logps/rejected": -92.50035095214844, + "loss": 1.3536, + "margin_dpo/margin_mean": 7.029045104980469, + "margin_dpo/margin_std": 13.494336128234863, + "step": 144 + }, + { + "epoch": 0.21919879062736206, + "grad_norm": 13.654488563537598, + "learning_rate": 4.795540267200686e-07, + "logits/chosen": 0.3647320866584778, + "logits/rejected": 0.3844793438911438, + "logps/chosen": -107.22215270996094, + "logps/ref_chosen": -64.46841430664062, + "logps/ref_rejected": -74.22611236572266, + "logps/rejected": -127.66377258300781, + "loss": 1.4044, + "margin_dpo/margin_mean": 10.683923721313477, + "margin_dpo/margin_std": 56.14466857910156, + "step": 145 + }, + { + "epoch": 0.22071050642479215, + "grad_norm": 5.76055908203125, + "learning_rate": 4.790271143580173e-07, + "logits/chosen": 0.5389979481697083, + "logits/rejected": 0.5553423166275024, + "logps/chosen": -108.01651000976562, + "logps/ref_chosen": -66.7105712890625, + "logps/ref_rejected": -65.54419708251953, + "logps/rejected": -108.33381652832031, + "loss": 1.3497, + "margin_dpo/margin_mean": 1.4836831092834473, + "margin_dpo/margin_std": 34.92258071899414, + "step": 146 + }, + { + "epoch": 0.2222222222222222, + "grad_norm": 5.631329536437988, + "learning_rate": 4.784937956152489e-07, + "logits/chosen": 0.42997854948043823, + "logits/rejected": 0.3938245177268982, + "logps/chosen": -113.09949493408203, + "logps/ref_chosen": -69.16227722167969, + "logps/ref_rejected": -93.64544677734375, + "logps/rejected": -146.63816833496094, + "loss": 1.3586, + "margin_dpo/margin_mean": 9.055498123168945, + "margin_dpo/margin_std": 28.972103118896484, + "step": 147 + }, + { + "epoch": 0.2237339380196523, + "grad_norm": 5.31839656829834, + "learning_rate": 4.779540854098347e-07, + "logits/chosen": 0.7581343650817871, + "logits/rejected": 0.6984580755233765, + "logps/chosen": -70.00361633300781, + "logps/ref_chosen": -43.368194580078125, + "logps/ref_rejected": -76.12153625488281, + "logps/rejected": -122.95445251464844, + "loss": 1.2599, + "margin_dpo/margin_mean": 20.197490692138672, + "margin_dpo/margin_std": 35.63082504272461, + "step": 148 + }, + { + "epoch": 0.2252456538170824, + "grad_norm": 4.855349063873291, + "learning_rate": 4.774079988386296e-07, + "logits/chosen": 0.4739559292793274, + "logits/rejected": 0.5105680227279663, + "logps/chosen": -98.31448364257812, + "logps/ref_chosen": -59.394657135009766, + "logps/ref_rejected": -50.62220001220703, + "logps/rejected": -90.08949279785156, + "loss": 1.3099, + "margin_dpo/margin_mean": 0.5474551916122437, + "margin_dpo/margin_std": 29.59663963317871, + "step": 149 + }, + { + "epoch": 0.22675736961451248, + "grad_norm": 4.553208827972412, + "learning_rate": 4.768555511768486e-07, + "logits/chosen": 0.41769298911094666, + "logits/rejected": 0.42729195952415466, + "logps/chosen": -122.25434875488281, + "logps/ref_chosen": -75.73005676269531, + "logps/ref_rejected": -88.11376190185547, + "logps/rejected": -137.2162322998047, + "loss": 1.277, + "margin_dpo/margin_mean": 2.578186511993408, + "margin_dpo/margin_std": 40.36921691894531, + "step": 150 + }, + { + "epoch": 0.22826908541194255, + "grad_norm": 4.969564437866211, + "learning_rate": 4.762967578776406e-07, + "logits/chosen": 0.528440535068512, + "logits/rejected": 0.4810951352119446, + "logps/chosen": -94.2465591430664, + "logps/ref_chosen": -64.3418960571289, + "logps/ref_rejected": -81.62193298339844, + "logps/rejected": -135.70932006835938, + "loss": 1.2305, + "margin_dpo/margin_mean": 24.182723999023438, + "margin_dpo/margin_std": 51.8789176940918, + "step": 151 + }, + { + "epoch": 0.22978080120937264, + "grad_norm": 4.437036037445068, + "learning_rate": 4.757316345716553e-07, + "logits/chosen": 0.4535418152809143, + "logits/rejected": 0.4450215995311737, + "logps/chosen": -111.42601013183594, + "logps/ref_chosen": -71.10409545898438, + "logps/ref_rejected": -85.16281127929688, + "logps/rejected": -132.8240203857422, + "loss": 1.2759, + "margin_dpo/margin_mean": 7.339301586151123, + "margin_dpo/margin_std": 32.49983596801758, + "step": 152 + }, + { + "epoch": 0.23129251700680273, + "grad_norm": 4.642518043518066, + "learning_rate": 4.751601970666064e-07, + "logits/chosen": 0.30414754152297974, + "logits/rejected": 0.25842684507369995, + "logps/chosen": -119.28223419189453, + "logps/ref_chosen": -72.91175842285156, + "logps/ref_rejected": -77.55793762207031, + "logps/rejected": -136.66567993164062, + "loss": 1.2979, + "margin_dpo/margin_mean": 12.737245559692383, + "margin_dpo/margin_std": 24.552635192871094, + "step": 153 + }, + { + "epoch": 0.2328042328042328, + "grad_norm": 4.9511237144470215, + "learning_rate": 4.745824613468292e-07, + "logits/chosen": 0.35246241092681885, + "logits/rejected": 0.5255336761474609, + "logps/chosen": -121.34557342529297, + "logps/ref_chosen": -75.31983947753906, + "logps/ref_rejected": -54.22064208984375, + "logps/rejected": -90.056884765625, + "loss": 1.3369, + "margin_dpo/margin_mean": -10.189491271972656, + "margin_dpo/margin_std": 41.34107971191406, + "step": 154 + }, + { + "epoch": 0.23431594860166288, + "grad_norm": 5.262056350708008, + "learning_rate": 4.7399844357283393e-07, + "logits/chosen": 0.5434817671775818, + "logits/rejected": 0.46925586462020874, + "logps/chosen": -76.32972717285156, + "logps/ref_chosen": -44.81412124633789, + "logps/ref_rejected": -76.29104614257812, + "logps/rejected": -128.21963500976562, + "loss": 1.3033, + "margin_dpo/margin_mean": 20.412994384765625, + "margin_dpo/margin_std": 27.084177017211914, + "step": 155 + }, + { + "epoch": 0.23582766439909297, + "grad_norm": 5.0457353591918945, + "learning_rate": 4.7340816008085305e-07, + "logits/chosen": 0.44192785024642944, + "logits/rejected": 0.4855366349220276, + "logps/chosen": -137.42698669433594, + "logps/ref_chosen": -94.79901123046875, + "logps/ref_rejected": -81.31282806396484, + "logps/rejected": -137.68701171875, + "loss": 1.2281, + "margin_dpo/margin_mean": 13.746213912963867, + "margin_dpo/margin_std": 34.27767562866211, + "step": 156 + }, + { + "epoch": 0.23733938019652306, + "grad_norm": 4.6686692237854, + "learning_rate": 4.728116273823847e-07, + "logits/chosen": 0.4652218222618103, + "logits/rejected": 0.45060834288597107, + "logps/chosen": -98.19709777832031, + "logps/ref_chosen": -58.356666564941406, + "logps/ref_rejected": -77.88996887207031, + "logps/rejected": -127.36456298828125, + "loss": 1.2929, + "margin_dpo/margin_mean": 9.634150505065918, + "margin_dpo/margin_std": 30.75609588623047, + "step": 157 + }, + { + "epoch": 0.23885109599395313, + "grad_norm": 4.9093017578125, + "learning_rate": 4.7220886216373085e-07, + "logits/chosen": 0.4573588967323303, + "logits/rejected": 0.458060622215271, + "logps/chosen": -108.16549682617188, + "logps/ref_chosen": -66.18313598632812, + "logps/ref_rejected": -68.98072814941406, + "logps/rejected": -120.69749450683594, + "loss": 1.2881, + "margin_dpo/margin_mean": 9.734394073486328, + "margin_dpo/margin_std": 32.37115478515625, + "step": 158 + }, + { + "epoch": 0.24036281179138322, + "grad_norm": 4.744980812072754, + "learning_rate": 4.715998812855304e-07, + "logits/chosen": 0.6077166199684143, + "logits/rejected": 0.550294816493988, + "logps/chosen": -97.12281799316406, + "logps/ref_chosen": -63.03964614868164, + "logps/ref_rejected": -94.90765380859375, + "logps/rejected": -150.78564453125, + "loss": 1.2671, + "margin_dpo/margin_mean": 21.7947998046875, + "margin_dpo/margin_std": 43.32719039916992, + "step": 159 + }, + { + "epoch": 0.2418745275888133, + "grad_norm": 5.374088287353516, + "learning_rate": 4.7098470178228755e-07, + "logits/chosen": 0.3768664002418518, + "logits/rejected": 0.35334479808807373, + "logps/chosen": -82.2614517211914, + "logps/ref_chosen": -53.58171081542969, + "logps/ref_rejected": -70.01301574707031, + "logps/rejected": -118.45684814453125, + "loss": 1.2652, + "margin_dpo/margin_mean": 19.764087677001953, + "margin_dpo/margin_std": 29.50685691833496, + "step": 160 + }, + { + "epoch": 0.24338624338624337, + "grad_norm": 5.152878761291504, + "learning_rate": 4.703633408618955e-07, + "logits/chosen": 0.5939978957176208, + "logits/rejected": 0.5104175806045532, + "logps/chosen": -85.92366790771484, + "logps/ref_chosen": -51.761775970458984, + "logps/ref_rejected": -89.95056915283203, + "logps/rejected": -145.84454345703125, + "loss": 1.2587, + "margin_dpo/margin_mean": 21.732070922851562, + "margin_dpo/margin_std": 34.75413513183594, + "step": 161 + }, + { + "epoch": 0.24489795918367346, + "grad_norm": 6.838240146636963, + "learning_rate": 4.697358159051549e-07, + "logits/chosen": 0.5239239931106567, + "logits/rejected": 0.5126291513442993, + "logps/chosen": -148.87701416015625, + "logps/ref_chosen": -93.13358306884766, + "logps/ref_rejected": -96.1287841796875, + "logps/rejected": -170.743408203125, + "loss": 1.2403, + "margin_dpo/margin_mean": 18.871191024780273, + "margin_dpo/margin_std": 61.78876495361328, + "step": 162 + }, + { + "epoch": 0.24640967498110355, + "grad_norm": 5.787431240081787, + "learning_rate": 4.691021444652876e-07, + "logits/chosen": 0.4375625252723694, + "logits/rejected": 0.2801172137260437, + "logps/chosen": -79.89445495605469, + "logps/ref_chosen": -46.83258056640625, + "logps/ref_rejected": -108.36699676513672, + "logps/rejected": -170.7933807373047, + "loss": 1.2107, + "margin_dpo/margin_mean": 29.364521026611328, + "margin_dpo/margin_std": 36.74135208129883, + "step": 163 + }, + { + "epoch": 0.24792139077853365, + "grad_norm": 5.994203090667725, + "learning_rate": 4.6846234426744624e-07, + "logits/chosen": 0.490617573261261, + "logits/rejected": 0.4238327443599701, + "logps/chosen": -90.10145568847656, + "logps/ref_chosen": -54.36710739135742, + "logps/ref_rejected": -96.78005981445312, + "logps/rejected": -158.1606903076172, + "loss": 1.1904, + "margin_dpo/margin_mean": 25.646286010742188, + "margin_dpo/margin_std": 32.140342712402344, + "step": 164 + }, + { + "epoch": 0.2494331065759637, + "grad_norm": 5.857151508331299, + "learning_rate": 4.678164332082175e-07, + "logits/chosen": 0.6752983331680298, + "logits/rejected": 0.6979560852050781, + "logps/chosen": -97.69610595703125, + "logps/ref_chosen": -57.447242736816406, + "logps/ref_rejected": -57.388519287109375, + "logps/rejected": -110.6009750366211, + "loss": 1.2174, + "margin_dpo/margin_mean": 12.963600158691406, + "margin_dpo/margin_std": 32.28080749511719, + "step": 165 + }, + { + "epoch": 0.2509448223733938, + "grad_norm": 6.883800029754639, + "learning_rate": 4.6716442935512214e-07, + "logits/chosen": 0.6320142149925232, + "logits/rejected": 0.524321436882019, + "logps/chosen": -117.2408447265625, + "logps/ref_chosen": -75.21217346191406, + "logps/ref_rejected": -97.00743103027344, + "logps/rejected": -162.87738037109375, + "loss": 1.2198, + "margin_dpo/margin_mean": 23.84128189086914, + "margin_dpo/margin_std": 30.363832473754883, + "step": 166 + }, + { + "epoch": 0.25245653817082386, + "grad_norm": 7.713088512420654, + "learning_rate": 4.6650635094610966e-07, + "logits/chosen": 0.6184214949607849, + "logits/rejected": 0.5350244641304016, + "logps/chosen": -72.7265853881836, + "logps/ref_chosen": -39.998783111572266, + "logps/ref_rejected": -68.48226928710938, + "logps/rejected": -122.67631530761719, + "loss": 1.2041, + "margin_dpo/margin_mean": 21.466243743896484, + "margin_dpo/margin_std": 22.175495147705078, + "step": 167 + }, + { + "epoch": 0.25396825396825395, + "grad_norm": 6.5206685066223145, + "learning_rate": 4.6584221638904767e-07, + "logits/chosen": 0.46801936626434326, + "logits/rejected": 0.5133869647979736, + "logps/chosen": -126.92230224609375, + "logps/ref_chosen": -78.39299774169922, + "logps/ref_rejected": -56.804256439208984, + "logps/rejected": -111.1657485961914, + "loss": 1.2315, + "margin_dpo/margin_mean": 5.832178592681885, + "margin_dpo/margin_std": 34.10231399536133, + "step": 168 + }, + { + "epoch": 0.25547996976568405, + "grad_norm": 6.418181419372559, + "learning_rate": 4.651720442612075e-07, + "logits/chosen": 0.5367448329925537, + "logits/rejected": 0.49534183740615845, + "logps/chosen": -96.13229370117188, + "logps/ref_chosen": -61.65520477294922, + "logps/ref_rejected": -74.08130645751953, + "logps/rejected": -126.73211669921875, + "loss": 1.2526, + "margin_dpo/margin_mean": 18.173721313476562, + "margin_dpo/margin_std": 33.40047073364258, + "step": 169 + }, + { + "epoch": 0.25699168556311414, + "grad_norm": 6.973052501678467, + "learning_rate": 4.6449585330874425e-07, + "logits/chosen": 0.6434294581413269, + "logits/rejected": 0.6778239011764526, + "logps/chosen": -76.18331146240234, + "logps/ref_chosen": -48.08928680419922, + "logps/ref_rejected": -49.94983673095703, + "logps/rejected": -97.5601577758789, + "loss": 1.2333, + "margin_dpo/margin_mean": 19.516292572021484, + "margin_dpo/margin_std": 36.30491638183594, + "step": 170 + }, + { + "epoch": 0.2585034013605442, + "grad_norm": 7.348653316497803, + "learning_rate": 4.6381366244617224e-07, + "logits/chosen": 0.7886656522750854, + "logits/rejected": 0.7550907135009766, + "logps/chosen": -70.39274597167969, + "logps/ref_chosen": -49.932838439941406, + "logps/ref_rejected": -69.74531555175781, + "logps/rejected": -124.51280212402344, + "loss": 1.2094, + "margin_dpo/margin_mean": 34.307586669921875, + "margin_dpo/margin_std": 35.918819427490234, + "step": 171 + }, + { + "epoch": 0.2600151171579743, + "grad_norm": 8.536656379699707, + "learning_rate": 4.631254907558365e-07, + "logits/chosen": 0.8007753491401672, + "logits/rejected": 0.7138994932174683, + "logps/chosen": -96.61810302734375, + "logps/ref_chosen": -49.946144104003906, + "logps/ref_rejected": -93.07025146484375, + "logps/rejected": -180.4694366455078, + "loss": 1.1897, + "margin_dpo/margin_mean": 40.72722244262695, + "margin_dpo/margin_std": 49.622161865234375, + "step": 172 + }, + { + "epoch": 0.2615268329554044, + "grad_norm": 7.331446647644043, + "learning_rate": 4.624313574873786e-07, + "logits/chosen": 0.6109236478805542, + "logits/rejected": 0.6213595867156982, + "logps/chosen": -75.77210235595703, + "logps/ref_chosen": -49.288230895996094, + "logps/ref_rejected": -48.86689758300781, + "logps/rejected": -101.83216094970703, + "loss": 1.1658, + "margin_dpo/margin_mean": 26.481386184692383, + "margin_dpo/margin_std": 57.468788146972656, + "step": 173 + }, + { + "epoch": 0.26303854875283444, + "grad_norm": 8.760350227355957, + "learning_rate": 4.61731282057198e-07, + "logits/chosen": 0.6046967506408691, + "logits/rejected": 0.5037060379981995, + "logps/chosen": -112.89518737792969, + "logps/ref_chosen": -64.05992126464844, + "logps/ref_rejected": -93.19732666015625, + "logps/rejected": -186.00070190429688, + "loss": 1.1071, + "margin_dpo/margin_mean": 43.968109130859375, + "margin_dpo/margin_std": 35.23388671875, + "step": 174 + }, + { + "epoch": 0.26455026455026454, + "grad_norm": 9.087226867675781, + "learning_rate": 4.6102528404790965e-07, + "logits/chosen": 0.8991174697875977, + "logits/rejected": 0.8246195316314697, + "logps/chosen": -83.77995300292969, + "logps/ref_chosen": -40.91709899902344, + "logps/ref_rejected": -84.71610260009766, + "logps/rejected": -157.31451416015625, + "loss": 1.1565, + "margin_dpo/margin_mean": 29.735551834106445, + "margin_dpo/margin_std": 53.80352783203125, + "step": 175 + }, + { + "epoch": 0.2660619803476946, + "grad_norm": 13.09632396697998, + "learning_rate": 4.603133832077953e-07, + "logits/chosen": 0.6280794143676758, + "logits/rejected": 0.6072982549667358, + "logps/chosen": -153.69549560546875, + "logps/ref_chosen": -83.32914733886719, + "logps/ref_rejected": -69.77914428710938, + "logps/rejected": -140.9823455810547, + "loss": 1.2784, + "margin_dpo/margin_mean": 0.8368606567382812, + "margin_dpo/margin_std": 59.72896957397461, + "step": 176 + }, + { + "epoch": 0.2675736961451247, + "grad_norm": 11.76092529296875, + "learning_rate": 4.5959559945025183e-07, + "logits/chosen": 0.843967080116272, + "logits/rejected": 0.8128781318664551, + "logps/chosen": -91.85818481445312, + "logps/ref_chosen": -56.50011444091797, + "logps/ref_rejected": -76.53157806396484, + "logps/rejected": -166.71331787109375, + "loss": 1.008, + "margin_dpo/margin_mean": 54.82364273071289, + "margin_dpo/margin_std": 54.522438049316406, + "step": 177 + }, + { + "epoch": 0.2690854119425548, + "grad_norm": 12.708898544311523, + "learning_rate": 4.588719528532341e-07, + "logits/chosen": 0.7774707078933716, + "logits/rejected": 0.6817446351051331, + "logps/chosen": -82.52491760253906, + "logps/ref_chosen": -35.268131256103516, + "logps/ref_rejected": -70.45591735839844, + "logps/rejected": -159.62277221679688, + "loss": 1.1395, + "margin_dpo/margin_mean": 41.910072326660156, + "margin_dpo/margin_std": 58.84284973144531, + "step": 178 + }, + { + "epoch": 0.2705971277399849, + "grad_norm": 11.324702262878418, + "learning_rate": 4.581424636586928e-07, + "logits/chosen": 0.724172830581665, + "logits/rejected": 0.7666547298431396, + "logps/chosen": -149.95443725585938, + "logps/ref_chosen": -82.57086181640625, + "logps/ref_rejected": -75.7454605102539, + "logps/rejected": -170.7916717529297, + "loss": 1.1648, + "margin_dpo/margin_mean": 27.66265106201172, + "margin_dpo/margin_std": 52.755733489990234, + "step": 179 + }, + { + "epoch": 0.272108843537415, + "grad_norm": 10.527196884155273, + "learning_rate": 4.5740715227200897e-07, + "logits/chosen": 0.6827090978622437, + "logits/rejected": 0.591106116771698, + "logps/chosen": -93.48646545410156, + "logps/ref_chosen": -49.27946472167969, + "logps/ref_rejected": -70.7477035522461, + "logps/rejected": -130.7356719970703, + "loss": 1.2205, + "margin_dpo/margin_mean": 15.780977249145508, + "margin_dpo/margin_std": 23.79660415649414, + "step": 180 + }, + { + "epoch": 0.273620559334845, + "grad_norm": 12.358736991882324, + "learning_rate": 4.566660392614228e-07, + "logits/chosen": 0.7833654880523682, + "logits/rejected": 0.7754448652267456, + "logps/chosen": -116.57708740234375, + "logps/ref_chosen": -68.44458770751953, + "logps/ref_rejected": -75.00827026367188, + "logps/rejected": -140.25169372558594, + "loss": 1.0862, + "margin_dpo/margin_mean": 17.11092185974121, + "margin_dpo/margin_std": 26.67954444885254, + "step": 181 + }, + { + "epoch": 0.2751322751322751, + "grad_norm": 11.587776184082031, + "learning_rate": 4.5591914535745817e-07, + "logits/chosen": 0.7390822768211365, + "logits/rejected": 0.6326345205307007, + "logps/chosen": -106.85360717773438, + "logps/ref_chosen": -53.30392074584961, + "logps/ref_rejected": -106.17438507080078, + "logps/rejected": -206.2800750732422, + "loss": 1.0938, + "margin_dpo/margin_mean": 46.55598449707031, + "margin_dpo/margin_std": 62.35004425048828, + "step": 182 + }, + { + "epoch": 0.2766439909297052, + "grad_norm": 14.335674285888672, + "learning_rate": 4.551664914523433e-07, + "logits/chosen": 0.6751140356063843, + "logits/rejected": 0.6843445301055908, + "logps/chosen": -129.42478942871094, + "logps/ref_chosen": -61.0320930480957, + "logps/ref_rejected": -64.14570617675781, + "logps/rejected": -138.19187927246094, + "loss": 1.3218, + "margin_dpo/margin_mean": 5.653476238250732, + "margin_dpo/margin_std": 64.97230529785156, + "step": 183 + }, + { + "epoch": 0.2781557067271353, + "grad_norm": 14.724100112915039, + "learning_rate": 4.544080985994258e-07, + "logits/chosen": 0.8014061450958252, + "logits/rejected": 0.7991318702697754, + "logps/chosen": -102.4467544555664, + "logps/ref_chosen": -58.40687561035156, + "logps/ref_rejected": -65.13507080078125, + "logps/rejected": -135.5673828125, + "loss": 1.1217, + "margin_dpo/margin_mean": 26.392431259155273, + "margin_dpo/margin_std": 42.3356819152832, + "step": 184 + }, + { + "epoch": 0.2796674225245654, + "grad_norm": 13.151495933532715, + "learning_rate": 4.5364398801258394e-07, + "logits/chosen": 0.8872311115264893, + "logits/rejected": 0.8844795227050781, + "logps/chosen": -117.208984375, + "logps/ref_chosen": -68.01717376708984, + "logps/ref_rejected": -68.56169128417969, + "logps/rejected": -155.34658813476562, + "loss": 1.1561, + "margin_dpo/margin_mean": 37.59309387207031, + "margin_dpo/margin_std": 63.59172058105469, + "step": 185 + }, + { + "epoch": 0.2811791383219955, + "grad_norm": 18.820205688476562, + "learning_rate": 4.5287418106563354e-07, + "logits/chosen": 0.7388228178024292, + "logits/rejected": 0.6680508255958557, + "logps/chosen": -118.04601287841797, + "logps/ref_chosen": -66.14515686035156, + "logps/ref_rejected": -101.60063934326172, + "logps/rejected": -203.31298828125, + "loss": 1.1578, + "margin_dpo/margin_mean": 49.811492919921875, + "margin_dpo/margin_std": 85.83422088623047, + "step": 186 + }, + { + "epoch": 0.28269085411942557, + "grad_norm": 13.48037052154541, + "learning_rate": 4.520986992917297e-07, + "logits/chosen": 0.6655247211456299, + "logits/rejected": 0.5620222687721252, + "logps/chosen": -162.19210815429688, + "logps/ref_chosen": -80.47019958496094, + "logps/ref_rejected": -118.81498718261719, + "logps/rejected": -255.6341094970703, + "loss": 1.1797, + "margin_dpo/margin_mean": 55.09719467163086, + "margin_dpo/margin_std": 82.15184020996094, + "step": 187 + }, + { + "epoch": 0.2842025699168556, + "grad_norm": 12.772809982299805, + "learning_rate": 4.5131756438276466e-07, + "logits/chosen": 0.8540096282958984, + "logits/rejected": 0.7868589758872986, + "logps/chosen": -118.0152587890625, + "logps/ref_chosen": -66.75248718261719, + "logps/ref_rejected": -97.99404907226562, + "logps/rejected": -214.93295288085938, + "loss": 1.1456, + "margin_dpo/margin_mean": 65.6761474609375, + "margin_dpo/margin_std": 84.41012573242188, + "step": 188 + }, + { + "epoch": 0.2857142857142857, + "grad_norm": 19.746803283691406, + "learning_rate": 4.5053079818876096e-07, + "logits/chosen": 0.742944598197937, + "logits/rejected": 0.8294093012809753, + "logps/chosen": -144.683349609375, + "logps/ref_chosen": -75.2247314453125, + "logps/ref_rejected": -72.38041687011719, + "logps/rejected": -162.834228515625, + "loss": 1.3007, + "margin_dpo/margin_mean": 20.995182037353516, + "margin_dpo/margin_std": 69.88140869140625, + "step": 189 + }, + { + "epoch": 0.2872260015117158, + "grad_norm": 15.231517791748047, + "learning_rate": 4.4973842271726024e-07, + "logits/chosen": 0.8356347680091858, + "logits/rejected": 0.7419763803482056, + "logps/chosen": -101.99494934082031, + "logps/ref_chosen": -47.77008056640625, + "logps/ref_rejected": -84.10618591308594, + "logps/rejected": -201.740966796875, + "loss": 0.9599, + "margin_dpo/margin_mean": 63.4099235534668, + "margin_dpo/margin_std": 64.51853942871094, + "step": 190 + }, + { + "epoch": 0.2887377173091459, + "grad_norm": 20.671388626098633, + "learning_rate": 4.48940460132708e-07, + "logits/chosen": 0.8784452676773071, + "logits/rejected": 0.8466886281967163, + "logps/chosen": -134.3704833984375, + "logps/ref_chosen": -66.10474395751953, + "logps/ref_rejected": -79.35490417480469, + "logps/rejected": -206.15818786621094, + "loss": 1.2401, + "margin_dpo/margin_mean": 58.53753662109375, + "margin_dpo/margin_std": 76.90192413330078, + "step": 191 + }, + { + "epoch": 0.29024943310657597, + "grad_norm": 21.7989559173584, + "learning_rate": 4.481369327558329e-07, + "logits/chosen": 0.8773584961891174, + "logits/rejected": 0.8835272789001465, + "logps/chosen": -154.4402618408203, + "logps/ref_chosen": -61.76245880126953, + "logps/ref_rejected": -60.038848876953125, + "logps/rejected": -150.0125732421875, + "loss": 1.279, + "margin_dpo/margin_mean": -2.704071521759033, + "margin_dpo/margin_std": 65.4942626953125, + "step": 192 + }, + { + "epoch": 0.29176114890400606, + "grad_norm": 13.274942398071289, + "learning_rate": 4.47327863063023e-07, + "logits/chosen": 0.7359751462936401, + "logits/rejected": 0.6782011985778809, + "logps/chosen": -131.29852294921875, + "logps/ref_chosen": -50.086849212646484, + "logps/ref_rejected": -69.65550231933594, + "logps/rejected": -187.8815460205078, + "loss": 1.1063, + "margin_dpo/margin_mean": 37.01435089111328, + "margin_dpo/margin_std": 56.66736602783203, + "step": 193 + }, + { + "epoch": 0.29327286470143615, + "grad_norm": 21.689014434814453, + "learning_rate": 4.4651327368569684e-07, + "logits/chosen": 0.7673474550247192, + "logits/rejected": 0.8195520639419556, + "logps/chosen": -152.31005859375, + "logps/ref_chosen": -72.05107116699219, + "logps/ref_rejected": -71.8668212890625, + "logps/rejected": -168.17160034179688, + "loss": 1.2219, + "margin_dpo/margin_mean": 16.045808792114258, + "margin_dpo/margin_std": 65.90227508544922, + "step": 194 + }, + { + "epoch": 0.2947845804988662, + "grad_norm": 19.105045318603516, + "learning_rate": 4.4569318740967043e-07, + "logits/chosen": 0.6917105317115784, + "logits/rejected": 0.761000394821167, + "logps/chosen": -192.57481384277344, + "logps/ref_chosen": -83.29867553710938, + "logps/ref_rejected": -66.07734680175781, + "logps/rejected": -193.638671875, + "loss": 1.179, + "margin_dpo/margin_mean": 18.285186767578125, + "margin_dpo/margin_std": 70.0030288696289, + "step": 195 + }, + { + "epoch": 0.2962962962962963, + "grad_norm": 14.678657531738281, + "learning_rate": 4.448676271745197e-07, + "logits/chosen": 0.8080000877380371, + "logits/rejected": 0.8198258876800537, + "logps/chosen": -177.1346435546875, + "logps/ref_chosen": -81.20787048339844, + "logps/ref_rejected": -79.35533142089844, + "logps/rejected": -194.04977416992188, + "loss": 1.1227, + "margin_dpo/margin_mean": 18.76766014099121, + "margin_dpo/margin_std": 53.9957160949707, + "step": 196 + }, + { + "epoch": 0.29780801209372637, + "grad_norm": 20.904054641723633, + "learning_rate": 4.440366160729392e-07, + "logits/chosen": 1.08381986618042, + "logits/rejected": 0.9524755477905273, + "logps/chosen": -121.47453308105469, + "logps/ref_chosen": -38.60869598388672, + "logps/ref_rejected": -74.34709167480469, + "logps/rejected": -196.74826049804688, + "loss": 1.1447, + "margin_dpo/margin_mean": 39.53533935546875, + "margin_dpo/margin_std": 104.43431091308594, + "step": 197 + }, + { + "epoch": 0.29931972789115646, + "grad_norm": 18.375619888305664, + "learning_rate": 4.432001773500957e-07, + "logits/chosen": 1.0280174016952515, + "logits/rejected": 0.9963433742523193, + "logps/chosen": -113.64097595214844, + "logps/ref_chosen": -48.90552520751953, + "logps/ref_rejected": -63.93467712402344, + "logps/rejected": -166.58355712890625, + "loss": 1.1247, + "margin_dpo/margin_mean": 37.91343307495117, + "margin_dpo/margin_std": 68.63130187988281, + "step": 198 + }, + { + "epoch": 0.30083144368858655, + "grad_norm": 13.685064315795898, + "learning_rate": 4.4235833440297856e-07, + "logits/chosen": 0.8831381797790527, + "logits/rejected": 0.8222548961639404, + "logps/chosen": -129.5958709716797, + "logps/ref_chosen": -57.552146911621094, + "logps/ref_rejected": -76.73622131347656, + "logps/rejected": -196.77474975585938, + "loss": 1.1776, + "margin_dpo/margin_mean": 47.994808197021484, + "margin_dpo/margin_std": 66.5357437133789, + "step": 199 + }, + { + "epoch": 0.30234315948601664, + "grad_norm": 13.301373481750488, + "learning_rate": 4.415111107797445e-07, + "logits/chosen": 0.9643712043762207, + "logits/rejected": 0.7956128120422363, + "logps/chosen": -104.0108413696289, + "logps/ref_chosen": -49.7380485534668, + "logps/ref_rejected": -119.85460662841797, + "logps/rejected": -247.071533203125, + "loss": 1.0529, + "margin_dpo/margin_mean": 72.94413757324219, + "margin_dpo/margin_std": 100.05728149414062, + "step": 200 + }, + { + "epoch": 0.30385487528344673, + "grad_norm": 14.938904762268066, + "learning_rate": 4.4065853017905953e-07, + "logits/chosen": 0.9494801759719849, + "logits/rejected": 0.963019847869873, + "logps/chosen": -168.0663604736328, + "logps/ref_chosen": -76.25155639648438, + "logps/ref_rejected": -75.89337921142578, + "logps/rejected": -206.00071716308594, + "loss": 1.1877, + "margin_dpo/margin_mean": 38.292537689208984, + "margin_dpo/margin_std": 74.59246063232422, + "step": 201 + }, + { + "epoch": 0.30536659108087677, + "grad_norm": 19.734487533569336, + "learning_rate": 4.3980061644943575e-07, + "logits/chosen": 0.8432959318161011, + "logits/rejected": 0.6981616020202637, + "logps/chosen": -75.70082092285156, + "logps/ref_chosen": -31.3531551361084, + "logps/ref_rejected": -70.96551513671875, + "logps/rejected": -184.80126953125, + "loss": 1.0936, + "margin_dpo/margin_mean": 69.48809051513672, + "margin_dpo/margin_std": 59.054290771484375, + "step": 202 + }, + { + "epoch": 0.30687830687830686, + "grad_norm": 17.070323944091797, + "learning_rate": 4.3893739358856455e-07, + "logits/chosen": 0.9048564434051514, + "logits/rejected": 0.7673661708831787, + "logps/chosen": -144.82659912109375, + "logps/ref_chosen": -63.048912048339844, + "logps/ref_rejected": -137.18235778808594, + "logps/rejected": -288.850341796875, + "loss": 1.0973, + "margin_dpo/margin_mean": 69.89026641845703, + "margin_dpo/margin_std": 88.3589096069336, + "step": 203 + }, + { + "epoch": 0.30839002267573695, + "grad_norm": 14.029024124145508, + "learning_rate": 4.380688857426449e-07, + "logits/chosen": 0.8778685331344604, + "logits/rejected": 0.7175527811050415, + "logps/chosen": -111.14114379882812, + "logps/ref_chosen": -45.349220275878906, + "logps/ref_rejected": -89.06627655029297, + "logps/rejected": -215.51800537109375, + "loss": 1.0848, + "margin_dpo/margin_mean": 60.6597900390625, + "margin_dpo/margin_std": 76.72471618652344, + "step": 204 + }, + { + "epoch": 0.30990173847316704, + "grad_norm": 15.573190689086914, + "learning_rate": 4.3719511720570814e-07, + "logits/chosen": 1.0194439888000488, + "logits/rejected": 0.9591749310493469, + "logps/chosen": -138.189697265625, + "logps/ref_chosen": -68.1277847290039, + "logps/ref_rejected": -94.7017593383789, + "logps/rejected": -237.84596252441406, + "loss": 1.1396, + "margin_dpo/margin_mean": 73.08229064941406, + "margin_dpo/margin_std": 103.01658630371094, + "step": 205 + }, + { + "epoch": 0.31141345427059713, + "grad_norm": 21.639747619628906, + "learning_rate": 4.363161124189387e-07, + "logits/chosen": 1.0190932750701904, + "logits/rejected": 0.9483546614646912, + "logps/chosen": -123.68317413330078, + "logps/ref_chosen": -49.561851501464844, + "logps/ref_rejected": -91.47630310058594, + "logps/rejected": -198.70071411132812, + "loss": 1.2179, + "margin_dpo/margin_mean": 33.103092193603516, + "margin_dpo/margin_std": 57.71031951904297, + "step": 206 + }, + { + "epoch": 0.3129251700680272, + "grad_norm": 17.420026779174805, + "learning_rate": 4.3543189596998986e-07, + "logits/chosen": 0.9188834428787231, + "logits/rejected": 0.8394033908843994, + "logps/chosen": -133.870361328125, + "logps/ref_chosen": -39.89246368408203, + "logps/ref_rejected": -70.57868194580078, + "logps/rejected": -204.41519165039062, + "loss": 1.14, + "margin_dpo/margin_mean": 39.858604431152344, + "margin_dpo/margin_std": 71.88990783691406, + "step": 207 + }, + { + "epoch": 0.3144368858654573, + "grad_norm": 16.912046432495117, + "learning_rate": 4.3454249259229664e-07, + "logits/chosen": 0.8252842426300049, + "logits/rejected": 0.7790793776512146, + "logps/chosen": -161.83831787109375, + "logps/ref_chosen": -72.9476547241211, + "logps/ref_rejected": -94.71766662597656, + "logps/rejected": -216.20074462890625, + "loss": 1.31, + "margin_dpo/margin_mean": 32.592403411865234, + "margin_dpo/margin_std": 57.00579071044922, + "step": 208 + }, + { + "epoch": 0.31594860166288735, + "grad_norm": 25.769882202148438, + "learning_rate": 4.336479271643833e-07, + "logits/chosen": 0.9054467678070068, + "logits/rejected": 0.831355631351471, + "logps/chosen": -154.79385375976562, + "logps/ref_chosen": -65.3952865600586, + "logps/ref_rejected": -94.79216003417969, + "logps/rejected": -261.7325439453125, + "loss": 1.166, + "margin_dpo/margin_mean": 77.54179382324219, + "margin_dpo/margin_std": 97.3060073852539, + "step": 209 + }, + { + "epoch": 0.31746031746031744, + "grad_norm": 12.30976676940918, + "learning_rate": 4.327482247091679e-07, + "logits/chosen": 0.8562976121902466, + "logits/rejected": 0.6786376237869263, + "logps/chosen": -197.23080444335938, + "logps/ref_chosen": -88.16167449951172, + "logps/ref_rejected": -143.62713623046875, + "logps/rejected": -364.16387939453125, + "loss": 1.0647, + "margin_dpo/margin_mean": 111.46759033203125, + "margin_dpo/margin_std": 77.64178466796875, + "step": 210 + }, + { + "epoch": 0.31897203325774753, + "grad_norm": 16.888795852661133, + "learning_rate": 4.3184341039326217e-07, + "logits/chosen": 1.019036054611206, + "logits/rejected": 0.9224546551704407, + "logps/chosen": -116.31884765625, + "logps/ref_chosen": -43.544952392578125, + "logps/ref_rejected": -96.58848571777344, + "logps/rejected": -221.36495971679688, + "loss": 1.0403, + "margin_dpo/margin_mean": 52.002593994140625, + "margin_dpo/margin_std": 71.8609390258789, + "step": 211 + }, + { + "epoch": 0.3204837490551776, + "grad_norm": 14.35758113861084, + "learning_rate": 4.309335095262675e-07, + "logits/chosen": 0.9997249841690063, + "logits/rejected": 1.0045267343521118, + "logps/chosen": -140.09591674804688, + "logps/ref_chosen": -57.794212341308594, + "logps/ref_rejected": -72.26956176757812, + "logps/rejected": -212.42254638671875, + "loss": 1.0802, + "margin_dpo/margin_mean": 57.851280212402344, + "margin_dpo/margin_std": 110.18971252441406, + "step": 212 + }, + { + "epoch": 0.3219954648526077, + "grad_norm": 15.879948616027832, + "learning_rate": 4.3001854756006724e-07, + "logits/chosen": 0.8429861664772034, + "logits/rejected": 0.8865307569503784, + "logps/chosen": -176.1222381591797, + "logps/ref_chosen": -81.40860748291016, + "logps/ref_rejected": -64.03448486328125, + "logps/rejected": -180.094970703125, + "loss": 1.1033, + "margin_dpo/margin_mean": 21.346853256225586, + "margin_dpo/margin_std": 100.63861083984375, + "step": 213 + }, + { + "epoch": 0.3235071806500378, + "grad_norm": 18.610511779785156, + "learning_rate": 4.290985500881143e-07, + "logits/chosen": 0.7390300035476685, + "logits/rejected": 0.8046228885650635, + "logps/chosen": -145.5377655029297, + "logps/ref_chosen": -63.65519714355469, + "logps/ref_rejected": -61.175392150878906, + "logps/rejected": -205.8516845703125, + "loss": 1.1949, + "margin_dpo/margin_mean": 62.793731689453125, + "margin_dpo/margin_std": 71.61257934570312, + "step": 214 + }, + { + "epoch": 0.3250188964474679, + "grad_norm": 15.646477699279785, + "learning_rate": 4.281735428447157e-07, + "logits/chosen": 0.8204779624938965, + "logits/rejected": 0.7469308376312256, + "logps/chosen": -167.19033813476562, + "logps/ref_chosen": -59.09471893310547, + "logps/ref_rejected": -93.95791625976562, + "logps/rejected": -225.2965087890625, + "loss": 1.0742, + "margin_dpo/margin_mean": 23.242979049682617, + "margin_dpo/margin_std": 72.47611999511719, + "step": 215 + }, + { + "epoch": 0.32653061224489793, + "grad_norm": 25.667827606201172, + "learning_rate": 4.2724355170431247e-07, + "logits/chosen": 0.8592058420181274, + "logits/rejected": 0.8133991360664368, + "logps/chosen": -183.30780029296875, + "logps/ref_chosen": -88.48361206054688, + "logps/ref_rejected": -95.00409698486328, + "logps/rejected": -257.6964111328125, + "loss": 0.9971, + "margin_dpo/margin_mean": 67.86811065673828, + "margin_dpo/margin_std": 59.22956466674805, + "step": 216 + }, + { + "epoch": 0.328042328042328, + "grad_norm": 13.31557559967041, + "learning_rate": 4.26308602680756e-07, + "logits/chosen": 0.9996564388275146, + "logits/rejected": 0.8627007007598877, + "logps/chosen": -174.81881713867188, + "logps/ref_chosen": -62.363914489746094, + "logps/ref_rejected": -104.9856948852539, + "logps/rejected": -284.2158203125, + "loss": 1.0242, + "margin_dpo/margin_mean": 66.77523803710938, + "margin_dpo/margin_std": 126.6890869140625, + "step": 217 + }, + { + "epoch": 0.3295540438397581, + "grad_norm": 18.151487350463867, + "learning_rate": 4.253687219265803e-07, + "logits/chosen": 0.7707165479660034, + "logits/rejected": 0.7527076005935669, + "logps/chosen": -201.19351196289062, + "logps/ref_chosen": -87.87580871582031, + "logps/ref_rejected": -102.97655487060547, + "logps/rejected": -231.63278198242188, + "loss": 1.2726, + "margin_dpo/margin_mean": 15.338525772094727, + "margin_dpo/margin_std": 67.2188720703125, + "step": 218 + }, + { + "epoch": 0.3310657596371882, + "grad_norm": 14.18816089630127, + "learning_rate": 4.2442393573227043e-07, + "logits/chosen": 0.9285779595375061, + "logits/rejected": 0.9085969924926758, + "logps/chosen": -155.8358154296875, + "logps/ref_chosen": -58.567657470703125, + "logps/ref_rejected": -94.29285430908203, + "logps/rejected": -231.346923828125, + "loss": 1.1618, + "margin_dpo/margin_mean": 39.78589630126953, + "margin_dpo/margin_std": 81.78353881835938, + "step": 219 + }, + { + "epoch": 0.3325774754346183, + "grad_norm": 13.471064567565918, + "learning_rate": 4.234742705255272e-07, + "logits/chosen": 0.8746180534362793, + "logits/rejected": 0.7802847623825073, + "logps/chosen": -130.3991241455078, + "logps/ref_chosen": -45.8538703918457, + "logps/ref_rejected": -76.9227066040039, + "logps/rejected": -201.6156005859375, + "loss": 1.0699, + "margin_dpo/margin_mean": 40.14763641357422, + "margin_dpo/margin_std": 70.33467102050781, + "step": 220 + }, + { + "epoch": 0.3340891912320484, + "grad_norm": 13.25949478149414, + "learning_rate": 4.22519752870528e-07, + "logits/chosen": 0.8579016923904419, + "logits/rejected": 0.7474366426467896, + "logps/chosen": -150.62062072753906, + "logps/ref_chosen": -55.79085922241211, + "logps/ref_rejected": -81.47898864746094, + "logps/rejected": -204.39466857910156, + "loss": 1.0478, + "margin_dpo/margin_mean": 28.085922241210938, + "margin_dpo/margin_std": 60.90304946899414, + "step": 221 + }, + { + "epoch": 0.3356009070294785, + "grad_norm": 11.687209129333496, + "learning_rate": 4.2156040946718343e-07, + "logits/chosen": 0.9172873497009277, + "logits/rejected": 0.8860268592834473, + "logps/chosen": -135.50936889648438, + "logps/ref_chosen": -51.75988006591797, + "logps/ref_rejected": -97.38671875, + "logps/rejected": -218.99237060546875, + "loss": 0.9846, + "margin_dpo/margin_mean": 37.856170654296875, + "margin_dpo/margin_std": 90.37283325195312, + "step": 222 + }, + { + "epoch": 0.3371126228269085, + "grad_norm": 12.334152221679688, + "learning_rate": 4.2059626715039065e-07, + "logits/chosen": 0.8898248672485352, + "logits/rejected": 0.8581580519676208, + "logps/chosen": -131.7398681640625, + "logps/ref_chosen": -60.334190368652344, + "logps/ref_rejected": -67.8521728515625, + "logps/rejected": -187.02847290039062, + "loss": 1.0231, + "margin_dpo/margin_mean": 47.77062225341797, + "margin_dpo/margin_std": 67.18444061279297, + "step": 223 + }, + { + "epoch": 0.3386243386243386, + "grad_norm": 13.227733612060547, + "learning_rate": 4.1962735288928304e-07, + "logits/chosen": 1.0880929231643677, + "logits/rejected": 1.0430279970169067, + "logps/chosen": -98.79350280761719, + "logps/ref_chosen": -36.07902908325195, + "logps/ref_rejected": -63.69470977783203, + "logps/rejected": -191.50698852539062, + "loss": 1.3037, + "margin_dpo/margin_mean": 65.09780883789062, + "margin_dpo/margin_std": 61.339786529541016, + "step": 224 + }, + { + "epoch": 0.3401360544217687, + "grad_norm": 13.988600730895996, + "learning_rate": 4.186536937864752e-07, + "logits/chosen": 0.8999257683753967, + "logits/rejected": 0.7167999744415283, + "logps/chosen": -125.11752319335938, + "logps/ref_chosen": -46.84956359863281, + "logps/ref_rejected": -112.8951416015625, + "logps/rejected": -255.221435546875, + "loss": 1.0097, + "margin_dpo/margin_mean": 64.05833435058594, + "margin_dpo/margin_std": 75.08706665039062, + "step": 225 + }, + { + "epoch": 0.3416477702191988, + "grad_norm": 11.088345527648926, + "learning_rate": 4.176753170773052e-07, + "logits/chosen": 0.9160268306732178, + "logits/rejected": 0.8631049990653992, + "logps/chosen": -130.91664123535156, + "logps/ref_chosen": -44.67559814453125, + "logps/ref_rejected": -63.792022705078125, + "logps/rejected": -193.21331787109375, + "loss": 1.1595, + "margin_dpo/margin_mean": 43.18025207519531, + "margin_dpo/margin_std": 66.46514892578125, + "step": 226 + }, + { + "epoch": 0.3431594860166289, + "grad_norm": 13.128989219665527, + "learning_rate": 4.166922501290729e-07, + "logits/chosen": 0.8024705648422241, + "logits/rejected": 0.7597426772117615, + "logps/chosen": -116.00537109375, + "logps/ref_chosen": -47.16712188720703, + "logps/ref_rejected": -63.74501419067383, + "logps/rejected": -188.3765106201172, + "loss": 1.1549, + "margin_dpo/margin_mean": 55.79324722290039, + "margin_dpo/margin_std": 75.62820434570312, + "step": 227 + }, + { + "epoch": 0.34467120181405897, + "grad_norm": 18.30823516845703, + "learning_rate": 4.1570452044027405e-07, + "logits/chosen": 0.9423525333404541, + "logits/rejected": 0.9120630621910095, + "logps/chosen": -164.05587768554688, + "logps/ref_chosen": -63.36100387573242, + "logps/ref_rejected": -80.64863586425781, + "logps/rejected": -217.6392364501953, + "loss": 1.0638, + "margin_dpo/margin_mean": 36.29573059082031, + "margin_dpo/margin_std": 62.555362701416016, + "step": 228 + }, + { + "epoch": 0.34618291761148906, + "grad_norm": 15.304150581359863, + "learning_rate": 4.147121556398312e-07, + "logits/chosen": 1.0203680992126465, + "logits/rejected": 0.9438357353210449, + "logps/chosen": -110.92491149902344, + "logps/ref_chosen": -46.663169860839844, + "logps/ref_rejected": -88.81582641601562, + "logps/rejected": -221.1804962158203, + "loss": 1.1233, + "margin_dpo/margin_mean": 68.10292053222656, + "margin_dpo/margin_std": 46.62117004394531, + "step": 229 + }, + { + "epoch": 0.3476946334089191, + "grad_norm": 17.100791931152344, + "learning_rate": 4.137151834863213e-07, + "logits/chosen": 0.8101639151573181, + "logits/rejected": 0.9272513389587402, + "logps/chosen": -166.52886962890625, + "logps/ref_chosen": -74.076171875, + "logps/ref_rejected": -60.03583526611328, + "logps/rejected": -188.06703186035156, + "loss": 1.1756, + "margin_dpo/margin_mean": 35.5784912109375, + "margin_dpo/margin_std": 85.3873519897461, + "step": 230 + }, + { + "epoch": 0.3492063492063492, + "grad_norm": 15.612849235534668, + "learning_rate": 4.1271363186719835e-07, + "logits/chosen": 0.7588058710098267, + "logits/rejected": 0.6893700957298279, + "logps/chosen": -156.947998046875, + "logps/ref_chosen": -60.65083312988281, + "logps/ref_rejected": -88.65960693359375, + "logps/rejected": -248.28106689453125, + "loss": 0.9731, + "margin_dpo/margin_mean": 63.32429504394531, + "margin_dpo/margin_std": 63.744873046875, + "step": 231 + }, + { + "epoch": 0.3507180650037793, + "grad_norm": 16.46584701538086, + "learning_rate": 4.1170752879801436e-07, + "logits/chosen": 0.8945074081420898, + "logits/rejected": 0.9332794547080994, + "logps/chosen": -147.59791564941406, + "logps/ref_chosen": -69.09077453613281, + "logps/ref_rejected": -57.635154724121094, + "logps/rejected": -174.9073486328125, + "loss": 1.1969, + "margin_dpo/margin_mean": 38.76505661010742, + "margin_dpo/margin_std": 74.07946014404297, + "step": 232 + }, + { + "epoch": 0.35222978080120937, + "grad_norm": 17.694772720336914, + "learning_rate": 4.106969024216348e-07, + "logits/chosen": 1.0592719316482544, + "logits/rejected": 0.9124851226806641, + "logps/chosen": -154.83291625976562, + "logps/ref_chosen": -47.50149917602539, + "logps/ref_rejected": -65.11629486083984, + "logps/rejected": -212.47425842285156, + "loss": 1.1982, + "margin_dpo/margin_mean": 40.02655792236328, + "margin_dpo/margin_std": 79.98373413085938, + "step": 233 + }, + { + "epoch": 0.35374149659863946, + "grad_norm": 15.55416488647461, + "learning_rate": 4.09681781007452e-07, + "logits/chosen": 0.6289401650428772, + "logits/rejected": 0.6006139516830444, + "logps/chosen": -184.160888671875, + "logps/ref_chosen": -71.29449462890625, + "logps/ref_rejected": -75.5903091430664, + "logps/rejected": -222.56118774414062, + "loss": 1.1917, + "margin_dpo/margin_mean": 34.104488372802734, + "margin_dpo/margin_std": 52.7165412902832, + "step": 234 + }, + { + "epoch": 0.35525321239606955, + "grad_norm": 12.542901039123535, + "learning_rate": 4.08662192950594e-07, + "logits/chosen": 0.8517894744873047, + "logits/rejected": 0.8705936670303345, + "logps/chosen": -171.2367401123047, + "logps/ref_chosen": -85.16849517822266, + "logps/ref_rejected": -84.92215728759766, + "logps/rejected": -236.8095703125, + "loss": 1.0258, + "margin_dpo/margin_mean": 65.81916809082031, + "margin_dpo/margin_std": 52.77665710449219, + "step": 235 + }, + { + "epoch": 0.35676492819349964, + "grad_norm": 14.79189682006836, + "learning_rate": 4.076381667711306e-07, + "logits/chosen": 0.9688406586647034, + "logits/rejected": 0.9256927967071533, + "logps/chosen": -192.56378173828125, + "logps/ref_chosen": -79.2076416015625, + "logps/ref_rejected": -100.40895080566406, + "logps/rejected": -259.85693359375, + "loss": 1.1711, + "margin_dpo/margin_mean": 46.09184265136719, + "margin_dpo/margin_std": 55.658546447753906, + "step": 236 + }, + { + "epoch": 0.35827664399092973, + "grad_norm": 18.867671966552734, + "learning_rate": 4.066097311132753e-07, + "logits/chosen": 0.8245556354522705, + "logits/rejected": 0.794967770576477, + "logps/chosen": -178.4556121826172, + "logps/ref_chosen": -75.55789184570312, + "logps/ref_rejected": -86.2087631225586, + "logps/rejected": -248.25030517578125, + "loss": 1.0857, + "margin_dpo/margin_mean": 59.14380645751953, + "margin_dpo/margin_std": 55.71824645996094, + "step": 237 + }, + { + "epoch": 0.35978835978835977, + "grad_norm": 16.57659339904785, + "learning_rate": 4.0557691474458414e-07, + "logits/chosen": 0.8863713145256042, + "logits/rejected": 0.8904386758804321, + "logps/chosen": -154.5315399169922, + "logps/ref_chosen": -59.983848571777344, + "logps/ref_rejected": -59.57722854614258, + "logps/rejected": -193.907958984375, + "loss": 1.1075, + "margin_dpo/margin_mean": 39.78303527832031, + "margin_dpo/margin_std": 73.26469421386719, + "step": 238 + }, + { + "epoch": 0.36130007558578986, + "grad_norm": 21.753231048583984, + "learning_rate": 4.045397465551513e-07, + "logits/chosen": 0.9004707336425781, + "logits/rejected": 0.8397661447525024, + "logps/chosen": -204.30355834960938, + "logps/ref_chosen": -71.07061767578125, + "logps/ref_rejected": -99.01707458496094, + "logps/rejected": -268.3192443847656, + "loss": 1.0612, + "margin_dpo/margin_mean": 36.0692138671875, + "margin_dpo/margin_std": 65.23834228515625, + "step": 239 + }, + { + "epoch": 0.36281179138321995, + "grad_norm": 14.38486099243164, + "learning_rate": 4.0349825555680045e-07, + "logits/chosen": 0.9007062911987305, + "logits/rejected": 0.8683948516845703, + "logps/chosen": -195.65853881835938, + "logps/ref_chosen": -62.13431930541992, + "logps/ref_rejected": -94.44198608398438, + "logps/rejected": -284.7762451171875, + "loss": 1.0184, + "margin_dpo/margin_mean": 56.8100471496582, + "margin_dpo/margin_std": 98.01765441894531, + "step": 240 + }, + { + "epoch": 0.36432350718065004, + "grad_norm": 13.766716003417969, + "learning_rate": 4.0245247088227377e-07, + "logits/chosen": 0.8529886603355408, + "logits/rejected": 0.8130964040756226, + "logps/chosen": -183.49896240234375, + "logps/ref_chosen": -64.64911651611328, + "logps/ref_rejected": -76.52389526367188, + "logps/rejected": -236.65924072265625, + "loss": 1.1746, + "margin_dpo/margin_mean": 41.285484313964844, + "margin_dpo/margin_std": 58.063209533691406, + "step": 241 + }, + { + "epoch": 0.36583522297808013, + "grad_norm": 12.187637329101562, + "learning_rate": 4.0140242178441665e-07, + "logits/chosen": 0.8713346719741821, + "logits/rejected": 0.8219842910766602, + "logps/chosen": -147.36550903320312, + "logps/ref_chosen": -50.54002380371094, + "logps/ref_rejected": -72.02078247070312, + "logps/rejected": -221.0113067626953, + "loss": 1.0658, + "margin_dpo/margin_mean": 52.1650505065918, + "margin_dpo/margin_std": 68.18669891357422, + "step": 242 + }, + { + "epoch": 0.3673469387755102, + "grad_norm": 15.1566801071167, + "learning_rate": 4.003481376353596e-07, + "logits/chosen": 0.9196364879608154, + "logits/rejected": 0.9491422176361084, + "logps/chosen": -203.58029174804688, + "logps/ref_chosen": -89.10255432128906, + "logps/ref_rejected": -77.90412902832031, + "logps/rejected": -224.44149780273438, + "loss": 1.1345, + "margin_dpo/margin_mean": 32.059627532958984, + "margin_dpo/margin_std": 67.19066619873047, + "step": 243 + }, + { + "epoch": 0.3688586545729403, + "grad_norm": 16.03683090209961, + "learning_rate": 3.9928964792569654e-07, + "logits/chosen": 0.8897314071655273, + "logits/rejected": 0.8393882513046265, + "logps/chosen": -187.6811981201172, + "logps/ref_chosen": -66.64555358886719, + "logps/ref_rejected": -92.27547454833984, + "logps/rejected": -261.2939453125, + "loss": 0.9875, + "margin_dpo/margin_mean": 47.982826232910156, + "margin_dpo/margin_std": 67.17561340332031, + "step": 244 + }, + { + "epoch": 0.37037037037037035, + "grad_norm": 12.031768798828125, + "learning_rate": 3.982269822636601e-07, + "logits/chosen": 0.9159705638885498, + "logits/rejected": 0.9142988324165344, + "logps/chosen": -185.25064086914062, + "logps/ref_chosen": -79.13615417480469, + "logps/ref_rejected": -79.21699523925781, + "logps/rejected": -262.3095397949219, + "loss": 0.9243, + "margin_dpo/margin_mean": 76.97804260253906, + "margin_dpo/margin_std": 56.90997314453125, + "step": 245 + }, + { + "epoch": 0.37188208616780044, + "grad_norm": 19.480737686157227, + "learning_rate": 3.971601703742932e-07, + "logits/chosen": 0.9996259212493896, + "logits/rejected": 1.0093920230865479, + "logps/chosen": -149.17922973632812, + "logps/ref_chosen": -57.22200012207031, + "logps/ref_rejected": -77.80888366699219, + "logps/rejected": -234.3947296142578, + "loss": 1.1167, + "margin_dpo/margin_mean": 64.62861633300781, + "margin_dpo/margin_std": 74.76765441894531, + "step": 246 + }, + { + "epoch": 0.37339380196523053, + "grad_norm": 19.866626739501953, + "learning_rate": 3.960892420986177e-07, + "logits/chosen": 0.7821018695831299, + "logits/rejected": 0.7258821725845337, + "logps/chosen": -219.67767333984375, + "logps/ref_chosen": -94.68956756591797, + "logps/ref_rejected": -122.22605895996094, + "logps/rejected": -318.41412353515625, + "loss": 1.257, + "margin_dpo/margin_mean": 71.19993591308594, + "margin_dpo/margin_std": 81.91409301757812, + "step": 247 + }, + { + "epoch": 0.3749055177626606, + "grad_norm": 22.328622817993164, + "learning_rate": 3.9501422739279953e-07, + "logits/chosen": 0.7986111640930176, + "logits/rejected": 0.8373353481292725, + "logps/chosen": -175.7537384033203, + "logps/ref_chosen": -60.833953857421875, + "logps/ref_rejected": -60.90985107421875, + "logps/rejected": -218.660888671875, + "loss": 1.1383, + "margin_dpo/margin_mean": 42.83125305175781, + "margin_dpo/margin_std": 89.69184112548828, + "step": 248 + }, + { + "epoch": 0.3764172335600907, + "grad_norm": 19.794172286987305, + "learning_rate": 3.9393515632731094e-07, + "logits/chosen": 1.0464762449264526, + "logits/rejected": 1.056666374206543, + "logps/chosen": -196.64761352539062, + "logps/ref_chosen": -74.77812194824219, + "logps/ref_rejected": -75.08592987060547, + "logps/rejected": -216.56761169433594, + "loss": 1.4198, + "margin_dpo/margin_mean": 19.612201690673828, + "margin_dpo/margin_std": 99.21858215332031, + "step": 249 + }, + { + "epoch": 0.3779289493575208, + "grad_norm": 19.19304847717285, + "learning_rate": 3.9285205908608934e-07, + "logits/chosen": 1.015788197517395, + "logits/rejected": 0.939686119556427, + "logps/chosen": -132.5025634765625, + "logps/ref_chosen": -47.93787384033203, + "logps/ref_rejected": -61.55204772949219, + "logps/rejected": -208.47003173828125, + "loss": 1.0567, + "margin_dpo/margin_mean": 62.353302001953125, + "margin_dpo/margin_std": 72.25045776367188, + "step": 250 + }, + { + "epoch": 0.3794406651549509, + "grad_norm": 18.333349227905273, + "learning_rate": 3.9176496596569265e-07, + "logits/chosen": 0.7942206263542175, + "logits/rejected": 0.8282942771911621, + "logps/chosen": -186.79269409179688, + "logps/ref_chosen": -77.54549407958984, + "logps/ref_rejected": -68.58549499511719, + "logps/rejected": -215.02297973632812, + "loss": 1.1574, + "margin_dpo/margin_mean": 37.190284729003906, + "margin_dpo/margin_std": 79.62153625488281, + "step": 251 + }, + { + "epoch": 0.38095238095238093, + "grad_norm": 26.73542594909668, + "learning_rate": 3.9067390737445254e-07, + "logits/chosen": 0.9108133316040039, + "logits/rejected": 0.8635146021842957, + "logps/chosen": -196.31478881835938, + "logps/ref_chosen": -73.16785430908203, + "logps/ref_rejected": -70.33341979980469, + "logps/rejected": -239.6460723876953, + "loss": 1.2713, + "margin_dpo/margin_mean": 46.16571044921875, + "margin_dpo/margin_std": 107.81159973144531, + "step": 252 + }, + { + "epoch": 0.382464096749811, + "grad_norm": 21.395902633666992, + "learning_rate": 3.8957891383162304e-07, + "logits/chosen": 0.9631872177124023, + "logits/rejected": 0.9409669637680054, + "logps/chosen": -125.77433776855469, + "logps/ref_chosen": -33.16981506347656, + "logps/ref_rejected": -42.722469329833984, + "logps/rejected": -179.7202911376953, + "loss": 1.1379, + "margin_dpo/margin_mean": 44.39329528808594, + "margin_dpo/margin_std": 63.10321044921875, + "step": 253 + }, + { + "epoch": 0.3839758125472411, + "grad_norm": 13.39516544342041, + "learning_rate": 3.884800159665276e-07, + "logits/chosen": 0.9127005934715271, + "logits/rejected": 0.8767493367195129, + "logps/chosen": -138.35330200195312, + "logps/ref_chosen": -50.45591735839844, + "logps/ref_rejected": -71.39493560791016, + "logps/rejected": -223.3489532470703, + "loss": 1.1077, + "margin_dpo/margin_mean": 64.056640625, + "margin_dpo/margin_std": 61.15374755859375, + "step": 254 + }, + { + "epoch": 0.3854875283446712, + "grad_norm": 21.80673599243164, + "learning_rate": 3.873772445177015e-07, + "logits/chosen": 0.929595410823822, + "logits/rejected": 0.8490023612976074, + "logps/chosen": -134.93621826171875, + "logps/ref_chosen": -57.36243438720703, + "logps/ref_rejected": -83.85030364990234, + "logps/rejected": -219.91397094726562, + "loss": 1.0894, + "margin_dpo/margin_mean": 58.489891052246094, + "margin_dpo/margin_std": 79.15504455566406, + "step": 255 + }, + { + "epoch": 0.3869992441421013, + "grad_norm": 17.780683517456055, + "learning_rate": 3.862706303320329e-07, + "logits/chosen": 0.8780696988105774, + "logits/rejected": 0.8686283230781555, + "logps/chosen": -191.927734375, + "logps/ref_chosen": -72.70278930664062, + "logps/ref_rejected": -87.58718872070312, + "logps/rejected": -250.40631103515625, + "loss": 1.1458, + "margin_dpo/margin_mean": 43.59416580200195, + "margin_dpo/margin_std": 86.399169921875, + "step": 256 + }, + { + "epoch": 0.3885109599395314, + "grad_norm": 13.700689315795898, + "learning_rate": 3.851602043638994e-07, + "logits/chosen": 0.9361928701400757, + "logits/rejected": 0.8859877586364746, + "logps/chosen": -137.43472290039062, + "logps/ref_chosen": -56.16447448730469, + "logps/ref_rejected": -79.54229736328125, + "logps/rejected": -244.7681427001953, + "loss": 0.9962, + "margin_dpo/margin_mean": 83.9555892944336, + "margin_dpo/margin_std": 90.04832458496094, + "step": 257 + }, + { + "epoch": 0.3900226757369615, + "grad_norm": 15.653840065002441, + "learning_rate": 3.840459976743023e-07, + "logits/chosen": 0.857273280620575, + "logits/rejected": 0.8150414228439331, + "logps/chosen": -171.31089782714844, + "logps/ref_chosen": -68.2958755493164, + "logps/ref_rejected": -88.37379455566406, + "logps/rejected": -259.4873046875, + "loss": 1.0813, + "margin_dpo/margin_mean": 68.0985107421875, + "margin_dpo/margin_std": 73.86476135253906, + "step": 258 + }, + { + "epoch": 0.3915343915343915, + "grad_norm": 16.806671142578125, + "learning_rate": 3.8292804142999796e-07, + "logits/chosen": 0.8986009955406189, + "logits/rejected": 0.8750613927841187, + "logps/chosen": -197.89768981933594, + "logps/ref_chosen": -86.00422668457031, + "logps/ref_rejected": -106.7618408203125, + "logps/rejected": -272.7347106933594, + "loss": 0.9853, + "margin_dpo/margin_mean": 54.07939910888672, + "margin_dpo/margin_std": 119.56117248535156, + "step": 259 + }, + { + "epoch": 0.3930461073318216, + "grad_norm": 22.537059783935547, + "learning_rate": 3.818063669026256e-07, + "logits/chosen": 0.9305140972137451, + "logits/rejected": 0.8760488629341125, + "logps/chosen": -117.2443618774414, + "logps/ref_chosen": -53.69929504394531, + "logps/ref_rejected": -80.42738342285156, + "logps/rejected": -225.81988525390625, + "loss": 1.0383, + "margin_dpo/margin_mean": 81.84744262695312, + "margin_dpo/margin_std": 77.02603149414062, + "step": 260 + }, + { + "epoch": 0.3945578231292517, + "grad_norm": 16.964977264404297, + "learning_rate": 3.806810054678331e-07, + "logits/chosen": 0.8644047975540161, + "logits/rejected": 1.017871379852295, + "logps/chosen": -177.18267822265625, + "logps/ref_chosen": -87.48385620117188, + "logps/ref_rejected": -45.14533233642578, + "logps/rejected": -152.22369384765625, + "loss": 1.1648, + "margin_dpo/margin_mean": 17.37952995300293, + "margin_dpo/margin_std": 70.14508056640625, + "step": 261 + }, + { + "epoch": 0.3960695389266818, + "grad_norm": 15.852773666381836, + "learning_rate": 3.7955198860439887e-07, + "logits/chosen": 1.01748526096344, + "logits/rejected": 0.9842813014984131, + "logps/chosen": -153.70370483398438, + "logps/ref_chosen": -52.95308303833008, + "logps/ref_rejected": -64.26335906982422, + "logps/rejected": -199.62222290039062, + "loss": 1.0934, + "margin_dpo/margin_mean": 34.60824203491211, + "margin_dpo/margin_std": 63.7818489074707, + "step": 262 + }, + { + "epoch": 0.3975812547241119, + "grad_norm": 18.748830795288086, + "learning_rate": 3.784193478933516e-07, + "logits/chosen": 0.9187393188476562, + "logits/rejected": 0.8768531084060669, + "logps/chosen": -145.96719360351562, + "logps/ref_chosen": -59.93905258178711, + "logps/ref_rejected": -70.00927734375, + "logps/rejected": -191.09591674804688, + "loss": 1.0738, + "margin_dpo/margin_mean": 35.05849838256836, + "margin_dpo/margin_std": 68.49369812011719, + "step": 263 + }, + { + "epoch": 0.39909297052154197, + "grad_norm": 16.476381301879883, + "learning_rate": 3.7728311501708674e-07, + "logits/chosen": 0.8161172866821289, + "logits/rejected": 0.7861305475234985, + "logps/chosen": -201.89556884765625, + "logps/ref_chosen": -77.3072509765625, + "logps/ref_rejected": -83.79466247558594, + "logps/rejected": -217.04876708984375, + "loss": 1.0982, + "margin_dpo/margin_mean": 8.665780067443848, + "margin_dpo/margin_std": 75.14938354492188, + "step": 264 + }, + { + "epoch": 0.40060468631897206, + "grad_norm": 14.398598670959473, + "learning_rate": 3.7614332175848027e-07, + "logits/chosen": 0.8012948036193848, + "logits/rejected": 0.7619481086730957, + "logps/chosen": -183.10635375976562, + "logps/ref_chosen": -62.340206146240234, + "logps/ref_rejected": -57.182029724121094, + "logps/rejected": -195.21893310546875, + "loss": 1.0655, + "margin_dpo/margin_mean": 17.27075958251953, + "margin_dpo/margin_std": 70.34654235839844, + "step": 265 + }, + { + "epoch": 0.4021164021164021, + "grad_norm": 19.04807472229004, + "learning_rate": 3.75e-07, + "logits/chosen": 0.9468996524810791, + "logits/rejected": 0.8705604076385498, + "logps/chosen": -128.9197235107422, + "logps/ref_chosen": -47.8638916015625, + "logps/ref_rejected": -65.85595703125, + "logps/rejected": -201.5211181640625, + "loss": 1.1209, + "margin_dpo/margin_mean": 54.60932922363281, + "margin_dpo/margin_std": 57.88256072998047, + "step": 266 + }, + { + "epoch": 0.4036281179138322, + "grad_norm": 22.434011459350586, + "learning_rate": 3.738531817228131e-07, + "logits/chosen": 0.8454642295837402, + "logits/rejected": 0.786316990852356, + "logps/chosen": -106.54922485351562, + "logps/ref_chosen": -43.666568756103516, + "logps/ref_rejected": -68.1474380493164, + "logps/rejected": -161.97854614257812, + "loss": 1.2043, + "margin_dpo/margin_mean": 30.94845199584961, + "margin_dpo/margin_std": 81.37274169921875, + "step": 267 + }, + { + "epoch": 0.4051398337112623, + "grad_norm": 18.147586822509766, + "learning_rate": 3.7270289900589204e-07, + "logits/chosen": 0.7394974231719971, + "logits/rejected": 0.7207775115966797, + "logps/chosen": -133.07296752929688, + "logps/ref_chosen": -67.96279907226562, + "logps/ref_rejected": -72.69281005859375, + "logps/rejected": -186.4730224609375, + "loss": 1.1032, + "margin_dpo/margin_mean": 48.67003631591797, + "margin_dpo/margin_std": 64.53730010986328, + "step": 268 + }, + { + "epoch": 0.40665154950869237, + "grad_norm": 12.17220687866211, + "learning_rate": 3.7154918402511714e-07, + "logits/chosen": 0.8191482424736023, + "logits/rejected": 0.8883626461029053, + "logps/chosen": -182.84963989257812, + "logps/ref_chosen": -80.52581787109375, + "logps/ref_rejected": -65.83181762695312, + "logps/rejected": -205.61041259765625, + "loss": 1.0584, + "margin_dpo/margin_mean": 37.45478820800781, + "margin_dpo/margin_std": 63.13644027709961, + "step": 269 + }, + { + "epoch": 0.40816326530612246, + "grad_norm": 18.165111541748047, + "learning_rate": 3.7039206905237656e-07, + "logits/chosen": 0.8216699957847595, + "logits/rejected": 0.8250892758369446, + "logps/chosen": -129.14108276367188, + "logps/ref_chosen": -64.29264831542969, + "logps/ref_rejected": -65.4633560180664, + "logps/rejected": -201.03909301757812, + "loss": 1.0999, + "margin_dpo/margin_mean": 70.72732543945312, + "margin_dpo/margin_std": 76.4561767578125, + "step": 270 + }, + { + "epoch": 0.40967498110355255, + "grad_norm": 21.99321174621582, + "learning_rate": 3.692315864546635e-07, + "logits/chosen": 0.9689754247665405, + "logits/rejected": 0.8169035911560059, + "logps/chosen": -90.29707336425781, + "logps/ref_chosen": -39.26963424682617, + "logps/ref_rejected": -87.15721130371094, + "logps/rejected": -214.21963500976562, + "loss": 1.2105, + "margin_dpo/margin_mean": 76.03497314453125, + "margin_dpo/margin_std": 88.13627624511719, + "step": 271 + }, + { + "epoch": 0.41118669690098264, + "grad_norm": 12.809111595153809, + "learning_rate": 3.6806776869317067e-07, + "logits/chosen": 0.9838389158248901, + "logits/rejected": 1.008858323097229, + "logps/chosen": -140.18511962890625, + "logps/ref_chosen": -55.88648223876953, + "logps/ref_rejected": -57.95124816894531, + "logps/rejected": -215.2462158203125, + "loss": 0.9634, + "margin_dpo/margin_mean": 72.996337890625, + "margin_dpo/margin_std": 73.1607666015625, + "step": 272 + }, + { + "epoch": 0.4126984126984127, + "grad_norm": 17.283329010009766, + "learning_rate": 3.669006483223828e-07, + "logits/chosen": 0.9234431982040405, + "logits/rejected": 0.8033227324485779, + "logps/chosen": -175.26605224609375, + "logps/ref_chosen": -59.144004821777344, + "logps/ref_rejected": -123.4438247680664, + "logps/rejected": -278.2921447753906, + "loss": 1.14, + "margin_dpo/margin_mean": 38.72627258300781, + "margin_dpo/margin_std": 74.85566711425781, + "step": 273 + }, + { + "epoch": 0.41421012849584277, + "grad_norm": 18.224842071533203, + "learning_rate": 3.657302579891656e-07, + "logits/chosen": 0.7041195631027222, + "logits/rejected": 0.8130519986152649, + "logps/chosen": -175.5207977294922, + "logps/ref_chosen": -77.74801635742188, + "logps/ref_rejected": -63.99616241455078, + "logps/rejected": -199.80519104003906, + "loss": 1.1015, + "margin_dpo/margin_mean": 38.036251068115234, + "margin_dpo/margin_std": 89.42131042480469, + "step": 274 + }, + { + "epoch": 0.41572184429327286, + "grad_norm": 21.59168815612793, + "learning_rate": 3.645566304318526e-07, + "logits/chosen": 0.9048452377319336, + "logits/rejected": 0.8862916231155396, + "logps/chosen": -169.61819458007812, + "logps/ref_chosen": -56.68327331542969, + "logps/ref_rejected": -65.53984832763672, + "logps/rejected": -225.29954528808594, + "loss": 1.0305, + "margin_dpo/margin_mean": 46.82477569580078, + "margin_dpo/margin_std": 65.18940734863281, + "step": 275 + }, + { + "epoch": 0.41723356009070295, + "grad_norm": 20.635337829589844, + "learning_rate": 3.633797984793294e-07, + "logits/chosen": 0.8774361610412598, + "logits/rejected": 0.8986802101135254, + "logps/chosen": -137.88626098632812, + "logps/ref_chosen": -43.50504684448242, + "logps/ref_rejected": -43.088871002197266, + "logps/rejected": -173.4464111328125, + "loss": 1.123, + "margin_dpo/margin_mean": 35.97632598876953, + "margin_dpo/margin_std": 63.101436614990234, + "step": 276 + }, + { + "epoch": 0.41874527588813304, + "grad_norm": 18.922773361206055, + "learning_rate": 3.6219979505011555e-07, + "logits/chosen": 0.7700687646865845, + "logits/rejected": 0.7433135509490967, + "logps/chosen": -165.19195556640625, + "logps/ref_chosen": -52.701934814453125, + "logps/ref_rejected": -62.01823425292969, + "logps/rejected": -202.63037109375, + "loss": 1.3038, + "margin_dpo/margin_mean": 28.122108459472656, + "margin_dpo/margin_std": 74.96601867675781, + "step": 277 + }, + { + "epoch": 0.42025699168556313, + "grad_norm": 24.745040893554688, + "learning_rate": 3.6101665315144353e-07, + "logits/chosen": 0.9205065965652466, + "logits/rejected": 0.8737306594848633, + "logps/chosen": -186.35955810546875, + "logps/ref_chosen": -66.36759948730469, + "logps/ref_rejected": -71.12834930419922, + "logps/rejected": -234.69064331054688, + "loss": 1.0695, + "margin_dpo/margin_mean": 43.57034683227539, + "margin_dpo/margin_std": 91.35447692871094, + "step": 278 + }, + { + "epoch": 0.4217687074829932, + "grad_norm": 19.72016143798828, + "learning_rate": 3.5983040587833563e-07, + "logits/chosen": 0.8044095039367676, + "logits/rejected": 0.7835577726364136, + "logps/chosen": -125.09947967529297, + "logps/ref_chosen": -57.34808349609375, + "logps/ref_rejected": -60.212989807128906, + "logps/rejected": -194.96324157714844, + "loss": 0.942, + "margin_dpo/margin_mean": 66.99885559082031, + "margin_dpo/margin_std": 86.55064392089844, + "step": 279 + }, + { + "epoch": 0.42328042328042326, + "grad_norm": 13.24905014038086, + "learning_rate": 3.586410864126781e-07, + "logits/chosen": 0.8917016983032227, + "logits/rejected": 0.8699558973312378, + "logps/chosen": -111.32423400878906, + "logps/ref_chosen": -58.24922561645508, + "logps/ref_rejected": -59.01625061035156, + "logps/rejected": -200.49822998046875, + "loss": 0.8917, + "margin_dpo/margin_mean": 88.40696716308594, + "margin_dpo/margin_std": 55.23664474487305, + "step": 280 + }, + { + "epoch": 0.42479213907785335, + "grad_norm": 14.725621223449707, + "learning_rate": 3.574487280222929e-07, + "logits/chosen": 0.8508050441741943, + "logits/rejected": 0.8704952597618103, + "logps/chosen": -189.38882446289062, + "logps/ref_chosen": -78.43274688720703, + "logps/ref_rejected": -91.09056091308594, + "logps/rejected": -267.9164733886719, + "loss": 1.081, + "margin_dpo/margin_mean": 65.86985778808594, + "margin_dpo/margin_std": 70.21514892578125, + "step": 281 + }, + { + "epoch": 0.42630385487528344, + "grad_norm": 16.900634765625, + "learning_rate": 3.562533640600075e-07, + "logits/chosen": 0.8684970140457153, + "logits/rejected": 0.7901803255081177, + "logps/chosen": -150.54010009765625, + "logps/ref_chosen": -53.83773422241211, + "logps/ref_rejected": -75.1729507446289, + "logps/rejected": -234.26873779296875, + "loss": 1.1197, + "margin_dpo/margin_mean": 62.39341354370117, + "margin_dpo/margin_std": 64.95208740234375, + "step": 282 + }, + { + "epoch": 0.42781557067271353, + "grad_norm": 15.944957733154297, + "learning_rate": 3.550550279627215e-07, + "logits/chosen": 0.8715301156044006, + "logits/rejected": 0.8303920030593872, + "logps/chosen": -142.8111572265625, + "logps/ref_chosen": -53.5611457824707, + "logps/ref_rejected": -73.18958282470703, + "logps/rejected": -201.0227813720703, + "loss": 1.0702, + "margin_dpo/margin_mean": 38.58319091796875, + "margin_dpo/margin_std": 71.02609252929688, + "step": 283 + }, + { + "epoch": 0.4293272864701436, + "grad_norm": 19.076061248779297, + "learning_rate": 3.5385375325047163e-07, + "logits/chosen": 0.9148997068405151, + "logits/rejected": 0.8821941614151001, + "logps/chosen": -152.02041625976562, + "logps/ref_chosen": -55.81263732910156, + "logps/ref_rejected": -90.23190307617188, + "logps/rejected": -226.96029663085938, + "loss": 1.0726, + "margin_dpo/margin_mean": 40.52062225341797, + "margin_dpo/margin_std": 77.91551971435547, + "step": 284 + }, + { + "epoch": 0.4308390022675737, + "grad_norm": 24.047950744628906, + "learning_rate": 3.5264957352549375e-07, + "logits/chosen": 0.9095529913902283, + "logits/rejected": 0.9729138016700745, + "logps/chosen": -185.89060974121094, + "logps/ref_chosen": -71.53235626220703, + "logps/ref_rejected": -46.31084060668945, + "logps/rejected": -196.288818359375, + "loss": 1.1315, + "margin_dpo/margin_mean": 35.61972427368164, + "margin_dpo/margin_std": 69.28761291503906, + "step": 285 + }, + { + "epoch": 0.4323507180650038, + "grad_norm": 16.382957458496094, + "learning_rate": 3.514425224712835e-07, + "logits/chosen": 0.9263152480125427, + "logits/rejected": 0.9703081846237183, + "logps/chosen": -178.46542358398438, + "logps/ref_chosen": -56.29132080078125, + "logps/ref_rejected": -54.583534240722656, + "logps/rejected": -236.7250518798828, + "loss": 0.9443, + "margin_dpo/margin_mean": 59.96741485595703, + "margin_dpo/margin_std": 58.34821319580078, + "step": 286 + }, + { + "epoch": 0.43386243386243384, + "grad_norm": 18.149490356445312, + "learning_rate": 3.502326338516534e-07, + "logits/chosen": 0.9496700763702393, + "logits/rejected": 0.798512876033783, + "logps/chosen": -130.92161560058594, + "logps/ref_chosen": -39.158355712890625, + "logps/ref_rejected": -74.88023376464844, + "logps/rejected": -248.06649780273438, + "loss": 1.0283, + "margin_dpo/margin_mean": 81.4229965209961, + "margin_dpo/margin_std": 76.41511535644531, + "step": 287 + }, + { + "epoch": 0.43537414965986393, + "grad_norm": 17.73630714416504, + "learning_rate": 3.490199415097892e-07, + "logits/chosen": 0.7672666311264038, + "logits/rejected": 0.7976804971694946, + "logps/chosen": -178.63949584960938, + "logps/ref_chosen": -69.44332885742188, + "logps/ref_rejected": -54.98228454589844, + "logps/rejected": -200.60418701171875, + "loss": 1.1142, + "margin_dpo/margin_mean": 36.425743103027344, + "margin_dpo/margin_std": 56.845184326171875, + "step": 288 + }, + { + "epoch": 0.436885865457294, + "grad_norm": 16.403818130493164, + "learning_rate": 3.4780447936730247e-07, + "logits/chosen": 0.8484159111976624, + "logits/rejected": 0.8917375802993774, + "logps/chosen": -221.443359375, + "logps/ref_chosen": -64.24480438232422, + "logps/ref_rejected": -67.39839172363281, + "logps/rejected": -246.82095336914062, + "loss": 1.1333, + "margin_dpo/margin_mean": 22.224018096923828, + "margin_dpo/margin_std": 46.693260192871094, + "step": 289 + }, + { + "epoch": 0.4383975812547241, + "grad_norm": 17.30687141418457, + "learning_rate": 3.465862814232821e-07, + "logits/chosen": 0.9498525261878967, + "logits/rejected": 0.9050639867782593, + "logps/chosen": -192.14419555664062, + "logps/ref_chosen": -65.73394012451172, + "logps/ref_rejected": -74.00738525390625, + "logps/rejected": -251.295654296875, + "loss": 1.0606, + "margin_dpo/margin_mean": 50.87800598144531, + "margin_dpo/margin_std": 90.56031799316406, + "step": 290 + }, + { + "epoch": 0.4399092970521542, + "grad_norm": 19.28776741027832, + "learning_rate": 3.4536538175334343e-07, + "logits/chosen": 0.9647561311721802, + "logits/rejected": 0.9699366092681885, + "logps/chosen": -183.28567504882812, + "logps/ref_chosen": -67.45711517333984, + "logps/ref_rejected": -75.27851867675781, + "logps/rejected": -262.3265075683594, + "loss": 1.0179, + "margin_dpo/margin_mean": 71.21941375732422, + "margin_dpo/margin_std": 88.33670806884766, + "step": 291 + }, + { + "epoch": 0.4414210128495843, + "grad_norm": 15.975472450256348, + "learning_rate": 3.4414181450867465e-07, + "logits/chosen": 0.9394838809967041, + "logits/rejected": 0.9308391809463501, + "logps/chosen": -167.02105712890625, + "logps/ref_chosen": -58.86817932128906, + "logps/ref_rejected": -64.91166687011719, + "logps/rejected": -216.97520446777344, + "loss": 1.1114, + "margin_dpo/margin_mean": 43.910648345947266, + "margin_dpo/margin_std": 96.6522216796875, + "step": 292 + }, + { + "epoch": 0.4429327286470144, + "grad_norm": 14.782191276550293, + "learning_rate": 3.4291561391508185e-07, + "logits/chosen": 0.9456629753112793, + "logits/rejected": 0.9435967803001404, + "logps/chosen": -138.83485412597656, + "logps/ref_chosen": -48.30006408691406, + "logps/ref_rejected": -58.28700256347656, + "logps/rejected": -214.57583618164062, + "loss": 0.9865, + "margin_dpo/margin_mean": 65.7540512084961, + "margin_dpo/margin_std": 53.61219024658203, + "step": 293 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 13.40251350402832, + "learning_rate": 3.4168681427203153e-07, + "logits/chosen": 0.8696566224098206, + "logits/rejected": 0.7992103099822998, + "logps/chosen": -201.7752685546875, + "logps/ref_chosen": -73.32835388183594, + "logps/ref_rejected": -100.44032287597656, + "logps/rejected": -282.5135192871094, + "loss": 1.0812, + "margin_dpo/margin_mean": 53.62628936767578, + "margin_dpo/margin_std": 67.26288604736328, + "step": 294 + }, + { + "epoch": 0.4459561602418745, + "grad_norm": 20.00554656982422, + "learning_rate": 3.4045544995169125e-07, + "logits/chosen": 1.0336451530456543, + "logits/rejected": 0.9124792814254761, + "logps/chosen": -154.9568328857422, + "logps/ref_chosen": -36.71764373779297, + "logps/ref_rejected": -77.01786804199219, + "logps/rejected": -266.3442687988281, + "loss": 1.0952, + "margin_dpo/margin_mean": 71.08719635009766, + "margin_dpo/margin_std": 75.32640838623047, + "step": 295 + }, + { + "epoch": 0.4474678760393046, + "grad_norm": 17.829896926879883, + "learning_rate": 3.392215553979679e-07, + "logits/chosen": 0.8829468488693237, + "logits/rejected": 0.7915094494819641, + "logps/chosen": -156.69998168945312, + "logps/ref_chosen": -47.72833251953125, + "logps/ref_rejected": -85.0424575805664, + "logps/rejected": -267.91741943359375, + "loss": 1.0734, + "margin_dpo/margin_mean": 73.9033203125, + "margin_dpo/margin_std": 85.67912292480469, + "step": 296 + }, + { + "epoch": 0.4489795918367347, + "grad_norm": 13.065276145935059, + "learning_rate": 3.3798516512554485e-07, + "logits/chosen": 1.0190558433532715, + "logits/rejected": 0.9399404525756836, + "logps/chosen": -171.317626953125, + "logps/ref_chosen": -45.453880310058594, + "logps/ref_rejected": -60.58012390136719, + "logps/rejected": -240.07229614257812, + "loss": 0.9618, + "margin_dpo/margin_mean": 53.62842559814453, + "margin_dpo/margin_std": 68.6543197631836, + "step": 297 + }, + { + "epoch": 0.4504913076341648, + "grad_norm": 14.449824333190918, + "learning_rate": 3.367463137189156e-07, + "logits/chosen": 1.0604900121688843, + "logits/rejected": 1.045938491821289, + "logps/chosen": -196.72889709472656, + "logps/ref_chosen": -65.93342590332031, + "logps/ref_rejected": -81.31886291503906, + "logps/rejected": -258.4837341308594, + "loss": 1.1329, + "margin_dpo/margin_mean": 46.3693962097168, + "margin_dpo/margin_std": 84.15235900878906, + "step": 298 + }, + { + "epoch": 0.4520030234315949, + "grad_norm": 18.29178237915039, + "learning_rate": 3.355050358314172e-07, + "logits/chosen": 0.9493238925933838, + "logits/rejected": 0.9166898727416992, + "logps/chosen": -167.92984008789062, + "logps/ref_chosen": -41.45861053466797, + "logps/ref_rejected": -55.44845199584961, + "logps/rejected": -203.72457885742188, + "loss": 1.2108, + "margin_dpo/margin_mean": 21.804901123046875, + "margin_dpo/margin_std": 75.58480834960938, + "step": 299 + }, + { + "epoch": 0.45351473922902497, + "grad_norm": 15.63215446472168, + "learning_rate": 3.3426136618426043e-07, + "logits/chosen": 0.95320063829422, + "logits/rejected": 1.0048692226409912, + "logps/chosen": -177.46788024902344, + "logps/ref_chosen": -61.02867889404297, + "logps/ref_rejected": -52.653968811035156, + "logps/rejected": -235.43170166015625, + "loss": 1.1015, + "margin_dpo/margin_mean": 66.33853149414062, + "margin_dpo/margin_std": 77.45952606201172, + "step": 300 + }, + { + "epoch": 0.45351473922902497, + "eval_logits/chosen": 0.910781741142273, + "eval_logits/rejected": 0.8756802678108215, + "eval_logps/chosen": -200.87997436523438, + "eval_logps/ref_chosen": -75.30646514892578, + "eval_logps/ref_rejected": -77.75511932373047, + "eval_logps/rejected": -251.94520568847656, + "eval_loss": 0.557418942451477, + "eval_margin_dpo/margin_mean": 48.61654281616211, + "eval_margin_dpo/margin_std": 83.31050872802734, + "eval_runtime": 37.4536, + "eval_samples_per_second": 61.489, + "eval_steps_per_second": 1.922, + "step": 300 + }, + { + "epoch": 0.455026455026455, + "grad_norm": 14.323222160339355, + "learning_rate": 3.3301533956555885e-07, + "logits/chosen": 1.0346510410308838, + "logits/rejected": 0.985994815826416, + "logps/chosen": -206.44583129882812, + "logps/ref_chosen": -49.377403259277344, + "logps/ref_rejected": -72.45796966552734, + "logps/rejected": -270.4298095703125, + "loss": 1.1779, + "margin_dpo/margin_mean": 40.90338897705078, + "margin_dpo/margin_std": 61.039005279541016, + "step": 301 + }, + { + "epoch": 0.4565381708238851, + "grad_norm": 18.329959869384766, + "learning_rate": 3.317669908293554e-07, + "logits/chosen": 0.8981478810310364, + "logits/rejected": 0.8204025030136108, + "logps/chosen": -166.45431518554688, + "logps/ref_chosen": -42.884490966796875, + "logps/ref_rejected": -72.04084777832031, + "logps/rejected": -267.53070068359375, + "loss": 1.2516, + "margin_dpo/margin_mean": 71.92005920410156, + "margin_dpo/margin_std": 84.18696594238281, + "step": 302 + }, + { + "epoch": 0.4580498866213152, + "grad_norm": 13.802994728088379, + "learning_rate": 3.3051635489464793e-07, + "logits/chosen": 0.9232026934623718, + "logits/rejected": 0.8720027804374695, + "logps/chosen": -161.49925231933594, + "logps/ref_chosen": -50.44966506958008, + "logps/ref_rejected": -77.09220886230469, + "logps/rejected": -234.8109893798828, + "loss": 1.0738, + "margin_dpo/margin_mean": 46.669189453125, + "margin_dpo/margin_std": 89.64495849609375, + "step": 303 + }, + { + "epoch": 0.4595616024187453, + "grad_norm": 13.76878833770752, + "learning_rate": 3.292634667444117e-07, + "logits/chosen": 1.0514378547668457, + "logits/rejected": 0.9998120069503784, + "logps/chosen": -169.84353637695312, + "logps/ref_chosen": -54.7811279296875, + "logps/ref_rejected": -74.56997680664062, + "logps/rejected": -264.3577880859375, + "loss": 0.9872, + "margin_dpo/margin_mean": 74.72541809082031, + "margin_dpo/margin_std": 65.64923858642578, + "step": 304 + }, + { + "epoch": 0.46107331821617537, + "grad_norm": 18.287494659423828, + "learning_rate": 3.280083614246217e-07, + "logits/chosen": 0.8871467113494873, + "logits/rejected": 0.917915940284729, + "logps/chosen": -238.80645751953125, + "logps/ref_chosen": -81.34001159667969, + "logps/ref_rejected": -77.41258239746094, + "logps/rejected": -272.7764892578125, + "loss": 1.1496, + "margin_dpo/margin_mean": 37.89744567871094, + "margin_dpo/margin_std": 99.84722900390625, + "step": 305 + }, + { + "epoch": 0.46258503401360546, + "grad_norm": 13.584040641784668, + "learning_rate": 3.267510740432719e-07, + "logits/chosen": 1.079813003540039, + "logits/rejected": 1.0361995697021484, + "logps/chosen": -143.89230346679688, + "logps/ref_chosen": -49.74858474731445, + "logps/ref_rejected": -54.940982818603516, + "logps/rejected": -209.38763427734375, + "loss": 1.0814, + "margin_dpo/margin_mean": 60.302940368652344, + "margin_dpo/margin_std": 79.73137664794922, + "step": 306 + }, + { + "epoch": 0.46409674981103555, + "grad_norm": 19.44350814819336, + "learning_rate": 3.2549163976939285e-07, + "logits/chosen": 0.8657441139221191, + "logits/rejected": 0.8813103437423706, + "logps/chosen": -183.44241333007812, + "logps/ref_chosen": -65.90791320800781, + "logps/ref_rejected": -65.25321960449219, + "logps/rejected": -221.45703125, + "loss": 1.2949, + "margin_dpo/margin_mean": 38.6693229675293, + "margin_dpo/margin_std": 113.20942687988281, + "step": 307 + }, + { + "epoch": 0.4656084656084656, + "grad_norm": 17.951297760009766, + "learning_rate": 3.2423009383206874e-07, + "logits/chosen": 0.9202988147735596, + "logits/rejected": 0.9259182214736938, + "logps/chosen": -203.8446807861328, + "logps/ref_chosen": -71.3767318725586, + "logps/ref_rejected": -82.10542297363281, + "logps/rejected": -271.75042724609375, + "loss": 1.1168, + "margin_dpo/margin_mean": 57.17706298828125, + "margin_dpo/margin_std": 100.58518981933594, + "step": 308 + }, + { + "epoch": 0.4671201814058957, + "grad_norm": 14.301473617553711, + "learning_rate": 3.229664715194511e-07, + "logits/chosen": 0.9366397857666016, + "logits/rejected": 0.913209080696106, + "logps/chosen": -179.5953369140625, + "logps/ref_chosen": -41.846153259277344, + "logps/ref_rejected": -61.37134552001953, + "logps/rejected": -237.18511962890625, + "loss": 1.0857, + "margin_dpo/margin_mean": 38.064598083496094, + "margin_dpo/margin_std": 66.49103546142578, + "step": 309 + }, + { + "epoch": 0.46863189720332576, + "grad_norm": 16.19812774658203, + "learning_rate": 3.2170080817777257e-07, + "logits/chosen": 0.969029426574707, + "logits/rejected": 0.9913873672485352, + "logps/chosen": -214.78329467773438, + "logps/ref_chosen": -70.55810546875, + "logps/ref_rejected": -64.62115478515625, + "logps/rejected": -227.16543579101562, + "loss": 1.2835, + "margin_dpo/margin_mean": 18.319095611572266, + "margin_dpo/margin_std": 73.50588989257812, + "step": 310 + }, + { + "epoch": 0.47014361300075586, + "grad_norm": 16.054964065551758, + "learning_rate": 3.204331392103574e-07, + "logits/chosen": 0.8452152013778687, + "logits/rejected": 0.8800424337387085, + "logps/chosen": -181.9866485595703, + "logps/ref_chosen": -60.32414245605469, + "logps/ref_rejected": -68.1629638671875, + "logps/rejected": -208.29612731933594, + "loss": 1.0987, + "margin_dpo/margin_mean": 18.47066307067871, + "margin_dpo/margin_std": 71.49610137939453, + "step": 311 + }, + { + "epoch": 0.47165532879818595, + "grad_norm": 13.902632713317871, + "learning_rate": 3.1916350007663176e-07, + "logits/chosen": 1.0027267932891846, + "logits/rejected": 0.952292799949646, + "logps/chosen": -169.0614013671875, + "logps/ref_chosen": -55.58141326904297, + "logps/ref_rejected": -71.82810974121094, + "logps/rejected": -234.40756225585938, + "loss": 0.9795, + "margin_dpo/margin_mean": 49.09947967529297, + "margin_dpo/margin_std": 47.16817855834961, + "step": 312 + }, + { + "epoch": 0.47316704459561604, + "grad_norm": 15.096352577209473, + "learning_rate": 3.178919262911314e-07, + "logits/chosen": 0.9660900831222534, + "logits/rejected": 1.0016883611679077, + "logps/chosen": -156.38772583007812, + "logps/ref_chosen": -52.92902374267578, + "logps/ref_rejected": -47.91901779174805, + "logps/rejected": -186.65386962890625, + "loss": 1.2484, + "margin_dpo/margin_mean": 35.27616500854492, + "margin_dpo/margin_std": 60.62500762939453, + "step": 313 + }, + { + "epoch": 0.47467876039304613, + "grad_norm": 15.742197036743164, + "learning_rate": 3.166184534225087e-07, + "logits/chosen": 0.9835371971130371, + "logits/rejected": 0.9550716876983643, + "logps/chosen": -169.02850341796875, + "logps/ref_chosen": -64.4450454711914, + "logps/ref_rejected": -76.83822631835938, + "logps/rejected": -248.9261474609375, + "loss": 1.0354, + "margin_dpo/margin_mean": 67.50444793701172, + "margin_dpo/margin_std": 74.06520080566406, + "step": 314 + }, + { + "epoch": 0.47619047619047616, + "grad_norm": 13.899375915527344, + "learning_rate": 3.1534311709253723e-07, + "logits/chosen": 0.9853957295417786, + "logits/rejected": 0.9791759252548218, + "logps/chosen": -192.0851593017578, + "logps/ref_chosen": -56.93284225463867, + "logps/ref_rejected": -50.28406524658203, + "logps/rejected": -199.18292236328125, + "loss": 1.0917, + "margin_dpo/margin_mean": 13.746543884277344, + "margin_dpo/margin_std": 78.6737060546875, + "step": 315 + }, + { + "epoch": 0.47770219198790626, + "grad_norm": 16.974472045898438, + "learning_rate": 3.1406595297511564e-07, + "logits/chosen": 0.7342613935470581, + "logits/rejected": 0.5312126874923706, + "logps/chosen": -182.5743408203125, + "logps/ref_chosen": -66.3306884765625, + "logps/ref_rejected": -137.49655151367188, + "logps/rejected": -309.92919921875, + "loss": 1.0034, + "margin_dpo/margin_mean": 56.18898391723633, + "margin_dpo/margin_std": 85.84481048583984, + "step": 316 + }, + { + "epoch": 0.47921390778533635, + "grad_norm": 14.526897430419922, + "learning_rate": 3.1278699679526975e-07, + "logits/chosen": 0.9011315107345581, + "logits/rejected": 0.8500291705131531, + "logps/chosen": -154.86817932128906, + "logps/ref_chosen": -42.494422912597656, + "logps/ref_rejected": -66.68242645263672, + "logps/rejected": -229.81800842285156, + "loss": 0.9879, + "margin_dpo/margin_mean": 50.761817932128906, + "margin_dpo/margin_std": 63.989234924316406, + "step": 317 + }, + { + "epoch": 0.48072562358276644, + "grad_norm": 17.438730239868164, + "learning_rate": 3.1150628432815336e-07, + "logits/chosen": 0.8117409944534302, + "logits/rejected": 0.8378602266311646, + "logps/chosen": -200.1689453125, + "logps/ref_chosen": -80.13600158691406, + "logps/ref_rejected": -83.40070343017578, + "logps/rejected": -256.28057861328125, + "loss": 1.1962, + "margin_dpo/margin_mean": 52.8469352722168, + "margin_dpo/margin_std": 89.18522644042969, + "step": 318 + }, + { + "epoch": 0.48223733938019653, + "grad_norm": 13.24758529663086, + "learning_rate": 3.1022385139804707e-07, + "logits/chosen": 0.8036596179008484, + "logits/rejected": 0.758333683013916, + "logps/chosen": -186.25949096679688, + "logps/ref_chosen": -83.42949676513672, + "logps/ref_rejected": -113.88960266113281, + "logps/rejected": -307.47576904296875, + "loss": 1.0176, + "margin_dpo/margin_mean": 90.75616455078125, + "margin_dpo/margin_std": 84.7353515625, + "step": 319 + }, + { + "epoch": 0.4837490551776266, + "grad_norm": 16.077190399169922, + "learning_rate": 3.0893973387735683e-07, + "logits/chosen": 0.8179005980491638, + "logits/rejected": 0.7942554950714111, + "logps/chosen": -119.91881561279297, + "logps/ref_chosen": -34.690284729003906, + "logps/ref_rejected": -57.00449752807617, + "logps/rejected": -183.24380493164062, + "loss": 1.1051, + "margin_dpo/margin_mean": 41.01078796386719, + "margin_dpo/margin_std": 86.33978271484375, + "step": 320 + }, + { + "epoch": 0.4852607709750567, + "grad_norm": 18.350908279418945, + "learning_rate": 3.0765396768561004e-07, + "logits/chosen": 0.8898118734359741, + "logits/rejected": 0.9107556343078613, + "logps/chosen": -122.81785583496094, + "logps/ref_chosen": -43.647361755371094, + "logps/ref_rejected": -49.380775451660156, + "logps/rejected": -199.43606567382812, + "loss": 1.0686, + "margin_dpo/margin_mean": 70.88478088378906, + "margin_dpo/margin_std": 72.49562072753906, + "step": 321 + }, + { + "epoch": 0.48677248677248675, + "grad_norm": 15.368215560913086, + "learning_rate": 3.063665887884511e-07, + "logits/chosen": 1.0382732152938843, + "logits/rejected": 0.9146447777748108, + "logps/chosen": -97.49690246582031, + "logps/ref_chosen": -30.90003204345703, + "logps/ref_rejected": -70.10389709472656, + "logps/rejected": -234.061767578125, + "loss": 0.9306, + "margin_dpo/margin_mean": 97.36099243164062, + "margin_dpo/margin_std": 62.040199279785156, + "step": 322 + }, + { + "epoch": 0.48828420256991684, + "grad_norm": 14.468438148498535, + "learning_rate": 3.0507763319663517e-07, + "logits/chosen": 0.8593270778656006, + "logits/rejected": 0.8456133008003235, + "logps/chosen": -175.40945434570312, + "logps/ref_chosen": -65.93765258789062, + "logps/ref_rejected": -73.23563385009766, + "logps/rejected": -233.8284912109375, + "loss": 1.1484, + "margin_dpo/margin_mean": 51.121055603027344, + "margin_dpo/margin_std": 80.7939453125, + "step": 323 + }, + { + "epoch": 0.4897959183673469, + "grad_norm": 15.986069679260254, + "learning_rate": 3.0378713696502097e-07, + "logits/chosen": 0.8656540513038635, + "logits/rejected": 0.8433347940444946, + "logps/chosen": -124.39891052246094, + "logps/ref_chosen": -40.30308532714844, + "logps/ref_rejected": -56.531700134277344, + "logps/rejected": -194.01043701171875, + "loss": 1.0246, + "margin_dpo/margin_mean": 53.382911682128906, + "margin_dpo/margin_std": 58.651451110839844, + "step": 324 + }, + { + "epoch": 0.491307634164777, + "grad_norm": 14.863397598266602, + "learning_rate": 3.0249513619156206e-07, + "logits/chosen": 1.018980860710144, + "logits/rejected": 1.0166943073272705, + "logps/chosen": -157.23825073242188, + "logps/ref_chosen": -52.309547424316406, + "logps/ref_rejected": -56.243019104003906, + "logps/rejected": -230.12759399414062, + "loss": 1.0454, + "margin_dpo/margin_mean": 68.95586395263672, + "margin_dpo/margin_std": 71.14509582519531, + "step": 325 + }, + { + "epoch": 0.4928193499622071, + "grad_norm": 18.127382278442383, + "learning_rate": 3.012016670162977e-07, + "logits/chosen": 0.9008328914642334, + "logits/rejected": 0.8836438655853271, + "logps/chosen": -221.15011596679688, + "logps/ref_chosen": -74.94476318359375, + "logps/ref_rejected": -84.40521240234375, + "logps/rejected": -264.2186584472656, + "loss": 1.2972, + "margin_dpo/margin_mean": 33.60809326171875, + "margin_dpo/margin_std": 81.20984649658203, + "step": 326 + }, + { + "epoch": 0.4943310657596372, + "grad_norm": 15.47046947479248, + "learning_rate": 2.99906765620341e-07, + "logits/chosen": 0.7541144490242004, + "logits/rejected": 0.7670720815658569, + "logps/chosen": -199.9362030029297, + "logps/ref_chosen": -75.78781127929688, + "logps/ref_rejected": -58.949928283691406, + "logps/rejected": -254.4146728515625, + "loss": 1.1629, + "margin_dpo/margin_mean": 71.31634521484375, + "margin_dpo/margin_std": 152.7041473388672, + "step": 327 + }, + { + "epoch": 0.4958427815570673, + "grad_norm": 14.062821388244629, + "learning_rate": 2.9861046822486766e-07, + "logits/chosen": 0.7631938457489014, + "logits/rejected": 0.7273193597793579, + "logps/chosen": -151.08351135253906, + "logps/ref_chosen": -64.362060546875, + "logps/ref_rejected": -83.52467346191406, + "logps/rejected": -229.90032958984375, + "loss": 1.0252, + "margin_dpo/margin_mean": 59.65420150756836, + "margin_dpo/margin_std": 63.522666931152344, + "step": 328 + }, + { + "epoch": 0.4973544973544973, + "grad_norm": 16.59518814086914, + "learning_rate": 2.9731281109010253e-07, + "logits/chosen": 0.9434309005737305, + "logits/rejected": 0.8342186212539673, + "logps/chosen": -146.32696533203125, + "logps/ref_chosen": -49.1827278137207, + "logps/ref_rejected": -84.71371459960938, + "logps/rejected": -222.47955322265625, + "loss": 1.0562, + "margin_dpo/margin_mean": 40.62158966064453, + "margin_dpo/margin_std": 68.19479370117188, + "step": 329 + }, + { + "epoch": 0.4988662131519274, + "grad_norm": 14.8185396194458, + "learning_rate": 2.9601383051430505e-07, + "logits/chosen": 0.9047882556915283, + "logits/rejected": 0.8687087297439575, + "logps/chosen": -178.20071411132812, + "logps/ref_chosen": -55.316497802734375, + "logps/ref_rejected": -65.46820831298828, + "logps/rejected": -196.5502166748047, + "loss": 1.1321, + "margin_dpo/margin_mean": 8.197792053222656, + "margin_dpo/margin_std": 85.20492553710938, + "step": 330 + }, + { + "epoch": 0.5003779289493575, + "grad_norm": 15.615235328674316, + "learning_rate": 2.947135628327544e-07, + "logits/chosen": 0.8331134915351868, + "logits/rejected": 0.7597118616104126, + "logps/chosen": -159.10202026367188, + "logps/ref_chosen": -59.31645965576172, + "logps/ref_rejected": -85.34983825683594, + "logps/rejected": -289.3092346191406, + "loss": 0.957, + "margin_dpo/margin_mean": 104.17383575439453, + "margin_dpo/margin_std": 75.5068588256836, + "step": 331 + }, + { + "epoch": 0.5018896447467877, + "grad_norm": 16.04841423034668, + "learning_rate": 2.934120444167326e-07, + "logits/chosen": 0.8443226218223572, + "logits/rejected": 0.8055863976478577, + "logps/chosen": -169.5580291748047, + "logps/ref_chosen": -58.70336151123047, + "logps/ref_rejected": -75.91543579101562, + "logps/rejected": -240.45140075683594, + "loss": 1.0368, + "margin_dpo/margin_mean": 53.68130874633789, + "margin_dpo/margin_std": 67.17088317871094, + "step": 332 + }, + { + "epoch": 0.5034013605442177, + "grad_norm": 17.165664672851562, + "learning_rate": 2.921093116725076e-07, + "logits/chosen": 0.9496264457702637, + "logits/rejected": 0.8828880190849304, + "logps/chosen": -196.00656127929688, + "logps/ref_chosen": -68.11222839355469, + "logps/ref_rejected": -106.30081939697266, + "logps/rejected": -314.76043701171875, + "loss": 0.9578, + "margin_dpo/margin_mean": 80.5653076171875, + "margin_dpo/margin_std": 64.04613494873047, + "step": 333 + }, + { + "epoch": 0.5049130763416477, + "grad_norm": 13.934477806091309, + "learning_rate": 2.9080540104031484e-07, + "logits/chosen": 0.9245538711547852, + "logits/rejected": 0.8373509645462036, + "logps/chosen": -169.71932983398438, + "logps/ref_chosen": -59.10272216796875, + "logps/ref_rejected": -81.27894592285156, + "logps/rejected": -235.17379760742188, + "loss": 1.1124, + "margin_dpo/margin_mean": 43.278263092041016, + "margin_dpo/margin_std": 132.07708740234375, + "step": 334 + }, + { + "epoch": 0.5064247921390779, + "grad_norm": 18.533241271972656, + "learning_rate": 2.895003489933375e-07, + "logits/chosen": 1.05397367477417, + "logits/rejected": 0.9336830973625183, + "logps/chosen": -147.48800659179688, + "logps/ref_chosen": -59.12438201904297, + "logps/ref_rejected": -121.7302017211914, + "logps/rejected": -331.6442565917969, + "loss": 1.0951, + "margin_dpo/margin_mean": 121.5504150390625, + "margin_dpo/margin_std": 88.70282745361328, + "step": 335 + }, + { + "epoch": 0.5079365079365079, + "grad_norm": 16.828493118286133, + "learning_rate": 2.8819419203668675e-07, + "logits/chosen": 0.8980951309204102, + "logits/rejected": 0.8811999559402466, + "logps/chosen": -182.00933837890625, + "logps/ref_chosen": -58.688018798828125, + "logps/ref_rejected": -89.30653381347656, + "logps/rejected": -294.16778564453125, + "loss": 1.0804, + "margin_dpo/margin_mean": 81.53993225097656, + "margin_dpo/margin_std": 102.6664047241211, + "step": 336 + }, + { + "epoch": 0.509448223733938, + "grad_norm": 13.274103164672852, + "learning_rate": 2.8688696670638053e-07, + "logits/chosen": 0.6885286569595337, + "logits/rejected": 0.6868454813957214, + "logps/chosen": -265.0749206542969, + "logps/ref_chosen": -97.68962860107422, + "logps/ref_rejected": -91.40831756591797, + "logps/rejected": -269.9544677734375, + "loss": 1.1635, + "margin_dpo/margin_mean": 11.160860061645508, + "margin_dpo/margin_std": 96.92953491210938, + "step": 337 + }, + { + "epoch": 0.5109599395313681, + "grad_norm": 14.477042198181152, + "learning_rate": 2.8557870956832133e-07, + "logits/chosen": 0.8425217866897583, + "logits/rejected": 0.7798970937728882, + "logps/chosen": -184.1267852783203, + "logps/ref_chosen": -73.13327026367188, + "logps/ref_rejected": -104.0283432006836, + "logps/rejected": -278.3711853027344, + "loss": 1.1303, + "margin_dpo/margin_mean": 63.349342346191406, + "margin_dpo/margin_std": 92.2508544921875, + "step": 338 + }, + { + "epoch": 0.5124716553287982, + "grad_norm": 17.419946670532227, + "learning_rate": 2.842694572172736e-07, + "logits/chosen": 0.9565955400466919, + "logits/rejected": 0.8105146884918213, + "logps/chosen": -125.23301696777344, + "logps/ref_chosen": -27.726638793945312, + "logps/ref_rejected": -54.045658111572266, + "logps/rejected": -183.23825073242188, + "loss": 1.0861, + "margin_dpo/margin_mean": 31.686233520507812, + "margin_dpo/margin_std": 49.92768859863281, + "step": 339 + }, + { + "epoch": 0.5139833711262283, + "grad_norm": 18.45969009399414, + "learning_rate": 2.8295924627584004e-07, + "logits/chosen": 0.9575048685073853, + "logits/rejected": 0.8758723735809326, + "logps/chosen": -149.28018188476562, + "logps/ref_chosen": -37.378753662109375, + "logps/ref_rejected": -67.66883850097656, + "logps/rejected": -262.060546875, + "loss": 1.1941, + "margin_dpo/margin_mean": 82.4902572631836, + "margin_dpo/margin_std": 83.99423217773438, + "step": 340 + }, + { + "epoch": 0.5154950869236583, + "grad_norm": 17.296634674072266, + "learning_rate": 2.816481133934373e-07, + "logits/chosen": 1.0063215494155884, + "logits/rejected": 0.9997066259384155, + "logps/chosen": -138.48793029785156, + "logps/ref_chosen": -42.783775329589844, + "logps/ref_rejected": -59.344329833984375, + "logps/rejected": -233.50625610351562, + "loss": 1.0223, + "margin_dpo/margin_mean": 78.457763671875, + "margin_dpo/margin_std": 88.61229705810547, + "step": 341 + }, + { + "epoch": 0.5170068027210885, + "grad_norm": 14.958573341369629, + "learning_rate": 2.8033609524527046e-07, + "logits/chosen": 0.9206830859184265, + "logits/rejected": 0.9482388496398926, + "logps/chosen": -197.25350952148438, + "logps/ref_chosen": -72.35289764404297, + "logps/ref_rejected": -63.26990509033203, + "logps/rejected": -223.66114807128906, + "loss": 1.0141, + "margin_dpo/margin_mean": 35.490623474121094, + "margin_dpo/margin_std": 68.44136810302734, + "step": 342 + }, + { + "epoch": 0.5185185185185185, + "grad_norm": 14.569660186767578, + "learning_rate": 2.7902322853130753e-07, + "logits/chosen": 0.8026296496391296, + "logits/rejected": 0.8457683324813843, + "logps/chosen": -219.88909912109375, + "logps/ref_chosen": -83.87641906738281, + "logps/ref_rejected": -75.55497741699219, + "logps/rejected": -238.33535766601562, + "loss": 1.1457, + "margin_dpo/margin_mean": 26.76772689819336, + "margin_dpo/margin_std": 85.9823226928711, + "step": 343 + }, + { + "epoch": 0.5200302343159486, + "grad_norm": 16.628511428833008, + "learning_rate": 2.7770954997525274e-07, + "logits/chosen": 1.025362491607666, + "logits/rejected": 0.8970023393630981, + "logps/chosen": -157.8984832763672, + "logps/ref_chosen": -35.154476165771484, + "logps/ref_rejected": -77.97383880615234, + "logps/rejected": -260.02105712890625, + "loss": 1.0538, + "margin_dpo/margin_mean": 59.303184509277344, + "margin_dpo/margin_std": 57.50261306762695, + "step": 344 + }, + { + "epoch": 0.5215419501133787, + "grad_norm": 18.29877471923828, + "learning_rate": 2.7639509632351927e-07, + "logits/chosen": 0.9883379936218262, + "logits/rejected": 0.9794098734855652, + "logps/chosen": -154.01934814453125, + "logps/ref_chosen": -39.99463653564453, + "logps/ref_rejected": -52.60383224487305, + "logps/rejected": -210.22317504882812, + "loss": 1.0774, + "margin_dpo/margin_mean": 43.594635009765625, + "margin_dpo/margin_std": 90.1194076538086, + "step": 345 + }, + { + "epoch": 0.5230536659108088, + "grad_norm": 14.863842964172363, + "learning_rate": 2.7507990434420123e-07, + "logits/chosen": 0.9803563356399536, + "logits/rejected": 0.9136096239089966, + "logps/chosen": -155.381103515625, + "logps/ref_chosen": -61.2567024230957, + "logps/ref_rejected": -102.48171997070312, + "logps/rejected": -321.823974609375, + "loss": 0.9885, + "margin_dpo/margin_mean": 125.21788787841797, + "margin_dpo/margin_std": 68.31744384765625, + "step": 346 + }, + { + "epoch": 0.5245653817082389, + "grad_norm": 16.280048370361328, + "learning_rate": 2.737640108260456e-07, + "logits/chosen": 1.080568790435791, + "logits/rejected": 1.0347011089324951, + "logps/chosen": -183.25091552734375, + "logps/ref_chosen": -58.63034439086914, + "logps/ref_rejected": -79.94859313964844, + "logps/rejected": -263.5897216796875, + "loss": 1.058, + "margin_dpo/margin_mean": 59.020530700683594, + "margin_dpo/margin_std": 88.26313781738281, + "step": 347 + }, + { + "epoch": 0.5260770975056689, + "grad_norm": 18.49781036376953, + "learning_rate": 2.724474525774229e-07, + "logits/chosen": 0.9393756985664368, + "logits/rejected": 0.9025633335113525, + "logps/chosen": -187.76321411132812, + "logps/ref_chosen": -72.25175476074219, + "logps/ref_rejected": -95.0661392211914, + "logps/rejected": -275.28118896484375, + "loss": 1.0541, + "margin_dpo/margin_mean": 64.70359802246094, + "margin_dpo/margin_std": 109.8628158569336, + "step": 348 + }, + { + "epoch": 0.527588813303099, + "grad_norm": 15.981618881225586, + "learning_rate": 2.711302664252973e-07, + "logits/chosen": 0.9622572660446167, + "logits/rejected": 0.8644802570343018, + "logps/chosen": -134.60650634765625, + "logps/ref_chosen": -34.93451690673828, + "logps/ref_rejected": -71.41903686523438, + "logps/rejected": -250.6934814453125, + "loss": 1.0037, + "margin_dpo/margin_mean": 79.60247039794922, + "margin_dpo/margin_std": 61.48447799682617, + "step": 349 + }, + { + "epoch": 0.5291005291005291, + "grad_norm": 17.303754806518555, + "learning_rate": 2.698124892141971e-07, + "logits/chosen": 0.9259968996047974, + "logits/rejected": 0.8843666911125183, + "logps/chosen": -206.68112182617188, + "logps/ref_chosen": -71.93693542480469, + "logps/ref_rejected": -97.71165466308594, + "logps/rejected": -320.2801513671875, + "loss": 0.8634, + "margin_dpo/margin_mean": 87.82434844970703, + "margin_dpo/margin_std": 68.40359497070312, + "step": 350 + }, + { + "epoch": 0.5306122448979592, + "grad_norm": 15.037851333618164, + "learning_rate": 2.6849415780518357e-07, + "logits/chosen": 0.8401812314987183, + "logits/rejected": 0.6766891479492188, + "logps/chosen": -163.47976684570312, + "logps/ref_chosen": -55.08075714111328, + "logps/ref_rejected": -107.10870361328125, + "logps/rejected": -288.9677734375, + "loss": 1.092, + "margin_dpo/margin_mean": 73.46005249023438, + "margin_dpo/margin_std": 82.07776641845703, + "step": 351 + }, + { + "epoch": 0.5321239606953893, + "grad_norm": 19.045122146606445, + "learning_rate": 2.6717530907482024e-07, + "logits/chosen": 0.8240054845809937, + "logits/rejected": 0.8280425071716309, + "logps/chosen": -186.6539306640625, + "logps/ref_chosen": -75.67378234863281, + "logps/ref_rejected": -88.66374206542969, + "logps/rejected": -267.3773193359375, + "loss": 1.0297, + "margin_dpo/margin_mean": 67.73342895507812, + "margin_dpo/margin_std": 94.85681915283203, + "step": 352 + }, + { + "epoch": 0.5336356764928194, + "grad_norm": 16.103891372680664, + "learning_rate": 2.658559799141411e-07, + "logits/chosen": 0.9016702175140381, + "logits/rejected": 0.7808640003204346, + "logps/chosen": -195.90447998046875, + "logps/ref_chosen": -62.94065856933594, + "logps/ref_rejected": -104.08489990234375, + "logps/rejected": -294.2989807128906, + "loss": 1.082, + "margin_dpo/margin_mean": 57.25025177001953, + "margin_dpo/margin_std": 92.04182434082031, + "step": 353 + }, + { + "epoch": 0.5351473922902494, + "grad_norm": 18.149988174438477, + "learning_rate": 2.6453620722761895e-07, + "logits/chosen": 1.0865049362182617, + "logits/rejected": 1.0457913875579834, + "logps/chosen": -135.3533172607422, + "logps/ref_chosen": -28.847824096679688, + "logps/ref_rejected": -53.78091812133789, + "logps/rejected": -229.83432006835938, + "loss": 1.0416, + "margin_dpo/margin_mean": 69.54791259765625, + "margin_dpo/margin_std": 84.38679504394531, + "step": 354 + }, + { + "epoch": 0.5366591080876795, + "grad_norm": 17.005126953125, + "learning_rate": 2.632160279321328e-07, + "logits/chosen": 1.035976767539978, + "logits/rejected": 0.9110543727874756, + "logps/chosen": -159.94003295898438, + "logps/ref_chosen": -53.094722747802734, + "logps/ref_rejected": -91.13424682617188, + "logps/rejected": -290.590087890625, + "loss": 0.9458, + "margin_dpo/margin_mean": 92.61053466796875, + "margin_dpo/margin_std": 103.92430114746094, + "step": 355 + }, + { + "epoch": 0.5381708238851096, + "grad_norm": 15.574033737182617, + "learning_rate": 2.618954789559356e-07, + "logits/chosen": 0.9952447414398193, + "logits/rejected": 0.85740727186203, + "logps/chosen": -137.01632690429688, + "logps/ref_chosen": -34.362483978271484, + "logps/ref_rejected": -77.31940460205078, + "logps/rejected": -252.238037109375, + "loss": 1.0505, + "margin_dpo/margin_mean": 72.2647933959961, + "margin_dpo/margin_std": 116.42106628417969, + "step": 356 + }, + { + "epoch": 0.5396825396825397, + "grad_norm": 18.58148765563965, + "learning_rate": 2.6057459723762076e-07, + "logits/chosen": 0.9232524633407593, + "logits/rejected": 0.8515598773956299, + "logps/chosen": -193.28883361816406, + "logps/ref_chosen": -64.16845703125, + "logps/ref_rejected": -78.76988983154297, + "logps/rejected": -273.12249755859375, + "loss": 1.1607, + "margin_dpo/margin_mean": 65.23223114013672, + "margin_dpo/margin_std": 106.97843933105469, + "step": 357 + }, + { + "epoch": 0.5411942554799698, + "grad_norm": 25.09504508972168, + "learning_rate": 2.5925341972508954e-07, + "logits/chosen": 0.8110201954841614, + "logits/rejected": 0.862175464630127, + "logps/chosen": -185.81414794921875, + "logps/ref_chosen": -64.39706420898438, + "logps/ref_rejected": -56.678443908691406, + "logps/rejected": -252.69879150390625, + "loss": 1.107, + "margin_dpo/margin_mean": 74.60326385498047, + "margin_dpo/margin_std": 79.77681732177734, + "step": 358 + }, + { + "epoch": 0.5427059712773998, + "grad_norm": 27.411643981933594, + "learning_rate": 2.579319833745169e-07, + "logits/chosen": 0.9938050508499146, + "logits/rejected": 1.0131170749664307, + "logps/chosen": -222.49644470214844, + "logps/ref_chosen": -71.20832824707031, + "logps/ref_rejected": -75.58880615234375, + "logps/rejected": -236.30592346191406, + "loss": 1.3531, + "margin_dpo/margin_mean": 9.428986549377441, + "margin_dpo/margin_std": 102.51341247558594, + "step": 359 + }, + { + "epoch": 0.54421768707483, + "grad_norm": 16.536500930786133, + "learning_rate": 2.5661032514931834e-07, + "logits/chosen": 0.8350504636764526, + "logits/rejected": 0.687712550163269, + "logps/chosen": -199.9302520751953, + "logps/ref_chosen": -65.89573669433594, + "logps/ref_rejected": -97.73664855957031, + "logps/rejected": -276.3427734375, + "loss": 1.0164, + "margin_dpo/margin_mean": 44.57158660888672, + "margin_dpo/margin_std": 97.77774047851562, + "step": 360 + }, + { + "epoch": 0.54572940287226, + "grad_norm": 15.011344909667969, + "learning_rate": 2.552884820191154e-07, + "logits/chosen": 0.9806392192840576, + "logits/rejected": 0.9534279108047485, + "logps/chosen": -175.77911376953125, + "logps/ref_chosen": -50.53264617919922, + "logps/ref_rejected": -59.25585174560547, + "logps/rejected": -233.24551391601562, + "loss": 1.0024, + "margin_dpo/margin_mean": 48.743194580078125, + "margin_dpo/margin_std": 77.80059051513672, + "step": 361 + }, + { + "epoch": 0.54724111866969, + "grad_norm": 16.17755699157715, + "learning_rate": 2.53966490958702e-07, + "logits/chosen": 1.0447218418121338, + "logits/rejected": 0.9513689279556274, + "logps/chosen": -221.77896118164062, + "logps/ref_chosen": -58.92408752441406, + "logps/ref_rejected": -104.06151580810547, + "logps/rejected": -342.5570373535156, + "loss": 0.971, + "margin_dpo/margin_mean": 75.64065551757812, + "margin_dpo/margin_std": 177.78872680664062, + "step": 362 + }, + { + "epoch": 0.5487528344671202, + "grad_norm": 21.469377517700195, + "learning_rate": 2.526443889470099e-07, + "logits/chosen": 0.9666943550109863, + "logits/rejected": 0.7734654545783997, + "logps/chosen": -171.7001495361328, + "logps/ref_chosen": -46.72846984863281, + "logps/ref_rejected": -140.4446258544922, + "logps/rejected": -366.104736328125, + "loss": 0.9585, + "margin_dpo/margin_mean": 100.68843078613281, + "margin_dpo/margin_std": 130.09930419921875, + "step": 363 + }, + { + "epoch": 0.5502645502645502, + "grad_norm": 16.017593383789062, + "learning_rate": 2.513222129660744e-07, + "logits/chosen": 0.9620314240455627, + "logits/rejected": 0.8384478688240051, + "logps/chosen": -144.81683349609375, + "logps/ref_chosen": -47.71454620361328, + "logps/ref_rejected": -85.33769226074219, + "logps/rejected": -307.9134521484375, + "loss": 0.9883, + "margin_dpo/margin_mean": 125.4734878540039, + "margin_dpo/margin_std": 125.29339599609375, + "step": 364 + }, + { + "epoch": 0.5517762660619804, + "grad_norm": 16.882993698120117, + "learning_rate": 2.5e-07, + "logits/chosen": 0.9969468116760254, + "logits/rejected": 1.0294103622436523, + "logps/chosen": -152.65603637695312, + "logps/ref_chosen": -53.76380157470703, + "logps/ref_rejected": -46.24406433105469, + "logps/rejected": -212.58950805664062, + "loss": 0.9774, + "margin_dpo/margin_mean": 67.45320892333984, + "margin_dpo/margin_std": 76.16273498535156, + "step": 365 + }, + { + "epoch": 0.5532879818594104, + "grad_norm": 19.378551483154297, + "learning_rate": 2.486777870339255e-07, + "logits/chosen": 0.9522498846054077, + "logits/rejected": 0.9834457635879517, + "logps/chosen": -179.36004638671875, + "logps/ref_chosen": -67.52264404296875, + "logps/ref_rejected": -70.28094482421875, + "logps/rejected": -228.3491973876953, + "loss": 1.0973, + "margin_dpo/margin_mean": 46.230857849121094, + "margin_dpo/margin_std": 88.74593353271484, + "step": 366 + }, + { + "epoch": 0.5547996976568406, + "grad_norm": 18.30280876159668, + "learning_rate": 2.4735561105299014e-07, + "logits/chosen": 0.9707492589950562, + "logits/rejected": 0.8614064455032349, + "logps/chosen": -161.2140655517578, + "logps/ref_chosen": -55.156681060791016, + "logps/ref_rejected": -82.34903717041016, + "logps/rejected": -260.894775390625, + "loss": 1.0604, + "margin_dpo/margin_mean": 72.48836517333984, + "margin_dpo/margin_std": 68.52483367919922, + "step": 367 + }, + { + "epoch": 0.5563114134542706, + "grad_norm": 19.369901657104492, + "learning_rate": 2.46033509041298e-07, + "logits/chosen": 0.7760097980499268, + "logits/rejected": 0.8570929765701294, + "logps/chosen": -175.87571716308594, + "logps/ref_chosen": -74.56654357910156, + "logps/ref_rejected": -55.081199645996094, + "logps/rejected": -217.58401489257812, + "loss": 1.137, + "margin_dpo/margin_mean": 61.19365692138672, + "margin_dpo/margin_std": 94.93489074707031, + "step": 368 + }, + { + "epoch": 0.5578231292517006, + "grad_norm": 24.349010467529297, + "learning_rate": 2.447115179808846e-07, + "logits/chosen": 0.9876595735549927, + "logits/rejected": 0.9113900661468506, + "logps/chosen": -190.04141235351562, + "logps/ref_chosen": -61.67764663696289, + "logps/ref_rejected": -88.59959411621094, + "logps/rejected": -285.03338623046875, + "loss": 1.1874, + "margin_dpo/margin_mean": 68.07002258300781, + "margin_dpo/margin_std": 85.15034484863281, + "step": 369 + }, + { + "epoch": 0.5593348450491308, + "grad_norm": 16.110780715942383, + "learning_rate": 2.4338967485068164e-07, + "logits/chosen": 1.0228030681610107, + "logits/rejected": 0.9811384677886963, + "logps/chosen": -119.4067153930664, + "logps/ref_chosen": -44.50119400024414, + "logps/ref_rejected": -81.18331909179688, + "logps/rejected": -273.24188232421875, + "loss": 0.9948, + "margin_dpo/margin_mean": 117.15304565429688, + "margin_dpo/margin_std": 106.01179504394531, + "step": 370 + }, + { + "epoch": 0.5608465608465608, + "grad_norm": 21.480369567871094, + "learning_rate": 2.420680166254831e-07, + "logits/chosen": 1.0966333150863647, + "logits/rejected": 1.0227328538894653, + "logps/chosen": -148.44422912597656, + "logps/ref_chosen": -39.17439651489258, + "logps/ref_rejected": -76.12638092041016, + "logps/rejected": -258.49078369140625, + "loss": 1.0422, + "margin_dpo/margin_mean": 73.09457397460938, + "margin_dpo/margin_std": 105.94315338134766, + "step": 371 + }, + { + "epoch": 0.562358276643991, + "grad_norm": 20.347423553466797, + "learning_rate": 2.4074658027491044e-07, + "logits/chosen": 0.9855044484138489, + "logits/rejected": 0.8605490922927856, + "logps/chosen": -152.38363647460938, + "logps/ref_chosen": -52.27345275878906, + "logps/ref_rejected": -87.656494140625, + "logps/rejected": -296.9883117675781, + "loss": 1.2809, + "margin_dpo/margin_mean": 109.22164916992188, + "margin_dpo/margin_std": 77.62582397460938, + "step": 372 + }, + { + "epoch": 0.563869992441421, + "grad_norm": 17.717870712280273, + "learning_rate": 2.394254027623792e-07, + "logits/chosen": 1.0428986549377441, + "logits/rejected": 1.0207685232162476, + "logps/chosen": -223.8621368408203, + "logps/ref_chosen": -68.01244354248047, + "logps/ref_rejected": -64.4259033203125, + "logps/rejected": -273.6564636230469, + "loss": 1.1361, + "margin_dpo/margin_mean": 53.3808708190918, + "margin_dpo/margin_std": 147.9482421875, + "step": 373 + }, + { + "epoch": 0.5653817082388511, + "grad_norm": 27.212177276611328, + "learning_rate": 2.381045210440644e-07, + "logits/chosen": 0.8525890111923218, + "logits/rejected": 0.7748229503631592, + "logps/chosen": -152.43893432617188, + "logps/ref_chosen": -56.639495849609375, + "logps/ref_rejected": -83.10781860351562, + "logps/rejected": -317.96673583984375, + "loss": 1.0262, + "margin_dpo/margin_mean": 139.05946350097656, + "margin_dpo/margin_std": 114.1051025390625, + "step": 374 + }, + { + "epoch": 0.5668934240362812, + "grad_norm": 21.642242431640625, + "learning_rate": 2.3678397206786715e-07, + "logits/chosen": 1.0544872283935547, + "logits/rejected": 0.9103308320045471, + "logps/chosen": -140.70907592773438, + "logps/ref_chosen": -31.620290756225586, + "logps/ref_rejected": -70.079345703125, + "logps/rejected": -234.93954467773438, + "loss": 1.1789, + "margin_dpo/margin_mean": 55.77141189575195, + "margin_dpo/margin_std": 87.87168884277344, + "step": 375 + }, + { + "epoch": 0.5684051398337112, + "grad_norm": 18.192161560058594, + "learning_rate": 2.3546379277238103e-07, + "logits/chosen": 0.788690447807312, + "logits/rejected": 0.8902658224105835, + "logps/chosen": -237.19358825683594, + "logps/ref_chosen": -78.47712707519531, + "logps/ref_rejected": -57.03622055053711, + "logps/rejected": -222.91226196289062, + "loss": 1.0841, + "margin_dpo/margin_mean": 7.1595916748046875, + "margin_dpo/margin_std": 78.5771255493164, + "step": 376 + }, + { + "epoch": 0.5699168556311414, + "grad_norm": 16.722244262695312, + "learning_rate": 2.3414402008585886e-07, + "logits/chosen": 0.9365607500076294, + "logits/rejected": 0.937558650970459, + "logps/chosen": -185.59255981445312, + "logps/ref_chosen": -60.769771575927734, + "logps/ref_rejected": -65.29888916015625, + "logps/rejected": -243.30319213867188, + "loss": 1.1232, + "margin_dpo/margin_mean": 53.181495666503906, + "margin_dpo/margin_std": 76.733642578125, + "step": 377 + }, + { + "epoch": 0.5714285714285714, + "grad_norm": 18.274368286132812, + "learning_rate": 2.3282469092517977e-07, + "logits/chosen": 1.0017985105514526, + "logits/rejected": 0.944943904876709, + "logps/chosen": -159.78944396972656, + "logps/ref_chosen": -50.79759979248047, + "logps/ref_rejected": -75.34347534179688, + "logps/rejected": -254.2080841064453, + "loss": 1.1378, + "margin_dpo/margin_mean": 69.87276458740234, + "margin_dpo/margin_std": 101.12820434570312, + "step": 378 + }, + { + "epoch": 0.5729402872260015, + "grad_norm": 17.872270584106445, + "learning_rate": 2.3150584219481643e-07, + "logits/chosen": 0.9760261178016663, + "logits/rejected": 0.9050667881965637, + "logps/chosen": -185.8768310546875, + "logps/ref_chosen": -61.738677978515625, + "logps/ref_rejected": -97.11418151855469, + "logps/rejected": -275.95050048828125, + "loss": 1.0379, + "margin_dpo/margin_mean": 54.698150634765625, + "margin_dpo/margin_std": 97.39860534667969, + "step": 379 + }, + { + "epoch": 0.5744520030234316, + "grad_norm": 15.110595703125, + "learning_rate": 2.3018751078580283e-07, + "logits/chosen": 0.8375756740570068, + "logits/rejected": 0.9170357584953308, + "logps/chosen": -169.6956329345703, + "logps/ref_chosen": -44.782066345214844, + "logps/ref_rejected": -41.68242263793945, + "logps/rejected": -221.0014190673828, + "loss": 0.9764, + "margin_dpo/margin_mean": 54.40543746948242, + "margin_dpo/margin_std": 82.51588439941406, + "step": 380 + }, + { + "epoch": 0.5759637188208617, + "grad_norm": 19.764453887939453, + "learning_rate": 2.288697335747027e-07, + "logits/chosen": 0.975003719329834, + "logits/rejected": 0.9306457042694092, + "logps/chosen": -211.8162384033203, + "logps/ref_chosen": -59.876434326171875, + "logps/ref_rejected": -56.535682678222656, + "logps/rejected": -277.5439147949219, + "loss": 1.3201, + "margin_dpo/margin_mean": 69.06842041015625, + "margin_dpo/margin_std": 162.8582763671875, + "step": 381 + }, + { + "epoch": 0.5774754346182918, + "grad_norm": 14.49204158782959, + "learning_rate": 2.2755254742257706e-07, + "logits/chosen": 1.0008131265640259, + "logits/rejected": 0.9001951217651367, + "logps/chosen": -203.72560119628906, + "logps/ref_chosen": -63.76511764526367, + "logps/ref_rejected": -100.84956359863281, + "logps/rejected": -320.87860107421875, + "loss": 1.0773, + "margin_dpo/margin_mean": 80.06857299804688, + "margin_dpo/margin_std": 91.30355072021484, + "step": 382 + }, + { + "epoch": 0.5789871504157218, + "grad_norm": 20.305614471435547, + "learning_rate": 2.2623598917395436e-07, + "logits/chosen": 0.9180213809013367, + "logits/rejected": 0.8855539560317993, + "logps/chosen": -171.7388458251953, + "logps/ref_chosen": -56.75093078613281, + "logps/ref_rejected": -63.029056549072266, + "logps/rejected": -220.02169799804688, + "loss": 1.1631, + "margin_dpo/margin_mean": 42.00471115112305, + "margin_dpo/margin_std": 75.70281982421875, + "step": 383 + }, + { + "epoch": 0.5804988662131519, + "grad_norm": 19.855918884277344, + "learning_rate": 2.2492009565579875e-07, + "logits/chosen": 0.8351778984069824, + "logits/rejected": 0.8428291082382202, + "logps/chosen": -205.7308807373047, + "logps/ref_chosen": -79.68529510498047, + "logps/ref_rejected": -81.70601654052734, + "logps/rejected": -268.79345703125, + "loss": 1.0003, + "margin_dpo/margin_mean": 61.041873931884766, + "margin_dpo/margin_std": 60.530391693115234, + "step": 384 + }, + { + "epoch": 0.582010582010582, + "grad_norm": 15.318973541259766, + "learning_rate": 2.2360490367648084e-07, + "logits/chosen": 0.8146077394485474, + "logits/rejected": 0.808496356010437, + "logps/chosen": -191.92584228515625, + "logps/ref_chosen": -55.45124053955078, + "logps/ref_rejected": -74.28287506103516, + "logps/rejected": -266.0337219238281, + "loss": 0.9743, + "margin_dpo/margin_mean": 55.27625274658203, + "margin_dpo/margin_std": 67.60725402832031, + "step": 385 + }, + { + "epoch": 0.5835222978080121, + "grad_norm": 16.764909744262695, + "learning_rate": 2.2229045002474724e-07, + "logits/chosen": 0.8237930536270142, + "logits/rejected": 0.7379618287086487, + "logps/chosen": -165.93934631347656, + "logps/ref_chosen": -49.65403747558594, + "logps/ref_rejected": -81.07264709472656, + "logps/rejected": -254.802978515625, + "loss": 1.1167, + "margin_dpo/margin_mean": 57.445030212402344, + "margin_dpo/margin_std": 128.3809814453125, + "step": 386 + }, + { + "epoch": 0.5850340136054422, + "grad_norm": 20.964162826538086, + "learning_rate": 2.209767714686924e-07, + "logits/chosen": 1.0351340770721436, + "logits/rejected": 0.93593430519104, + "logps/chosen": -142.808349609375, + "logps/ref_chosen": -30.506126403808594, + "logps/ref_rejected": -75.85283660888672, + "logps/rejected": -261.2908935546875, + "loss": 0.9562, + "margin_dpo/margin_mean": 73.13583374023438, + "margin_dpo/margin_std": 91.49317932128906, + "step": 387 + }, + { + "epoch": 0.5865457294028723, + "grad_norm": 16.960439682006836, + "learning_rate": 2.1966390475472954e-07, + "logits/chosen": 0.8540966510772705, + "logits/rejected": 0.7713406085968018, + "logps/chosen": -190.65914916992188, + "logps/ref_chosen": -74.6607437133789, + "logps/ref_rejected": -112.83131408691406, + "logps/rejected": -320.3790588378906, + "loss": 1.1545, + "margin_dpo/margin_mean": 91.54933166503906, + "margin_dpo/margin_std": 84.64852142333984, + "step": 388 + }, + { + "epoch": 0.5880574452003023, + "grad_norm": 28.818973541259766, + "learning_rate": 2.1835188660656265e-07, + "logits/chosen": 0.9836728572845459, + "logits/rejected": 0.8946930170059204, + "logps/chosen": -191.19837951660156, + "logps/ref_chosen": -57.50859069824219, + "logps/ref_rejected": -99.1073989868164, + "logps/rejected": -297.79901123046875, + "loss": 1.0733, + "margin_dpo/margin_mean": 65.00183868408203, + "margin_dpo/margin_std": 77.60054016113281, + "step": 389 + }, + { + "epoch": 0.5895691609977324, + "grad_norm": 14.67129135131836, + "learning_rate": 2.170407537241599e-07, + "logits/chosen": 0.8287357687950134, + "logits/rejected": 0.8107761144638062, + "logps/chosen": -167.33096313476562, + "logps/ref_chosen": -48.60906982421875, + "logps/ref_rejected": -54.462955474853516, + "logps/rejected": -222.6549530029297, + "loss": 1.0588, + "margin_dpo/margin_mean": 49.4700927734375, + "margin_dpo/margin_std": 65.86190795898438, + "step": 390 + }, + { + "epoch": 0.5910808767951625, + "grad_norm": 15.129668235778809, + "learning_rate": 2.1573054278272636e-07, + "logits/chosen": 0.8232520818710327, + "logits/rejected": 0.7771520018577576, + "logps/chosen": -169.54945373535156, + "logps/ref_chosen": -64.54489135742188, + "logps/ref_rejected": -90.6060791015625, + "logps/rejected": -272.054931640625, + "loss": 1.0709, + "margin_dpo/margin_mean": 76.44432067871094, + "margin_dpo/margin_std": 110.20875549316406, + "step": 391 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 15.302267074584961, + "learning_rate": 2.1442129043167873e-07, + "logits/chosen": 0.946514904499054, + "logits/rejected": 0.9243011474609375, + "logps/chosen": -161.78128051757812, + "logps/ref_chosen": -62.38185119628906, + "logps/ref_rejected": -71.40414428710938, + "logps/rejected": -220.90402221679688, + "loss": 1.0166, + "margin_dpo/margin_mean": 50.100440979003906, + "margin_dpo/margin_std": 103.29415130615234, + "step": 392 + }, + { + "epoch": 0.5941043083900227, + "grad_norm": 15.310603141784668, + "learning_rate": 2.131130332936195e-07, + "logits/chosen": 0.8303643465042114, + "logits/rejected": 0.830100953578949, + "logps/chosen": -150.3733367919922, + "logps/ref_chosen": -40.24174499511719, + "logps/ref_rejected": -50.49744415283203, + "logps/rejected": -233.74623107910156, + "loss": 0.9411, + "margin_dpo/margin_mean": 73.11720275878906, + "margin_dpo/margin_std": 63.29140853881836, + "step": 393 + }, + { + "epoch": 0.5956160241874527, + "grad_norm": 15.765230178833008, + "learning_rate": 2.1180580796331323e-07, + "logits/chosen": 0.971742570400238, + "logits/rejected": 0.847053050994873, + "logps/chosen": -156.80836486816406, + "logps/ref_chosen": -46.0341911315918, + "logps/ref_rejected": -90.94654846191406, + "logps/rejected": -264.73736572265625, + "loss": 1.076, + "margin_dpo/margin_mean": 63.01665496826172, + "margin_dpo/margin_std": 79.52315521240234, + "step": 394 + }, + { + "epoch": 0.5971277399848829, + "grad_norm": 14.962550163269043, + "learning_rate": 2.104996510066625e-07, + "logits/chosen": 1.010396122932434, + "logits/rejected": 0.888831615447998, + "logps/chosen": -136.7878875732422, + "logps/ref_chosen": -45.55821990966797, + "logps/ref_rejected": -93.29295349121094, + "logps/rejected": -282.3095397949219, + "loss": 1.0567, + "margin_dpo/margin_mean": 97.78689575195312, + "margin_dpo/margin_std": 64.2834701538086, + "step": 395 + }, + { + "epoch": 0.5986394557823129, + "grad_norm": 15.773469924926758, + "learning_rate": 2.0919459895968517e-07, + "logits/chosen": 0.9865584373474121, + "logits/rejected": 0.9610118865966797, + "logps/chosen": -135.16543579101562, + "logps/ref_chosen": -47.607505798339844, + "logps/ref_rejected": -52.5338020324707, + "logps/rejected": -236.95713806152344, + "loss": 1.0154, + "margin_dpo/margin_mean": 96.86540222167969, + "margin_dpo/margin_std": 54.51676940917969, + "step": 396 + }, + { + "epoch": 0.600151171579743, + "grad_norm": 17.012723922729492, + "learning_rate": 2.078906883274924e-07, + "logits/chosen": 0.8290398120880127, + "logits/rejected": 0.7684098482131958, + "logps/chosen": -193.76319885253906, + "logps/ref_chosen": -61.47978973388672, + "logps/ref_rejected": -80.81649780273438, + "logps/rejected": -270.89862060546875, + "loss": 1.2961, + "margin_dpo/margin_mean": 57.79869842529297, + "margin_dpo/margin_std": 66.68041229248047, + "step": 397 + }, + { + "epoch": 0.6016628873771731, + "grad_norm": 14.493592262268066, + "learning_rate": 2.065879555832674e-07, + "logits/chosen": 0.8568699359893799, + "logits/rejected": 0.7129446268081665, + "logps/chosen": -158.29476928710938, + "logps/ref_chosen": -47.49082946777344, + "logps/ref_rejected": -101.38699340820312, + "logps/rejected": -306.26104736328125, + "loss": 0.9401, + "margin_dpo/margin_mean": 94.07012939453125, + "margin_dpo/margin_std": 80.53518676757812, + "step": 398 + }, + { + "epoch": 0.6031746031746031, + "grad_norm": 16.002872467041016, + "learning_rate": 2.052864371672457e-07, + "logits/chosen": 0.9500819444656372, + "logits/rejected": 0.8516980409622192, + "logps/chosen": -241.56881713867188, + "logps/ref_chosen": -76.00422668457031, + "logps/ref_rejected": -139.26205444335938, + "logps/rejected": -371.9350280761719, + "loss": 0.9017, + "margin_dpo/margin_mean": 67.10838317871094, + "margin_dpo/margin_std": 61.81909942626953, + "step": 399 + }, + { + "epoch": 0.6046863189720333, + "grad_norm": 22.764698028564453, + "learning_rate": 2.0398616948569493e-07, + "logits/chosen": 0.8686426281929016, + "logits/rejected": 0.9016132354736328, + "logps/chosen": -222.953857421875, + "logps/ref_chosen": -95.81818389892578, + "logps/ref_rejected": -84.06385803222656, + "logps/rejected": -276.75958251953125, + "loss": 1.1296, + "margin_dpo/margin_mean": 65.56008911132812, + "margin_dpo/margin_std": 57.78480529785156, + "step": 400 + }, + { + "epoch": 0.6061980347694633, + "grad_norm": 11.519392013549805, + "learning_rate": 2.0268718890989752e-07, + "logits/chosen": 0.8911803960800171, + "logits/rejected": 0.8166502714157104, + "logps/chosen": -165.61497497558594, + "logps/ref_chosen": -53.86456298828125, + "logps/ref_rejected": -53.25059509277344, + "logps/rejected": -205.1787567138672, + "loss": 0.9321, + "margin_dpo/margin_mean": 40.17775344848633, + "margin_dpo/margin_std": 83.69491577148438, + "step": 401 + }, + { + "epoch": 0.6077097505668935, + "grad_norm": 16.777515411376953, + "learning_rate": 2.013895317751323e-07, + "logits/chosen": 0.9933120012283325, + "logits/rejected": 0.814152717590332, + "logps/chosen": -142.3320770263672, + "logps/ref_chosen": -37.891700744628906, + "logps/ref_rejected": -85.77980041503906, + "logps/rejected": -285.2149963378906, + "loss": 1.0406, + "margin_dpo/margin_mean": 94.99481201171875, + "margin_dpo/margin_std": 68.64460754394531, + "step": 402 + }, + { + "epoch": 0.6092214663643235, + "grad_norm": 13.203496932983398, + "learning_rate": 2.0009323437965898e-07, + "logits/chosen": 1.0703718662261963, + "logits/rejected": 0.9925715923309326, + "logps/chosen": -180.50936889648438, + "logps/ref_chosen": -63.857696533203125, + "logps/ref_rejected": -93.38938903808594, + "logps/rejected": -301.53302001953125, + "loss": 0.9559, + "margin_dpo/margin_mean": 91.49195861816406, + "margin_dpo/margin_std": 91.93600463867188, + "step": 403 + }, + { + "epoch": 0.6107331821617535, + "grad_norm": 15.767083168029785, + "learning_rate": 1.9879833298370237e-07, + "logits/chosen": 0.9310320019721985, + "logits/rejected": 0.9439609050750732, + "logps/chosen": -240.16302490234375, + "logps/ref_chosen": -84.43171691894531, + "logps/ref_rejected": -89.65742492675781, + "logps/rejected": -274.97003173828125, + "loss": 0.9804, + "margin_dpo/margin_mean": 29.5813045501709, + "margin_dpo/margin_std": 90.8002700805664, + "step": 404 + }, + { + "epoch": 0.6122448979591837, + "grad_norm": 14.391467094421387, + "learning_rate": 1.975048638084379e-07, + "logits/chosen": 1.0961458683013916, + "logits/rejected": 1.0419206619262695, + "logps/chosen": -144.2384033203125, + "logps/ref_chosen": -43.280792236328125, + "logps/ref_rejected": -58.38227844238281, + "logps/rejected": -217.02481079101562, + "loss": 1.0274, + "margin_dpo/margin_mean": 57.6849365234375, + "margin_dpo/margin_std": 87.62693786621094, + "step": 405 + }, + { + "epoch": 0.6137566137566137, + "grad_norm": 17.21456527709961, + "learning_rate": 1.9621286303497914e-07, + "logits/chosen": 1.0429320335388184, + "logits/rejected": 0.8181569576263428, + "logps/chosen": -143.40634155273438, + "logps/ref_chosen": -38.76139831542969, + "logps/ref_rejected": -95.4449462890625, + "logps/rejected": -312.931396484375, + "loss": 0.9634, + "margin_dpo/margin_mean": 112.84149169921875, + "margin_dpo/margin_std": 117.30062866210938, + "step": 406 + }, + { + "epoch": 0.6152683295540439, + "grad_norm": 17.83815574645996, + "learning_rate": 1.9492236680336483e-07, + "logits/chosen": 0.921512246131897, + "logits/rejected": 0.9170160293579102, + "logps/chosen": -201.85311889648438, + "logps/ref_chosen": -70.94854736328125, + "logps/ref_rejected": -83.073486328125, + "logps/rejected": -236.95440673828125, + "loss": 1.065, + "margin_dpo/margin_mean": 22.976346969604492, + "margin_dpo/margin_std": 84.53071594238281, + "step": 407 + }, + { + "epoch": 0.6167800453514739, + "grad_norm": 14.140363693237305, + "learning_rate": 1.9363341121154895e-07, + "logits/chosen": 0.9865923523902893, + "logits/rejected": 0.8969758749008179, + "logps/chosen": -151.7926788330078, + "logps/ref_chosen": -57.809539794921875, + "logps/ref_rejected": -81.67845153808594, + "logps/rejected": -253.80831909179688, + "loss": 0.8738, + "margin_dpo/margin_mean": 78.146728515625, + "margin_dpo/margin_std": 85.02195739746094, + "step": 408 + }, + { + "epoch": 0.618291761148904, + "grad_norm": 18.92486000061035, + "learning_rate": 1.9234603231438994e-07, + "logits/chosen": 0.9477880001068115, + "logits/rejected": 1.024693250656128, + "logps/chosen": -208.4686279296875, + "logps/ref_chosen": -77.07215118408203, + "logps/ref_rejected": -52.54692459106445, + "logps/rejected": -207.8866729736328, + "loss": 1.1856, + "margin_dpo/margin_mean": 23.943286895751953, + "margin_dpo/margin_std": 57.939476013183594, + "step": 409 + }, + { + "epoch": 0.6198034769463341, + "grad_norm": 16.562345504760742, + "learning_rate": 1.9106026612264315e-07, + "logits/chosen": 1.0837414264678955, + "logits/rejected": 1.0848373174667358, + "logps/chosen": -157.5943603515625, + "logps/ref_chosen": -44.102970123291016, + "logps/ref_rejected": -48.312713623046875, + "logps/rejected": -213.13218688964844, + "loss": 1.0328, + "margin_dpo/margin_mean": 51.32807159423828, + "margin_dpo/margin_std": 73.68001556396484, + "step": 410 + }, + { + "epoch": 0.6213151927437641, + "grad_norm": 15.483366012573242, + "learning_rate": 1.8977614860195296e-07, + "logits/chosen": 0.9602583646774292, + "logits/rejected": 0.9861100316047668, + "logps/chosen": -207.64120483398438, + "logps/ref_chosen": -68.72139739990234, + "logps/ref_rejected": -60.70808792114258, + "logps/rejected": -261.77276611328125, + "loss": 1.0486, + "margin_dpo/margin_mean": 62.14488983154297, + "margin_dpo/margin_std": 83.70924377441406, + "step": 411 + }, + { + "epoch": 0.6228269085411943, + "grad_norm": 18.00762176513672, + "learning_rate": 1.8849371567184662e-07, + "logits/chosen": 0.9532754421234131, + "logits/rejected": 0.9993252754211426, + "logps/chosen": -164.32028198242188, + "logps/ref_chosen": -48.907501220703125, + "logps/ref_rejected": -40.223628997802734, + "logps/rejected": -204.09124755859375, + "loss": 1.0641, + "margin_dpo/margin_mean": 48.454830169677734, + "margin_dpo/margin_std": 77.01388549804688, + "step": 412 + }, + { + "epoch": 0.6243386243386243, + "grad_norm": 18.54705810546875, + "learning_rate": 1.872130032047302e-07, + "logits/chosen": 0.7836633324623108, + "logits/rejected": 0.6924010515213013, + "logps/chosen": -224.32937622070312, + "logps/ref_chosen": -66.48075103759766, + "logps/ref_rejected": -88.64950561523438, + "logps/rejected": -281.3914794921875, + "loss": 1.1014, + "margin_dpo/margin_mean": 34.89335250854492, + "margin_dpo/margin_std": 108.35519409179688, + "step": 413 + }, + { + "epoch": 0.6258503401360545, + "grad_norm": 17.233596801757812, + "learning_rate": 1.8593404702488436e-07, + "logits/chosen": 0.8586117029190063, + "logits/rejected": 0.8090198040008545, + "logps/chosen": -217.32992553710938, + "logps/ref_chosen": -64.2005386352539, + "logps/ref_rejected": -92.14444732666016, + "logps/rejected": -309.4424743652344, + "loss": 0.9987, + "margin_dpo/margin_mean": 64.16864013671875, + "margin_dpo/margin_std": 81.55545806884766, + "step": 414 + }, + { + "epoch": 0.6273620559334845, + "grad_norm": 15.781834602355957, + "learning_rate": 1.846568829074628e-07, + "logits/chosen": 0.8974360227584839, + "logits/rejected": 1.0069502592086792, + "logps/chosen": -171.6870880126953, + "logps/ref_chosen": -58.819007873535156, + "logps/ref_rejected": -41.336639404296875, + "logps/rejected": -227.85916137695312, + "loss": 1.1027, + "margin_dpo/margin_mean": 73.65443420410156, + "margin_dpo/margin_std": 78.06534576416016, + "step": 415 + }, + { + "epoch": 0.6288737717309146, + "grad_norm": 18.85218620300293, + "learning_rate": 1.8338154657749128e-07, + "logits/chosen": 0.892058253288269, + "logits/rejected": 0.7859885692596436, + "logps/chosen": -194.0150146484375, + "logps/ref_chosen": -53.452110290527344, + "logps/ref_rejected": -97.50613403320312, + "logps/rejected": -326.833984375, + "loss": 1.1854, + "margin_dpo/margin_mean": 88.76496124267578, + "margin_dpo/margin_std": 134.0952911376953, + "step": 416 + }, + { + "epoch": 0.6303854875283447, + "grad_norm": 16.071107864379883, + "learning_rate": 1.8210807370886849e-07, + "logits/chosen": 0.9685580730438232, + "logits/rejected": 0.9870299100875854, + "logps/chosen": -228.75973510742188, + "logps/ref_chosen": -75.47906494140625, + "logps/ref_rejected": -67.37366485595703, + "logps/rejected": -281.687255859375, + "loss": 1.0758, + "margin_dpo/margin_mean": 61.032928466796875, + "margin_dpo/margin_std": 164.45205688476562, + "step": 417 + }, + { + "epoch": 0.6318972033257747, + "grad_norm": 31.492109298706055, + "learning_rate": 1.8083649992336825e-07, + "logits/chosen": 1.0411242246627808, + "logits/rejected": 0.9932562112808228, + "logps/chosen": -163.39306640625, + "logps/ref_chosen": -51.03925323486328, + "logps/ref_rejected": -80.96292877197266, + "logps/rejected": -269.3048095703125, + "loss": 1.2937, + "margin_dpo/margin_mean": 75.98806762695312, + "margin_dpo/margin_std": 79.07701110839844, + "step": 418 + }, + { + "epoch": 0.6334089191232048, + "grad_norm": 15.3997802734375, + "learning_rate": 1.7956686078964255e-07, + "logits/chosen": 0.9788599014282227, + "logits/rejected": 0.8751944303512573, + "logps/chosen": -196.85659790039062, + "logps/ref_chosen": -60.755767822265625, + "logps/ref_rejected": -77.95507049560547, + "logps/rejected": -270.27056884765625, + "loss": 0.9464, + "margin_dpo/margin_mean": 56.214637756347656, + "margin_dpo/margin_std": 98.2620849609375, + "step": 419 + }, + { + "epoch": 0.6349206349206349, + "grad_norm": 16.48794937133789, + "learning_rate": 1.782991918222275e-07, + "logits/chosen": 0.9061227440834045, + "logits/rejected": 0.9082077741622925, + "logps/chosen": -246.45130920410156, + "logps/ref_chosen": -64.6197738647461, + "logps/ref_rejected": -65.47144317626953, + "logps/rejected": -271.0919494628906, + "loss": 1.2391, + "margin_dpo/margin_mean": 23.788970947265625, + "margin_dpo/margin_std": 90.23184204101562, + "step": 420 + }, + { + "epoch": 0.636432350718065, + "grad_norm": 20.588409423828125, + "learning_rate": 1.7703352848054887e-07, + "logits/chosen": 1.1465280055999756, + "logits/rejected": 0.9681707620620728, + "logps/chosen": -153.81906127929688, + "logps/ref_chosen": -37.7196159362793, + "logps/ref_rejected": -102.12132263183594, + "logps/rejected": -318.0316162109375, + "loss": 1.1831, + "margin_dpo/margin_mean": 99.81085205078125, + "margin_dpo/margin_std": 135.99864196777344, + "step": 421 + }, + { + "epoch": 0.6379440665154951, + "grad_norm": 15.120969772338867, + "learning_rate": 1.7576990616793137e-07, + "logits/chosen": 0.988216757774353, + "logits/rejected": 0.943016529083252, + "logps/chosen": -174.78521728515625, + "logps/ref_chosen": -70.57130432128906, + "logps/ref_rejected": -81.15480041503906, + "logps/rejected": -301.62823486328125, + "loss": 0.9731, + "margin_dpo/margin_mean": 116.259521484375, + "margin_dpo/margin_std": 165.31149291992188, + "step": 422 + }, + { + "epoch": 0.6394557823129252, + "grad_norm": 18.961877822875977, + "learning_rate": 1.745083602306071e-07, + "logits/chosen": 1.0410032272338867, + "logits/rejected": 1.0402342081069946, + "logps/chosen": -181.06886291503906, + "logps/ref_chosen": -65.94102478027344, + "logps/ref_rejected": -58.68115234375, + "logps/rejected": -218.81155395507812, + "loss": 0.968, + "margin_dpo/margin_mean": 45.00257873535156, + "margin_dpo/margin_std": 87.43341827392578, + "step": 423 + }, + { + "epoch": 0.6409674981103552, + "grad_norm": 16.557573318481445, + "learning_rate": 1.7324892595672804e-07, + "logits/chosen": 0.966809868812561, + "logits/rejected": 0.9489647150039673, + "logps/chosen": -159.09210205078125, + "logps/ref_chosen": -47.5775032043457, + "logps/ref_rejected": -81.55694580078125, + "logps/rejected": -263.04534912109375, + "loss": 0.9385, + "margin_dpo/margin_mean": 69.97380828857422, + "margin_dpo/margin_std": 96.92681884765625, + "step": 424 + }, + { + "epoch": 0.6424792139077853, + "grad_norm": 18.266592025756836, + "learning_rate": 1.7199163857537824e-07, + "logits/chosen": 0.9221489429473877, + "logits/rejected": 0.9283965826034546, + "logps/chosen": -210.3824462890625, + "logps/ref_chosen": -73.816650390625, + "logps/ref_rejected": -68.2657470703125, + "logps/rejected": -270.66265869140625, + "loss": 1.0809, + "margin_dpo/margin_mean": 65.83113861083984, + "margin_dpo/margin_std": 87.40162658691406, + "step": 425 + }, + { + "epoch": 0.6439909297052154, + "grad_norm": 23.678329467773438, + "learning_rate": 1.7073653325558828e-07, + "logits/chosen": 0.7612646818161011, + "logits/rejected": 0.8192156553268433, + "logps/chosen": -212.634033203125, + "logps/ref_chosen": -73.34886169433594, + "logps/ref_rejected": -49.84626007080078, + "logps/rejected": -236.68417358398438, + "loss": 1.28, + "margin_dpo/margin_mean": 47.552730560302734, + "margin_dpo/margin_std": 78.64155578613281, + "step": 426 + }, + { + "epoch": 0.6455026455026455, + "grad_norm": 18.935203552246094, + "learning_rate": 1.6948364510535218e-07, + "logits/chosen": 0.8741757869720459, + "logits/rejected": 0.8858860731124878, + "logps/chosen": -192.26693725585938, + "logps/ref_chosen": -59.81298828125, + "logps/ref_rejected": -72.67082214355469, + "logps/rejected": -229.54574584960938, + "loss": 1.1213, + "margin_dpo/margin_mean": 24.420982360839844, + "margin_dpo/margin_std": 100.69892883300781, + "step": 427 + }, + { + "epoch": 0.6470143613000756, + "grad_norm": 14.1466646194458, + "learning_rate": 1.6823300917064458e-07, + "logits/chosen": 0.810982346534729, + "logits/rejected": 0.8677164316177368, + "logps/chosen": -231.75439453125, + "logps/ref_chosen": -75.25834655761719, + "logps/ref_rejected": -71.87213134765625, + "logps/rejected": -261.0834655761719, + "loss": 1.0756, + "margin_dpo/margin_mean": 32.71527099609375, + "margin_dpo/margin_std": 101.5775146484375, + "step": 428 + }, + { + "epoch": 0.6485260770975056, + "grad_norm": 18.27950668334961, + "learning_rate": 1.669846604344412e-07, + "logits/chosen": 0.8632928133010864, + "logits/rejected": 0.9631503820419312, + "logps/chosen": -232.76438903808594, + "logps/ref_chosen": -85.73371887207031, + "logps/ref_rejected": -54.903968811035156, + "logps/rejected": -241.41387939453125, + "loss": 1.0955, + "margin_dpo/margin_mean": 39.47923278808594, + "margin_dpo/margin_std": 66.2182846069336, + "step": 429 + }, + { + "epoch": 0.6500377928949358, + "grad_norm": 17.305450439453125, + "learning_rate": 1.6573863381573954e-07, + "logits/chosen": 0.8608442544937134, + "logits/rejected": 0.8532319068908691, + "logps/chosen": -167.2602081298828, + "logps/ref_chosen": -54.592891693115234, + "logps/ref_rejected": -57.52851867675781, + "logps/rejected": -240.46214294433594, + "loss": 0.9472, + "margin_dpo/margin_mean": 70.26631164550781, + "margin_dpo/margin_std": 83.55381774902344, + "step": 430 + }, + { + "epoch": 0.6515495086923658, + "grad_norm": 15.106849670410156, + "learning_rate": 1.6449496416858282e-07, + "logits/chosen": 0.9315043091773987, + "logits/rejected": 0.8595200777053833, + "logps/chosen": -139.00140380859375, + "logps/ref_chosen": -28.757225036621094, + "logps/ref_rejected": -60.048500061035156, + "logps/rejected": -241.73719787597656, + "loss": 1.0949, + "margin_dpo/margin_mean": 71.44451141357422, + "margin_dpo/margin_std": 95.85964965820312, + "step": 431 + }, + { + "epoch": 0.6530612244897959, + "grad_norm": 15.847650527954102, + "learning_rate": 1.632536862810844e-07, + "logits/chosen": 0.9007859230041504, + "logits/rejected": 0.9773823022842407, + "logps/chosen": -222.88815307617188, + "logps/ref_chosen": -72.49076080322266, + "logps/ref_rejected": -61.208106994628906, + "logps/rejected": -219.5262451171875, + "loss": 1.0237, + "margin_dpo/margin_mean": 7.920762062072754, + "margin_dpo/margin_std": 85.05551147460938, + "step": 432 + }, + { + "epoch": 0.654572940287226, + "grad_norm": 18.91417121887207, + "learning_rate": 1.6201483487445515e-07, + "logits/chosen": 1.0263733863830566, + "logits/rejected": 1.010549545288086, + "logps/chosen": -214.4171905517578, + "logps/ref_chosen": -74.03857421875, + "logps/ref_rejected": -75.64851379394531, + "logps/rejected": -267.9232177734375, + "loss": 0.9721, + "margin_dpo/margin_mean": 51.896095275878906, + "margin_dpo/margin_std": 69.40410614013672, + "step": 433 + }, + { + "epoch": 0.656084656084656, + "grad_norm": 14.838190078735352, + "learning_rate": 1.6077844460203204e-07, + "logits/chosen": 0.9536264538764954, + "logits/rejected": 0.9389366507530212, + "logps/chosen": -183.00430297851562, + "logps/ref_chosen": -56.56264114379883, + "logps/ref_rejected": -75.03836822509766, + "logps/rejected": -236.72100830078125, + "loss": 1.0005, + "margin_dpo/margin_mean": 35.24098587036133, + "margin_dpo/margin_std": 116.1950912475586, + "step": 434 + }, + { + "epoch": 0.6575963718820862, + "grad_norm": 14.722707748413086, + "learning_rate": 1.5954455004830878e-07, + "logits/chosen": 1.0074257850646973, + "logits/rejected": 0.9908655881881714, + "logps/chosen": -178.95724487304688, + "logps/ref_chosen": -52.70317840576172, + "logps/ref_rejected": -59.57474899291992, + "logps/rejected": -245.07876586914062, + "loss": 1.1085, + "margin_dpo/margin_mean": 59.24994659423828, + "margin_dpo/margin_std": 70.93247985839844, + "step": 435 + }, + { + "epoch": 0.6591080876795162, + "grad_norm": 21.451398849487305, + "learning_rate": 1.5831318572796847e-07, + "logits/chosen": 0.8413320183753967, + "logits/rejected": 0.8829286098480225, + "logps/chosen": -164.8529815673828, + "logps/ref_chosen": -54.026947021484375, + "logps/ref_rejected": -50.91650390625, + "logps/rejected": -223.85400390625, + "loss": 1.1831, + "margin_dpo/margin_mean": 62.111473083496094, + "margin_dpo/margin_std": 77.27197265625, + "step": 436 + }, + { + "epoch": 0.6606198034769464, + "grad_norm": 15.607953071594238, + "learning_rate": 1.5708438608491815e-07, + "logits/chosen": 0.8712018728256226, + "logits/rejected": 0.796289324760437, + "logps/chosen": -210.6629180908203, + "logps/ref_chosen": -65.94082641601562, + "logps/ref_rejected": -101.48641967773438, + "logps/rejected": -329.0594482421875, + "loss": 1.1607, + "margin_dpo/margin_mean": 82.8509292602539, + "margin_dpo/margin_std": 156.46224975585938, + "step": 437 + }, + { + "epoch": 0.6621315192743764, + "grad_norm": 18.58790397644043, + "learning_rate": 1.558581854913253e-07, + "logits/chosen": 0.9901245832443237, + "logits/rejected": 0.8835450410842896, + "logps/chosen": -163.1690673828125, + "logps/ref_chosen": -37.30860137939453, + "logps/ref_rejected": -83.07009887695312, + "logps/rejected": -284.5603942871094, + "loss": 1.0057, + "margin_dpo/margin_mean": 75.62983703613281, + "margin_dpo/margin_std": 144.90997314453125, + "step": 438 + }, + { + "epoch": 0.6636432350718064, + "grad_norm": 14.376500129699707, + "learning_rate": 1.5463461824665658e-07, + "logits/chosen": 0.9146884679794312, + "logits/rejected": 0.8561520576477051, + "logps/chosen": -174.28402709960938, + "logps/ref_chosen": -58.679443359375, + "logps/ref_rejected": -86.585693359375, + "logps/rejected": -287.9344177246094, + "loss": 0.9811, + "margin_dpo/margin_mean": 85.74412536621094, + "margin_dpo/margin_std": 75.79908752441406, + "step": 439 + }, + { + "epoch": 0.6651549508692366, + "grad_norm": 23.994163513183594, + "learning_rate": 1.534137185767178e-07, + "logits/chosen": 0.90851229429245, + "logits/rejected": 0.7904536724090576, + "logps/chosen": -171.59495544433594, + "logps/ref_chosen": -52.94178009033203, + "logps/ref_rejected": -108.83475494384766, + "logps/rejected": -340.7264404296875, + "loss": 1.0222, + "margin_dpo/margin_mean": 113.23851013183594, + "margin_dpo/margin_std": 85.38230895996094, + "step": 440 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 16.344532012939453, + "learning_rate": 1.521955206326976e-07, + "logits/chosen": 0.9741239547729492, + "logits/rejected": 0.7976517081260681, + "logps/chosen": -146.74644470214844, + "logps/ref_chosen": -43.878997802734375, + "logps/ref_rejected": -85.12787628173828, + "logps/rejected": -268.0721130371094, + "loss": 0.9318, + "margin_dpo/margin_mean": 80.07679748535156, + "margin_dpo/margin_std": 80.79681396484375, + "step": 441 + }, + { + "epoch": 0.6681783824640968, + "grad_norm": 16.937883377075195, + "learning_rate": 1.5098005849021078e-07, + "logits/chosen": 0.9499423503875732, + "logits/rejected": 0.8875406980514526, + "logps/chosen": -202.962158203125, + "logps/ref_chosen": -82.47845458984375, + "logps/ref_rejected": -119.25947570800781, + "logps/rejected": -351.79766845703125, + "loss": 1.0069, + "margin_dpo/margin_mean": 112.05450439453125, + "margin_dpo/margin_std": 77.75383758544922, + "step": 442 + }, + { + "epoch": 0.6696900982615268, + "grad_norm": 15.244129180908203, + "learning_rate": 1.4976736614834662e-07, + "logits/chosen": 0.7210106253623962, + "logits/rejected": 0.6923055648803711, + "logps/chosen": -197.3117218017578, + "logps/ref_chosen": -68.7590560913086, + "logps/ref_rejected": -75.88587951660156, + "logps/rejected": -294.58660888671875, + "loss": 0.8711, + "margin_dpo/margin_mean": 90.1480712890625, + "margin_dpo/margin_std": 78.41068267822266, + "step": 443 + }, + { + "epoch": 0.671201814058957, + "grad_norm": 21.625139236450195, + "learning_rate": 1.4855747752871654e-07, + "logits/chosen": 0.8997494578361511, + "logits/rejected": 0.8529607057571411, + "logps/chosen": -243.5794677734375, + "logps/ref_chosen": -69.02406311035156, + "logps/ref_rejected": -82.6009750366211, + "logps/rejected": -275.14569091796875, + "loss": 1.2998, + "margin_dpo/margin_mean": 17.989328384399414, + "margin_dpo/margin_std": 90.46211242675781, + "step": 444 + }, + { + "epoch": 0.672713529856387, + "grad_norm": 18.841861724853516, + "learning_rate": 1.473504264745062e-07, + "logits/chosen": 0.9031407833099365, + "logits/rejected": 0.9619897603988647, + "logps/chosen": -269.9713439941406, + "logps/ref_chosen": -90.35289001464844, + "logps/ref_rejected": -69.4399185180664, + "logps/rejected": -283.7539367675781, + "loss": 0.9734, + "margin_dpo/margin_mean": 34.695556640625, + "margin_dpo/margin_std": 91.03128051757812, + "step": 445 + }, + { + "epoch": 0.674225245653817, + "grad_norm": 14.573994636535645, + "learning_rate": 1.461462467495284e-07, + "logits/chosen": 0.8910402655601501, + "logits/rejected": 0.8468337655067444, + "logps/chosen": -196.3031768798828, + "logps/ref_chosen": -68.73054504394531, + "logps/ref_rejected": -94.55728149414062, + "logps/rejected": -304.6093444824219, + "loss": 0.8754, + "margin_dpo/margin_mean": 82.47945404052734, + "margin_dpo/margin_std": 77.50213623046875, + "step": 446 + }, + { + "epoch": 0.6757369614512472, + "grad_norm": 19.80596351623535, + "learning_rate": 1.4494497203727843e-07, + "logits/chosen": 0.9224183559417725, + "logits/rejected": 0.8308309316635132, + "logps/chosen": -180.90097045898438, + "logps/ref_chosen": -63.873809814453125, + "logps/ref_rejected": -93.55643463134766, + "logps/rejected": -295.10546875, + "loss": 0.9013, + "margin_dpo/margin_mean": 84.52186584472656, + "margin_dpo/margin_std": 88.04441833496094, + "step": 447 + }, + { + "epoch": 0.6772486772486772, + "grad_norm": 15.010350227355957, + "learning_rate": 1.4374663593999256e-07, + "logits/chosen": 1.0064988136291504, + "logits/rejected": 0.9316452145576477, + "logps/chosen": -173.94631958007812, + "logps/ref_chosen": -48.06145477294922, + "logps/ref_rejected": -74.88455963134766, + "logps/rejected": -297.13531494140625, + "loss": 1.015, + "margin_dpo/margin_mean": 96.36589050292969, + "margin_dpo/margin_std": 80.901611328125, + "step": 448 + }, + { + "epoch": 0.6787603930461074, + "grad_norm": 21.837177276611328, + "learning_rate": 1.4255127197770707e-07, + "logits/chosen": 0.7422596216201782, + "logits/rejected": 0.7190842628479004, + "logps/chosen": -238.9716796875, + "logps/ref_chosen": -68.59271240234375, + "logps/ref_rejected": -88.01607513427734, + "logps/rejected": -317.0701599121094, + "loss": 1.233, + "margin_dpo/margin_mean": 58.67515563964844, + "margin_dpo/margin_std": 102.90806579589844, + "step": 449 + }, + { + "epoch": 0.6802721088435374, + "grad_norm": 16.762248992919922, + "learning_rate": 1.4135891358732205e-07, + "logits/chosen": 1.0215989351272583, + "logits/rejected": 0.838610053062439, + "logps/chosen": -167.00927734375, + "logps/ref_chosen": -35.716896057128906, + "logps/ref_rejected": -95.92900085449219, + "logps/rejected": -310.751953125, + "loss": 1.049, + "margin_dpo/margin_mean": 83.53057861328125, + "margin_dpo/margin_std": 82.19971466064453, + "step": 450 + }, + { + "epoch": 0.6817838246409675, + "grad_norm": 15.963050842285156, + "learning_rate": 1.4016959412166437e-07, + "logits/chosen": 0.8930540084838867, + "logits/rejected": 0.7441238760948181, + "logps/chosen": -221.75592041015625, + "logps/ref_chosen": -73.0178451538086, + "logps/ref_rejected": -109.17976379394531, + "logps/rejected": -333.75726318359375, + "loss": 1.0905, + "margin_dpo/margin_mean": 75.83943939208984, + "margin_dpo/margin_std": 87.21731567382812, + "step": 451 + }, + { + "epoch": 0.6832955404383976, + "grad_norm": 17.484153747558594, + "learning_rate": 1.3898334684855645e-07, + "logits/chosen": 0.7930479645729065, + "logits/rejected": 0.7267175316810608, + "logps/chosen": -191.09988403320312, + "logps/ref_chosen": -68.31144714355469, + "logps/ref_rejected": -97.96285247802734, + "logps/rejected": -300.38970947265625, + "loss": 1.096, + "margin_dpo/margin_mean": 79.63841247558594, + "margin_dpo/margin_std": 85.86831665039062, + "step": 452 + }, + { + "epoch": 0.6848072562358276, + "grad_norm": 24.5980167388916, + "learning_rate": 1.3780020494988445e-07, + "logits/chosen": 0.8875958919525146, + "logits/rejected": 0.8231840133666992, + "logps/chosen": -145.66799926757812, + "logps/ref_chosen": -51.74473190307617, + "logps/ref_rejected": -60.13861083984375, + "logps/rejected": -229.53916931152344, + "loss": 1.0671, + "margin_dpo/margin_mean": 75.477294921875, + "margin_dpo/margin_std": 93.73162841796875, + "step": 453 + }, + { + "epoch": 0.6863189720332578, + "grad_norm": 12.73465633392334, + "learning_rate": 1.366202015206706e-07, + "logits/chosen": 0.9996371269226074, + "logits/rejected": 1.0085588693618774, + "logps/chosen": -156.74002075195312, + "logps/ref_chosen": -44.98827362060547, + "logps/ref_rejected": -50.99993133544922, + "logps/rejected": -196.87745666503906, + "loss": 1.0144, + "margin_dpo/margin_mean": 34.12577819824219, + "margin_dpo/margin_std": 120.2644271850586, + "step": 454 + }, + { + "epoch": 0.6878306878306878, + "grad_norm": 16.96663475036621, + "learning_rate": 1.354433695681474e-07, + "logits/chosen": 0.8430243134498596, + "logits/rejected": 0.8127152919769287, + "logps/chosen": -215.993896484375, + "logps/ref_chosen": -71.83073425292969, + "logps/ref_rejected": -84.22274780273438, + "logps/rejected": -321.873046875, + "loss": 1.0001, + "margin_dpo/margin_mean": 93.48712921142578, + "margin_dpo/margin_std": 97.56796264648438, + "step": 455 + }, + { + "epoch": 0.6893424036281179, + "grad_norm": 21.244468688964844, + "learning_rate": 1.3426974201083439e-07, + "logits/chosen": 0.8168896436691284, + "logits/rejected": 0.7225136756896973, + "logps/chosen": -166.49514770507812, + "logps/ref_chosen": -37.67304229736328, + "logps/ref_rejected": -87.09375, + "logps/rejected": -294.4206848144531, + "loss": 1.0769, + "margin_dpo/margin_mean": 78.50482177734375, + "margin_dpo/margin_std": 95.56376647949219, + "step": 456 + }, + { + "epoch": 0.690854119425548, + "grad_norm": 20.784381866455078, + "learning_rate": 1.3309935167761717e-07, + "logits/chosen": 0.9731137752532959, + "logits/rejected": 0.890478789806366, + "logps/chosen": -193.4537353515625, + "logps/ref_chosen": -36.889923095703125, + "logps/ref_rejected": -71.26411437988281, + "logps/rejected": -267.1976623535156, + "loss": 1.0515, + "margin_dpo/margin_mean": 39.369747161865234, + "margin_dpo/margin_std": 71.71172332763672, + "step": 457 + }, + { + "epoch": 0.6923658352229781, + "grad_norm": 17.790769577026367, + "learning_rate": 1.3193223130682936e-07, + "logits/chosen": 0.9141269326210022, + "logits/rejected": 0.8772815465927124, + "logps/chosen": -211.3562774658203, + "logps/ref_chosen": -61.25056076049805, + "logps/ref_rejected": -70.772216796875, + "logps/rejected": -278.8810119628906, + "loss": 0.9925, + "margin_dpo/margin_mean": 58.00306701660156, + "margin_dpo/margin_std": 117.56779479980469, + "step": 458 + }, + { + "epoch": 0.6938775510204082, + "grad_norm": 15.180387496948242, + "learning_rate": 1.3076841354533658e-07, + "logits/chosen": 1.0155048370361328, + "logits/rejected": 0.9921817779541016, + "logps/chosen": -206.2635498046875, + "logps/ref_chosen": -73.28994750976562, + "logps/ref_rejected": -108.29696655273438, + "logps/rejected": -306.605712890625, + "loss": 0.9325, + "margin_dpo/margin_mean": 65.33513641357422, + "margin_dpo/margin_std": 104.4261474609375, + "step": 459 + }, + { + "epoch": 0.6953892668178382, + "grad_norm": 15.735591888427734, + "learning_rate": 1.2960793094762345e-07, + "logits/chosen": 0.8665391802787781, + "logits/rejected": 0.744087815284729, + "logps/chosen": -220.26666259765625, + "logps/ref_chosen": -76.68836975097656, + "logps/ref_rejected": -99.02154541015625, + "logps/rejected": -351.54107666015625, + "loss": 0.8928, + "margin_dpo/margin_mean": 108.94125366210938, + "margin_dpo/margin_std": 133.6810302734375, + "step": 460 + }, + { + "epoch": 0.6969009826152683, + "grad_norm": 14.483108520507812, + "learning_rate": 1.2845081597488286e-07, + "logits/chosen": 0.9958152770996094, + "logits/rejected": 0.9232292175292969, + "logps/chosen": -167.3024139404297, + "logps/ref_chosen": -63.83565902709961, + "logps/ref_rejected": -87.00081634521484, + "logps/rejected": -267.07049560546875, + "loss": 0.883, + "margin_dpo/margin_mean": 76.60294342041016, + "margin_dpo/margin_std": 62.0404167175293, + "step": 461 + }, + { + "epoch": 0.6984126984126984, + "grad_norm": 18.1173095703125, + "learning_rate": 1.27297100994108e-07, + "logits/chosen": 0.9541089534759521, + "logits/rejected": 0.9522888660430908, + "logps/chosen": -175.2286376953125, + "logps/ref_chosen": -64.87322998046875, + "logps/ref_rejected": -68.76766967773438, + "logps/rejected": -291.7859191894531, + "loss": 0.9019, + "margin_dpo/margin_mean": 112.66285705566406, + "margin_dpo/margin_std": 76.85243225097656, + "step": 462 + }, + { + "epoch": 0.6999244142101285, + "grad_norm": 21.672767639160156, + "learning_rate": 1.2614681827718695e-07, + "logits/chosen": 0.9698790907859802, + "logits/rejected": 1.0222316980361938, + "logps/chosen": -219.83706665039062, + "logps/ref_chosen": -66.8828125, + "logps/ref_rejected": -47.84074783325195, + "logps/rejected": -255.33847045898438, + "loss": 1.0292, + "margin_dpo/margin_mean": 54.54346466064453, + "margin_dpo/margin_std": 97.27693176269531, + "step": 463 + }, + { + "epoch": 0.7014361300075586, + "grad_norm": 16.856752395629883, + "learning_rate": 1.2500000000000005e-07, + "logits/chosen": 1.0088590383529663, + "logits/rejected": 1.0314637422561646, + "logps/chosen": -243.63446044921875, + "logps/ref_chosen": -90.68864440917969, + "logps/ref_rejected": -78.24504852294922, + "logps/rejected": -304.3553161621094, + "loss": 0.9942, + "margin_dpo/margin_mean": 73.16445922851562, + "margin_dpo/margin_std": 115.3930435180664, + "step": 464 + }, + { + "epoch": 0.7029478458049887, + "grad_norm": 18.76186752319336, + "learning_rate": 1.238566782415197e-07, + "logits/chosen": 0.9639427661895752, + "logits/rejected": 0.8763701915740967, + "logps/chosen": -196.27841186523438, + "logps/ref_chosen": -68.18783569335938, + "logps/ref_rejected": -107.69645690917969, + "logps/rejected": -325.6939697265625, + "loss": 1.0667, + "margin_dpo/margin_mean": 89.90691375732422, + "margin_dpo/margin_std": 111.15441131591797, + "step": 465 + }, + { + "epoch": 0.7044595616024187, + "grad_norm": 25.630605697631836, + "learning_rate": 1.2271688498291334e-07, + "logits/chosen": 0.7273076772689819, + "logits/rejected": 0.7946709394454956, + "logps/chosen": -287.0624084472656, + "logps/ref_chosen": -109.91573333740234, + "logps/ref_rejected": -71.43077087402344, + "logps/rejected": -251.70777893066406, + "loss": 1.2282, + "margin_dpo/margin_mean": 3.1303582191467285, + "margin_dpo/margin_std": 107.84750366210938, + "step": 466 + }, + { + "epoch": 0.7059712773998488, + "grad_norm": 19.084739685058594, + "learning_rate": 1.2158065210664848e-07, + "logits/chosen": 0.861819326877594, + "logits/rejected": 0.7350021600723267, + "logps/chosen": -200.34225463867188, + "logps/ref_chosen": -44.50347900390625, + "logps/ref_rejected": -68.12409973144531, + "logps/rejected": -296.81658935546875, + "loss": 0.9962, + "margin_dpo/margin_mean": 72.85372924804688, + "margin_dpo/margin_std": 124.93696594238281, + "step": 467 + }, + { + "epoch": 0.7074829931972789, + "grad_norm": 17.534069061279297, + "learning_rate": 1.204480113956011e-07, + "logits/chosen": 0.9389829635620117, + "logits/rejected": 0.8962525129318237, + "logps/chosen": -154.1627197265625, + "logps/ref_chosen": -46.26074981689453, + "logps/ref_rejected": -77.6624755859375, + "logps/rejected": -269.74609375, + "loss": 0.9274, + "margin_dpo/margin_mean": 84.1816635131836, + "margin_dpo/margin_std": 95.72941589355469, + "step": 468 + }, + { + "epoch": 0.708994708994709, + "grad_norm": 18.879037857055664, + "learning_rate": 1.1931899453216697e-07, + "logits/chosen": 0.98770672082901, + "logits/rejected": 0.9201364517211914, + "logps/chosen": -188.12139892578125, + "logps/ref_chosen": -62.30226516723633, + "logps/ref_rejected": -88.74978637695312, + "logps/rejected": -278.533203125, + "loss": 0.9651, + "margin_dpo/margin_mean": 63.96430206298828, + "margin_dpo/margin_std": 68.25845336914062, + "step": 469 + }, + { + "epoch": 0.7105064247921391, + "grad_norm": 17.858858108520508, + "learning_rate": 1.1819363309737438e-07, + "logits/chosen": 1.0070170164108276, + "logits/rejected": 0.9005523920059204, + "logps/chosen": -177.07325744628906, + "logps/ref_chosen": -53.556175231933594, + "logps/ref_rejected": -83.76905059814453, + "logps/rejected": -305.9222717285156, + "loss": 1.0157, + "margin_dpo/margin_mean": 98.63612365722656, + "margin_dpo/margin_std": 116.62651062011719, + "step": 470 + }, + { + "epoch": 0.7120181405895691, + "grad_norm": 22.893625259399414, + "learning_rate": 1.1707195857000215e-07, + "logits/chosen": 0.9534423351287842, + "logits/rejected": 0.9319720268249512, + "logps/chosen": -163.5334014892578, + "logps/ref_chosen": -51.11562728881836, + "logps/ref_rejected": -63.00703048706055, + "logps/rejected": -267.5377197265625, + "loss": 0.9278, + "margin_dpo/margin_mean": 92.11293029785156, + "margin_dpo/margin_std": 100.85304260253906, + "step": 471 + }, + { + "epoch": 0.7135298563869993, + "grad_norm": 16.294567108154297, + "learning_rate": 1.1595400232569768e-07, + "logits/chosen": 0.7927904725074768, + "logits/rejected": 0.8178097009658813, + "logps/chosen": -197.45468139648438, + "logps/ref_chosen": -69.28681182861328, + "logps/ref_rejected": -85.08757019042969, + "logps/rejected": -298.28179931640625, + "loss": 1.103, + "margin_dpo/margin_mean": 85.0263900756836, + "margin_dpo/margin_std": 92.62736511230469, + "step": 472 + }, + { + "epoch": 0.7150415721844293, + "grad_norm": 19.770648956298828, + "learning_rate": 1.1483979563610069e-07, + "logits/chosen": 0.9858765602111816, + "logits/rejected": 0.7901525497436523, + "logps/chosen": -140.59815979003906, + "logps/ref_chosen": -34.896080017089844, + "logps/ref_rejected": -81.10395812988281, + "logps/rejected": -261.3387145996094, + "loss": 1.0053, + "margin_dpo/margin_mean": 74.53266906738281, + "margin_dpo/margin_std": 110.72260284423828, + "step": 473 + }, + { + "epoch": 0.7165532879818595, + "grad_norm": 23.826879501342773, + "learning_rate": 1.1372936966796709e-07, + "logits/chosen": 1.108849287033081, + "logits/rejected": 1.0479471683502197, + "logps/chosen": -195.81671142578125, + "logps/ref_chosen": -45.78113555908203, + "logps/ref_rejected": -68.88629150390625, + "logps/rejected": -293.0316162109375, + "loss": 1.1275, + "margin_dpo/margin_mean": 74.10977172851562, + "margin_dpo/margin_std": 110.27363586425781, + "step": 474 + }, + { + "epoch": 0.7180650037792895, + "grad_norm": 13.597807884216309, + "learning_rate": 1.126227554822985e-07, + "logits/chosen": 0.8909091949462891, + "logits/rejected": 0.9018880724906921, + "logps/chosen": -191.0635223388672, + "logps/ref_chosen": -58.526344299316406, + "logps/ref_rejected": -76.01811981201172, + "logps/rejected": -290.8351745605469, + "loss": 0.8632, + "margin_dpo/margin_mean": 82.27987670898438, + "margin_dpo/margin_std": 109.62013244628906, + "step": 475 + }, + { + "epoch": 0.7195767195767195, + "grad_norm": 22.143871307373047, + "learning_rate": 1.1151998403347243e-07, + "logits/chosen": 0.8504942059516907, + "logits/rejected": 0.801427960395813, + "logps/chosen": -185.2305145263672, + "logps/ref_chosen": -54.09751510620117, + "logps/ref_rejected": -84.35127258300781, + "logps/rejected": -310.95294189453125, + "loss": 1.0874, + "margin_dpo/margin_mean": 95.46868133544922, + "margin_dpo/margin_std": 103.3380126953125, + "step": 476 + }, + { + "epoch": 0.7210884353741497, + "grad_norm": 20.90325355529785, + "learning_rate": 1.1042108616837692e-07, + "logits/chosen": 0.9148606657981873, + "logits/rejected": 0.8745808601379395, + "logps/chosen": -211.44253540039062, + "logps/ref_chosen": -67.8685531616211, + "logps/ref_rejected": -72.77481842041016, + "logps/rejected": -246.86721801757812, + "loss": 1.1216, + "margin_dpo/margin_mean": 30.518428802490234, + "margin_dpo/margin_std": 85.03158569335938, + "step": 477 + }, + { + "epoch": 0.7226001511715797, + "grad_norm": 19.139371871948242, + "learning_rate": 1.0932609262554746e-07, + "logits/chosen": 0.8678562641143799, + "logits/rejected": 0.8669169545173645, + "logps/chosen": -157.50787353515625, + "logps/ref_chosen": -56.62529754638672, + "logps/ref_rejected": -58.720787048339844, + "logps/rejected": -254.533203125, + "loss": 1.1308, + "margin_dpo/margin_mean": 94.92986297607422, + "margin_dpo/margin_std": 100.27252197265625, + "step": 478 + }, + { + "epoch": 0.7241118669690099, + "grad_norm": 20.085996627807617, + "learning_rate": 1.0823503403430734e-07, + "logits/chosen": 0.8876814842224121, + "logits/rejected": 0.7483628988265991, + "logps/chosen": -176.6420135498047, + "logps/ref_chosen": -45.32330322265625, + "logps/ref_rejected": -55.02246856689453, + "logps/rejected": -235.74169921875, + "loss": 1.1586, + "margin_dpo/margin_mean": 49.40052795410156, + "margin_dpo/margin_std": 96.12824249267578, + "step": 479 + }, + { + "epoch": 0.7256235827664399, + "grad_norm": 21.663545608520508, + "learning_rate": 1.0714794091391072e-07, + "logits/chosen": 0.8385661244392395, + "logits/rejected": 0.8454256057739258, + "logps/chosen": -211.0986328125, + "logps/ref_chosen": -63.22759246826172, + "logps/ref_rejected": -74.44642639160156, + "logps/rejected": -270.2560729980469, + "loss": 1.0536, + "margin_dpo/margin_mean": 47.93859100341797, + "margin_dpo/margin_std": 117.68538665771484, + "step": 480 + }, + { + "epoch": 0.72713529856387, + "grad_norm": 17.779111862182617, + "learning_rate": 1.0606484367268906e-07, + "logits/chosen": 0.8755944967269897, + "logits/rejected": 0.9314774870872498, + "logps/chosen": -223.0485076904297, + "logps/ref_chosen": -79.15220642089844, + "logps/ref_rejected": -68.31973266601562, + "logps/rejected": -270.32794189453125, + "loss": 1.049, + "margin_dpo/margin_mean": 58.11188507080078, + "margin_dpo/margin_std": 97.23744201660156, + "step": 481 + }, + { + "epoch": 0.7286470143613001, + "grad_norm": 24.795852661132812, + "learning_rate": 1.0498577260720048e-07, + "logits/chosen": 0.8218737840652466, + "logits/rejected": 0.7339863777160645, + "logps/chosen": -193.96629333496094, + "logps/ref_chosen": -54.06950378417969, + "logps/ref_rejected": -87.615234375, + "logps/rejected": -284.41217041015625, + "loss": 1.1686, + "margin_dpo/margin_mean": 56.900150299072266, + "margin_dpo/margin_std": 103.31169128417969, + "step": 482 + }, + { + "epoch": 0.7301587301587301, + "grad_norm": 15.781610488891602, + "learning_rate": 1.0391075790138232e-07, + "logits/chosen": 0.894173800945282, + "logits/rejected": 0.9251315593719482, + "logps/chosen": -174.34373474121094, + "logps/ref_chosen": -54.128578186035156, + "logps/ref_rejected": -67.60116577148438, + "logps/rejected": -278.50946044921875, + "loss": 0.986, + "margin_dpo/margin_mean": 90.693115234375, + "margin_dpo/margin_std": 83.16683197021484, + "step": 483 + }, + { + "epoch": 0.7316704459561603, + "grad_norm": 22.09979820251465, + "learning_rate": 1.0283982962570681e-07, + "logits/chosen": 0.8971969485282898, + "logits/rejected": 0.8970204591751099, + "logps/chosen": -179.95797729492188, + "logps/ref_chosen": -50.43122100830078, + "logps/ref_rejected": -66.30445861816406, + "logps/rejected": -237.34353637695312, + "loss": 1.0404, + "margin_dpo/margin_mean": 41.512306213378906, + "margin_dpo/margin_std": 72.52426147460938, + "step": 484 + }, + { + "epoch": 0.7331821617535903, + "grad_norm": 17.71631622314453, + "learning_rate": 1.0177301773633992e-07, + "logits/chosen": 1.0137141942977905, + "logits/rejected": 0.9617197513580322, + "logps/chosen": -193.55841064453125, + "logps/ref_chosen": -51.1461067199707, + "logps/ref_rejected": -61.148231506347656, + "logps/rejected": -281.61785888671875, + "loss": 1.0243, + "margin_dpo/margin_mean": 78.0573501586914, + "margin_dpo/margin_std": 87.72028350830078, + "step": 485 + }, + { + "epoch": 0.7346938775510204, + "grad_norm": 18.199512481689453, + "learning_rate": 1.007103520743035e-07, + "logits/chosen": 0.9664397239685059, + "logits/rejected": 0.8338623046875, + "logps/chosen": -239.65943908691406, + "logps/ref_chosen": -70.02229309082031, + "logps/ref_rejected": -114.05645751953125, + "logps/rejected": -355.57293701171875, + "loss": 1.0931, + "margin_dpo/margin_mean": 71.87936401367188, + "margin_dpo/margin_std": 91.45281982421875, + "step": 486 + }, + { + "epoch": 0.7362055933484505, + "grad_norm": 21.441146850585938, + "learning_rate": 9.965186236464046e-08, + "logits/chosen": 0.8630800843238831, + "logits/rejected": 0.9823142290115356, + "logps/chosen": -227.5937042236328, + "logps/ref_chosen": -81.48747253417969, + "logps/ref_rejected": -57.63191604614258, + "logps/rejected": -252.54348754882812, + "loss": 1.0282, + "margin_dpo/margin_mean": 48.80535125732422, + "margin_dpo/margin_std": 73.48953247070312, + "step": 487 + }, + { + "epoch": 0.7377173091458806, + "grad_norm": 16.673267364501953, + "learning_rate": 9.859757821558337e-08, + "logits/chosen": 1.1053991317749023, + "logits/rejected": 1.0277700424194336, + "logps/chosen": -159.07391357421875, + "logps/ref_chosen": -40.96950912475586, + "logps/ref_rejected": -58.97332000732422, + "logps/rejected": -254.75424194335938, + "loss": 0.9396, + "margin_dpo/margin_mean": 77.67652130126953, + "margin_dpo/margin_std": 103.35063934326172, + "step": 488 + }, + { + "epoch": 0.7392290249433107, + "grad_norm": 17.88896942138672, + "learning_rate": 9.754752911772615e-08, + "logits/chosen": 0.9406849145889282, + "logits/rejected": 0.9168277978897095, + "logps/chosen": -206.83840942382812, + "logps/ref_chosen": -61.101173400878906, + "logps/ref_rejected": -98.35012817382812, + "logps/rejected": -302.9677734375, + "loss": 1.2049, + "margin_dpo/margin_mean": 58.880401611328125, + "margin_dpo/margin_std": 115.15332794189453, + "step": 489 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 17.050439834594727, + "learning_rate": 9.650174444319956e-08, + "logits/chosen": 1.056211233139038, + "logits/rejected": 1.0357109308242798, + "logps/chosen": -221.88174438476562, + "logps/ref_chosen": -62.3327751159668, + "logps/ref_rejected": -79.33484649658203, + "logps/rejected": -289.1636962890625, + "loss": 1.1471, + "margin_dpo/margin_mean": 50.279884338378906, + "margin_dpo/margin_std": 150.81320190429688, + "step": 490 + }, + { + "epoch": 0.7422524565381708, + "grad_norm": 15.308135032653809, + "learning_rate": 9.546025344484868e-08, + "logits/chosen": 0.9274444580078125, + "logits/rejected": 0.9464068412780762, + "logps/chosen": -178.4617919921875, + "logps/ref_chosen": -56.26042175292969, + "logps/ref_rejected": -60.113643646240234, + "logps/rejected": -242.59280395507812, + "loss": 1.0716, + "margin_dpo/margin_mean": 60.27778625488281, + "margin_dpo/margin_std": 79.55590057373047, + "step": 491 + }, + { + "epoch": 0.7437641723356009, + "grad_norm": 18.683191299438477, + "learning_rate": 9.442308525541589e-08, + "logits/chosen": 1.0775320529937744, + "logits/rejected": 0.8653636574745178, + "logps/chosen": -203.3031463623047, + "logps/ref_chosen": -45.579383850097656, + "logps/ref_rejected": -113.18993377685547, + "logps/rejected": -362.9856872558594, + "loss": 1.1972, + "margin_dpo/margin_mean": 92.07199096679688, + "margin_dpo/margin_std": 111.55503845214844, + "step": 492 + }, + { + "epoch": 0.745275888133031, + "grad_norm": 16.538679122924805, + "learning_rate": 9.339026888672468e-08, + "logits/chosen": 0.9095848798751831, + "logits/rejected": 0.843256950378418, + "logps/chosen": -190.7752685546875, + "logps/ref_chosen": -59.649986267089844, + "logps/ref_rejected": -87.15731811523438, + "logps/rejected": -255.50759887695312, + "loss": 0.9946, + "margin_dpo/margin_mean": 37.224998474121094, + "margin_dpo/margin_std": 83.05178833007812, + "step": 493 + }, + { + "epoch": 0.7467876039304611, + "grad_norm": 22.777530670166016, + "learning_rate": 9.236183322886945e-08, + "logits/chosen": 0.7308402061462402, + "logits/rejected": 0.6717466711997986, + "logps/chosen": -180.57354736328125, + "logps/ref_chosen": -52.495460510253906, + "logps/ref_rejected": -76.69441223144531, + "logps/rejected": -251.54196166992188, + "loss": 1.1615, + "margin_dpo/margin_mean": 46.76948547363281, + "margin_dpo/margin_std": 64.22396850585938, + "step": 494 + }, + { + "epoch": 0.7482993197278912, + "grad_norm": 20.922956466674805, + "learning_rate": 9.133780704940594e-08, + "logits/chosen": 0.8490627408027649, + "logits/rejected": 0.8381332755088806, + "logps/chosen": -224.40399169921875, + "logps/ref_chosen": -61.861454010009766, + "logps/ref_rejected": -66.13208770751953, + "logps/rejected": -220.16583251953125, + "loss": 1.1718, + "margin_dpo/margin_mean": -8.508773803710938, + "margin_dpo/margin_std": 103.41836547851562, + "step": 495 + }, + { + "epoch": 0.7498110355253212, + "grad_norm": 17.408597946166992, + "learning_rate": 9.031821899254797e-08, + "logits/chosen": 0.9441404938697815, + "logits/rejected": 0.7932374477386475, + "logps/chosen": -185.04629516601562, + "logps/ref_chosen": -45.550537109375, + "logps/ref_rejected": -100.87998962402344, + "logps/rejected": -331.1201477050781, + "loss": 1.0355, + "margin_dpo/margin_mean": 90.74439239501953, + "margin_dpo/margin_std": 99.05509948730469, + "step": 496 + }, + { + "epoch": 0.7513227513227513, + "grad_norm": 17.972551345825195, + "learning_rate": 8.930309757836516e-08, + "logits/chosen": 0.9454480409622192, + "logits/rejected": 0.8560576438903809, + "logps/chosen": -160.0005645751953, + "logps/ref_chosen": -59.57224655151367, + "logps/ref_rejected": -98.0445556640625, + "logps/rejected": -283.73138427734375, + "loss": 1.0466, + "margin_dpo/margin_mean": 85.25851440429688, + "margin_dpo/margin_std": 80.23634338378906, + "step": 497 + }, + { + "epoch": 0.7528344671201814, + "grad_norm": 15.286055564880371, + "learning_rate": 8.829247120198563e-08, + "logits/chosen": 0.9226865768432617, + "logits/rejected": 0.8130264282226562, + "logps/chosen": -209.95169067382812, + "logps/ref_chosen": -62.02671432495117, + "logps/ref_rejected": -102.47027587890625, + "logps/rejected": -339.2867431640625, + "loss": 0.9486, + "margin_dpo/margin_mean": 88.89149475097656, + "margin_dpo/margin_std": 98.82290649414062, + "step": 498 + }, + { + "epoch": 0.7543461829176115, + "grad_norm": 19.15370750427246, + "learning_rate": 8.728636813280163e-08, + "logits/chosen": 0.9353519082069397, + "logits/rejected": 0.817765474319458, + "logps/chosen": -201.40264892578125, + "logps/ref_chosen": -59.132904052734375, + "logps/ref_rejected": -109.99728393554688, + "logps/rejected": -319.89678955078125, + "loss": 1.0559, + "margin_dpo/margin_mean": 67.6297607421875, + "margin_dpo/margin_std": 111.14754486083984, + "step": 499 + }, + { + "epoch": 0.7558578987150416, + "grad_norm": 19.469440460205078, + "learning_rate": 8.628481651367875e-08, + "logits/chosen": 0.7339733839035034, + "logits/rejected": 0.7467474341392517, + "logps/chosen": -233.1659393310547, + "logps/ref_chosen": -83.37364959716797, + "logps/ref_rejected": -85.28791809082031, + "logps/rejected": -321.4836730957031, + "loss": 1.0818, + "margin_dpo/margin_mean": 86.4034423828125, + "margin_dpo/margin_std": 75.89598083496094, + "step": 500 + }, + { + "epoch": 0.7573696145124716, + "grad_norm": 13.901487350463867, + "learning_rate": 8.528784436016878e-08, + "logits/chosen": 0.9171326756477356, + "logits/rejected": 0.9103375673294067, + "logps/chosen": -201.10220336914062, + "logps/ref_chosen": -71.76341247558594, + "logps/ref_rejected": -77.99368286132812, + "logps/rejected": -288.089599609375, + "loss": 0.9773, + "margin_dpo/margin_mean": 80.75711059570312, + "margin_dpo/margin_std": 61.29353713989258, + "step": 501 + }, + { + "epoch": 0.7588813303099018, + "grad_norm": 16.59943389892578, + "learning_rate": 8.4295479559726e-08, + "logits/chosen": 0.9081599712371826, + "logits/rejected": 0.8390029072761536, + "logps/chosen": -177.0962677001953, + "logps/ref_chosen": -60.16824722290039, + "logps/ref_rejected": -88.61361694335938, + "logps/rejected": -263.456787109375, + "loss": 1.0216, + "margin_dpo/margin_mean": 57.91515350341797, + "margin_dpo/margin_std": 97.9842300415039, + "step": 502 + }, + { + "epoch": 0.7603930461073318, + "grad_norm": 15.813238143920898, + "learning_rate": 8.330774987092712e-08, + "logits/chosen": 0.9412652254104614, + "logits/rejected": 0.9647856950759888, + "logps/chosen": -200.7902374267578, + "logps/ref_chosen": -64.67495727539062, + "logps/ref_rejected": -68.66828918457031, + "logps/rejected": -268.06390380859375, + "loss": 1.0442, + "margin_dpo/margin_mean": 63.280311584472656, + "margin_dpo/margin_std": 97.19824981689453, + "step": 503 + }, + { + "epoch": 0.7619047619047619, + "grad_norm": 15.342427253723145, + "learning_rate": 8.232468292269479e-08, + "logits/chosen": 0.9019661545753479, + "logits/rejected": 0.8831943273544312, + "logps/chosen": -195.31814575195312, + "logps/ref_chosen": -59.16814041137695, + "logps/ref_rejected": -80.79418182373047, + "logps/rejected": -313.3367919921875, + "loss": 0.88, + "margin_dpo/margin_mean": 96.392578125, + "margin_dpo/margin_std": 102.2872314453125, + "step": 504 + }, + { + "epoch": 0.763416477702192, + "grad_norm": 18.31743621826172, + "learning_rate": 8.134630621352483e-08, + "logits/chosen": 0.8820549249649048, + "logits/rejected": 0.8513251543045044, + "logps/chosen": -191.5747528076172, + "logps/ref_chosen": -70.2003173828125, + "logps/ref_rejected": -96.7729721069336, + "logps/rejected": -323.912841796875, + "loss": 1.1311, + "margin_dpo/margin_mean": 105.76544189453125, + "margin_dpo/margin_std": 129.75132751464844, + "step": 505 + }, + { + "epoch": 0.764928193499622, + "grad_norm": 19.81454849243164, + "learning_rate": 8.037264711071698e-08, + "logits/chosen": 0.9372712969779968, + "logits/rejected": 0.9639301300048828, + "logps/chosen": -213.41952514648438, + "logps/ref_chosen": -77.78474426269531, + "logps/ref_rejected": -76.35060119628906, + "logps/rejected": -262.79083251953125, + "loss": 1.1566, + "margin_dpo/margin_mean": 50.80542755126953, + "margin_dpo/margin_std": 70.04209899902344, + "step": 506 + }, + { + "epoch": 0.7664399092970522, + "grad_norm": 16.390592575073242, + "learning_rate": 7.940373284960933e-08, + "logits/chosen": 0.8365110158920288, + "logits/rejected": 0.869307279586792, + "logps/chosen": -263.01031494140625, + "logps/ref_chosen": -102.45763397216797, + "logps/ref_rejected": -107.32395935058594, + "logps/rejected": -325.25531005859375, + "loss": 1.0336, + "margin_dpo/margin_mean": 57.37864685058594, + "margin_dpo/margin_std": 107.75471496582031, + "step": 507 + }, + { + "epoch": 0.7679516250944822, + "grad_norm": 18.062192916870117, + "learning_rate": 7.843959053281663e-08, + "logits/chosen": 0.9670290946960449, + "logits/rejected": 0.8693418502807617, + "logps/chosen": -160.03451538085938, + "logps/ref_chosen": -54.93028259277344, + "logps/ref_rejected": -94.2146987915039, + "logps/rejected": -347.9501953125, + "loss": 0.9714, + "margin_dpo/margin_mean": 148.63128662109375, + "margin_dpo/margin_std": 169.13424682617188, + "step": 508 + }, + { + "epoch": 0.7694633408919124, + "grad_norm": 21.243953704833984, + "learning_rate": 7.748024712947204e-08, + "logits/chosen": 0.7485306262969971, + "logits/rejected": 0.8197051286697388, + "logps/chosen": -231.2813262939453, + "logps/ref_chosen": -68.31385803222656, + "logps/ref_rejected": -49.03086853027344, + "logps/rejected": -233.74880981445312, + "loss": 1.0812, + "margin_dpo/margin_mean": 21.75048065185547, + "margin_dpo/margin_std": 92.87283325195312, + "step": 509 + }, + { + "epoch": 0.7709750566893424, + "grad_norm": 18.86398696899414, + "learning_rate": 7.652572947447272e-08, + "logits/chosen": 0.8953909873962402, + "logits/rejected": 0.8276166319847107, + "logps/chosen": -233.6016845703125, + "logps/ref_chosen": -73.07958984375, + "logps/ref_rejected": -96.33815002441406, + "logps/rejected": -307.2120666503906, + "loss": 1.0692, + "margin_dpo/margin_mean": 50.35181427001953, + "margin_dpo/margin_std": 74.205322265625, + "step": 510 + }, + { + "epoch": 0.7724867724867724, + "grad_norm": 21.314716339111328, + "learning_rate": 7.557606426772961e-08, + "logits/chosen": 1.0289305448532104, + "logits/rejected": 0.9097828269004822, + "logps/chosen": -169.629638671875, + "logps/ref_chosen": -42.50914764404297, + "logps/ref_rejected": -75.06625366210938, + "logps/rejected": -301.7984619140625, + "loss": 0.9783, + "margin_dpo/margin_mean": 99.61170959472656, + "margin_dpo/margin_std": 98.32453155517578, + "step": 511 + }, + { + "epoch": 0.7739984882842026, + "grad_norm": 31.400192260742188, + "learning_rate": 7.463127807341966e-08, + "logits/chosen": 0.9768285155296326, + "logits/rejected": 1.0176584720611572, + "logps/chosen": -181.43753051757812, + "logps/ref_chosen": -56.2095947265625, + "logps/ref_rejected": -53.511173248291016, + "logps/rejected": -220.98828125, + "loss": 1.1303, + "margin_dpo/margin_mean": 42.249168395996094, + "margin_dpo/margin_std": 113.38427734375, + "step": 512 + }, + { + "epoch": 0.7755102040816326, + "grad_norm": 18.380573272705078, + "learning_rate": 7.369139731924401e-08, + "logits/chosen": 1.0797477960586548, + "logits/rejected": 1.0573055744171143, + "logps/chosen": -140.5891571044922, + "logps/ref_chosen": -38.419105529785156, + "logps/ref_rejected": -52.706207275390625, + "logps/rejected": -238.41525268554688, + "loss": 0.9919, + "margin_dpo/margin_mean": 83.53899383544922, + "margin_dpo/margin_std": 82.92984008789062, + "step": 513 + }, + { + "epoch": 0.7770219198790628, + "grad_norm": 18.939552307128906, + "learning_rate": 7.275644829568747e-08, + "logits/chosen": 0.9963523149490356, + "logits/rejected": 0.9776211977005005, + "logps/chosen": -203.09329223632812, + "logps/ref_chosen": -62.999786376953125, + "logps/ref_rejected": -66.38030242919922, + "logps/rejected": -280.66339111328125, + "loss": 1.0044, + "margin_dpo/margin_mean": 74.18960571289062, + "margin_dpo/margin_std": 45.82038116455078, + "step": 514 + }, + { + "epoch": 0.7785336356764928, + "grad_norm": 18.924774169921875, + "learning_rate": 7.182645715528435e-08, + "logits/chosen": 1.1489193439483643, + "logits/rejected": 1.0843408107757568, + "logps/chosen": -159.77142333984375, + "logps/ref_chosen": -38.9024658203125, + "logps/ref_rejected": -66.47667694091797, + "logps/rejected": -273.12286376953125, + "loss": 1.0422, + "margin_dpo/margin_mean": 85.77719116210938, + "margin_dpo/margin_std": 76.35650634765625, + "step": 515 + }, + { + "epoch": 0.780045351473923, + "grad_norm": 17.709348678588867, + "learning_rate": 7.090144991188568e-08, + "logits/chosen": 0.980697751045227, + "logits/rejected": 0.9438163042068481, + "logps/chosen": -185.6435546875, + "logps/ref_chosen": -56.46565246582031, + "logps/ref_rejected": -69.74240112304688, + "logps/rejected": -303.88299560546875, + "loss": 1.0754, + "margin_dpo/margin_mean": 104.96269989013672, + "margin_dpo/margin_std": 115.3629150390625, + "step": 516 + }, + { + "epoch": 0.781557067271353, + "grad_norm": 19.174524307250977, + "learning_rate": 6.998145243993284e-08, + "logits/chosen": 1.0660595893859863, + "logits/rejected": 1.0764927864074707, + "logps/chosen": -227.71185302734375, + "logps/ref_chosen": -54.366859436035156, + "logps/ref_rejected": -43.79294204711914, + "logps/rejected": -237.5216064453125, + "loss": 1.1571, + "margin_dpo/margin_mean": 20.383655548095703, + "margin_dpo/margin_std": 77.95108032226562, + "step": 517 + }, + { + "epoch": 0.783068783068783, + "grad_norm": 17.056671142578125, + "learning_rate": 6.906649047373245e-08, + "logits/chosen": 0.927460789680481, + "logits/rejected": 0.9620521068572998, + "logps/chosen": -204.52810668945312, + "logps/ref_chosen": -62.31662368774414, + "logps/ref_rejected": -56.51953887939453, + "logps/rejected": -246.00726318359375, + "loss": 1.0561, + "margin_dpo/margin_mean": 47.2762451171875, + "margin_dpo/margin_std": 91.59005737304688, + "step": 518 + }, + { + "epoch": 0.7845804988662132, + "grad_norm": 21.632381439208984, + "learning_rate": 6.815658960673781e-08, + "logits/chosen": 0.9049865007400513, + "logits/rejected": 0.8612052202224731, + "logps/chosen": -216.9791717529297, + "logps/ref_chosen": -56.87085723876953, + "logps/ref_rejected": -81.93241882324219, + "logps/rejected": -281.4555969238281, + "loss": 1.3521, + "margin_dpo/margin_mean": 39.41484069824219, + "margin_dpo/margin_std": 104.36680603027344, + "step": 519 + }, + { + "epoch": 0.7860922146636432, + "grad_norm": 17.4406795501709, + "learning_rate": 6.725177529083209e-08, + "logits/chosen": 1.0133739709854126, + "logits/rejected": 0.9440522789955139, + "logps/chosen": -191.7762908935547, + "logps/ref_chosen": -57.21955108642578, + "logps/ref_rejected": -66.96128845214844, + "logps/rejected": -295.70098876953125, + "loss": 1.0819, + "margin_dpo/margin_mean": 94.18293762207031, + "margin_dpo/margin_std": 142.52069091796875, + "step": 520 + }, + { + "epoch": 0.7876039304610734, + "grad_norm": 15.527779579162598, + "learning_rate": 6.63520728356167e-08, + "logits/chosen": 0.6215152740478516, + "logits/rejected": 0.6250849962234497, + "logps/chosen": -193.13436889648438, + "logps/ref_chosen": -82.06413269042969, + "logps/ref_rejected": -93.37128448486328, + "logps/rejected": -282.43743896484375, + "loss": 0.9091, + "margin_dpo/margin_mean": 77.99594116210938, + "margin_dpo/margin_std": 60.643978118896484, + "step": 521 + }, + { + "epoch": 0.7891156462585034, + "grad_norm": 17.787490844726562, + "learning_rate": 6.545750740770336e-08, + "logits/chosen": 0.9250505566596985, + "logits/rejected": 0.8676795959472656, + "logps/chosen": -171.79269409179688, + "logps/ref_chosen": -36.52648162841797, + "logps/ref_rejected": -69.4611587524414, + "logps/rejected": -259.8004455566406, + "loss": 1.2173, + "margin_dpo/margin_mean": 55.07307434082031, + "margin_dpo/margin_std": 102.25482940673828, + "step": 522 + }, + { + "epoch": 0.7906273620559335, + "grad_norm": 19.3404483795166, + "learning_rate": 6.456810403001012e-08, + "logits/chosen": 1.0570783615112305, + "logits/rejected": 0.8110660910606384, + "logps/chosen": -182.0962371826172, + "logps/ref_chosen": -43.33647918701172, + "logps/ref_rejected": -82.51911926269531, + "logps/rejected": -322.3846130371094, + "loss": 1.0791, + "margin_dpo/margin_mean": 101.10575103759766, + "margin_dpo/margin_std": 114.18901062011719, + "step": 523 + }, + { + "epoch": 0.7921390778533636, + "grad_norm": 18.590288162231445, + "learning_rate": 6.368388758106134e-08, + "logits/chosen": 0.7667090892791748, + "logits/rejected": 0.7514083385467529, + "logps/chosen": -188.31198120117188, + "logps/ref_chosen": -71.81331634521484, + "logps/ref_rejected": -80.51419067382812, + "logps/rejected": -263.0672302246094, + "loss": 1.0755, + "margin_dpo/margin_mean": 66.05437469482422, + "margin_dpo/margin_std": 116.67527770996094, + "step": 524 + }, + { + "epoch": 0.7936507936507936, + "grad_norm": 19.926000595092773, + "learning_rate": 6.280488279429185e-08, + "logits/chosen": 0.6996693015098572, + "logits/rejected": 0.661113977432251, + "logps/chosen": -237.54904174804688, + "logps/ref_chosen": -86.49774169921875, + "logps/ref_rejected": -98.95057678222656, + "logps/rejected": -275.597412109375, + "loss": 1.1514, + "margin_dpo/margin_mean": 25.595535278320312, + "margin_dpo/margin_std": 67.40962219238281, + "step": 525 + }, + { + "epoch": 0.7951625094482238, + "grad_norm": 18.31951332092285, + "learning_rate": 6.193111425735515e-08, + "logits/chosen": 0.8428486585617065, + "logits/rejected": 0.7279390096664429, + "logps/chosen": -215.59396362304688, + "logps/ref_chosen": -71.5922622680664, + "logps/ref_rejected": -89.43500518798828, + "logps/rejected": -250.84075927734375, + "loss": 1.1416, + "margin_dpo/margin_mean": 17.404056549072266, + "margin_dpo/margin_std": 81.23014831542969, + "step": 526 + }, + { + "epoch": 0.7966742252456538, + "grad_norm": 21.556499481201172, + "learning_rate": 6.106260641143546e-08, + "logits/chosen": 1.0022237300872803, + "logits/rejected": 0.8509221076965332, + "logps/chosen": -152.88330078125, + "logps/ref_chosen": -39.413856506347656, + "logps/ref_rejected": -96.52778625488281, + "logps/rejected": -277.23040771484375, + "loss": 1.2379, + "margin_dpo/margin_mean": 67.23316192626953, + "margin_dpo/margin_std": 81.01995086669922, + "step": 527 + }, + { + "epoch": 0.7981859410430839, + "grad_norm": 22.758813858032227, + "learning_rate": 6.019938355056422e-08, + "logits/chosen": 0.6834633350372314, + "logits/rejected": 0.8445563316345215, + "logps/chosen": -205.08164978027344, + "logps/ref_chosen": -77.37605285644531, + "logps/ref_rejected": -46.57087707519531, + "logps/rejected": -181.67904663085938, + "loss": 1.249, + "margin_dpo/margin_mean": 7.4025726318359375, + "margin_dpo/margin_std": 108.43580627441406, + "step": 528 + }, + { + "epoch": 0.799697656840514, + "grad_norm": 14.703264236450195, + "learning_rate": 5.934146982094049e-08, + "logits/chosen": 0.8407646417617798, + "logits/rejected": 0.8265160322189331, + "logps/chosen": -189.27963256835938, + "logps/ref_chosen": -58.75701141357422, + "logps/ref_rejected": -71.7961196899414, + "logps/rejected": -265.8705749511719, + "loss": 0.8373, + "margin_dpo/margin_mean": 63.551815032958984, + "margin_dpo/margin_std": 80.8453598022461, + "step": 529 + }, + { + "epoch": 0.8012093726379441, + "grad_norm": 16.264793395996094, + "learning_rate": 5.848888922025552e-08, + "logits/chosen": 1.0589945316314697, + "logits/rejected": 0.912827730178833, + "logps/chosen": -172.25611877441406, + "logps/ref_chosen": -45.41581726074219, + "logps/ref_rejected": -80.25865173339844, + "logps/rejected": -290.4316711425781, + "loss": 1.1008, + "margin_dpo/margin_mean": 83.33270263671875, + "margin_dpo/margin_std": 107.66976165771484, + "step": 530 + }, + { + "epoch": 0.8027210884353742, + "grad_norm": 18.56505584716797, + "learning_rate": 5.7641665597021435e-08, + "logits/chosen": 1.0082396268844604, + "logits/rejected": 0.9188197255134583, + "logps/chosen": -170.84205627441406, + "logps/ref_chosen": -49.827571868896484, + "logps/ref_rejected": -87.68862915039062, + "logps/rejected": -295.68585205078125, + "loss": 1.1211, + "margin_dpo/margin_mean": 86.98274993896484, + "margin_dpo/margin_std": 96.60000610351562, + "step": 531 + }, + { + "epoch": 0.8042328042328042, + "grad_norm": 18.886518478393555, + "learning_rate": 5.679982264990424e-08, + "logits/chosen": 0.8886632919311523, + "logits/rejected": 0.7267423868179321, + "logps/chosen": -214.15121459960938, + "logps/ref_chosen": -59.289215087890625, + "logps/ref_rejected": -88.06941223144531, + "logps/rejected": -338.8141174316406, + "loss": 1.069, + "margin_dpo/margin_mean": 95.8827133178711, + "margin_dpo/margin_std": 99.42604064941406, + "step": 532 + }, + { + "epoch": 0.8057445200302343, + "grad_norm": 16.084077835083008, + "learning_rate": 5.596338392706076e-08, + "logits/chosen": 1.04569411277771, + "logits/rejected": 1.0000990629196167, + "logps/chosen": -164.958740234375, + "logps/ref_chosen": -57.924049377441406, + "logps/ref_rejected": -70.69439697265625, + "logps/rejected": -268.8208312988281, + "loss": 0.9852, + "margin_dpo/margin_mean": 91.09176635742188, + "margin_dpo/margin_std": 97.24915313720703, + "step": 533 + }, + { + "epoch": 0.8072562358276644, + "grad_norm": 18.018505096435547, + "learning_rate": 5.513237282548033e-08, + "logits/chosen": 0.8638824820518494, + "logits/rejected": 0.6540743708610535, + "logps/chosen": -193.56890869140625, + "logps/ref_chosen": -59.63468551635742, + "logps/ref_rejected": -109.55838012695312, + "logps/rejected": -294.1387634277344, + "loss": 1.0613, + "margin_dpo/margin_mean": 50.64615249633789, + "margin_dpo/margin_std": 116.63580322265625, + "step": 534 + }, + { + "epoch": 0.8087679516250945, + "grad_norm": 19.90399932861328, + "learning_rate": 5.430681259032957e-08, + "logits/chosen": 0.7901930212974548, + "logits/rejected": 0.7147120237350464, + "logps/chosen": -205.218994140625, + "logps/ref_chosen": -45.231201171875, + "logps/ref_rejected": -70.03333282470703, + "logps/rejected": -260.12701416015625, + "loss": 1.1115, + "margin_dpo/margin_mean": 30.105876922607422, + "margin_dpo/margin_std": 91.64392852783203, + "step": 535 + }, + { + "epoch": 0.8102796674225246, + "grad_norm": 12.067679405212402, + "learning_rate": 5.3486726314303175e-08, + "logits/chosen": 0.9899567365646362, + "logits/rejected": 0.9399088621139526, + "logps/chosen": -187.06640625, + "logps/ref_chosen": -61.66278839111328, + "logps/ref_rejected": -78.1522445678711, + "logps/rejected": -277.64288330078125, + "loss": 0.8678, + "margin_dpo/margin_mean": 74.0870361328125, + "margin_dpo/margin_std": 70.92481994628906, + "step": 536 + }, + { + "epoch": 0.8117913832199547, + "grad_norm": 18.532299041748047, + "learning_rate": 5.267213693697695e-08, + "logits/chosen": 0.9581376314163208, + "logits/rejected": 0.8182666301727295, + "logps/chosen": -162.33416748046875, + "logps/ref_chosen": -39.777610778808594, + "logps/ref_rejected": -122.8561782836914, + "logps/rejected": -347.7552490234375, + "loss": 1.0719, + "margin_dpo/margin_mean": 102.342529296875, + "margin_dpo/margin_std": 55.462013244628906, + "step": 537 + }, + { + "epoch": 0.8133030990173847, + "grad_norm": 15.579018592834473, + "learning_rate": 5.1863067244167144e-08, + "logits/chosen": 1.0169093608856201, + "logits/rejected": 1.08309006690979, + "logps/chosen": -190.21852111816406, + "logps/ref_chosen": -53.97972106933594, + "logps/ref_rejected": -40.60531997680664, + "logps/rejected": -223.557373046875, + "loss": 0.9732, + "margin_dpo/margin_mean": 46.71326446533203, + "margin_dpo/margin_std": 72.07386779785156, + "step": 538 + }, + { + "epoch": 0.8148148148148148, + "grad_norm": 18.707054138183594, + "learning_rate": 5.105953986729195e-08, + "logits/chosen": 0.7336928844451904, + "logits/rejected": 0.6928164958953857, + "logps/chosen": -191.83001708984375, + "logps/ref_chosen": -75.5906753540039, + "logps/ref_rejected": -90.88420104980469, + "logps/rejected": -264.90667724609375, + "loss": 1.0761, + "margin_dpo/margin_mean": 57.78313064575195, + "margin_dpo/margin_std": 57.98158264160156, + "step": 539 + }, + { + "epoch": 0.8163265306122449, + "grad_norm": 17.138023376464844, + "learning_rate": 5.026157728273966e-08, + "logits/chosen": 1.0368293523788452, + "logits/rejected": 0.9761728048324585, + "logps/chosen": -202.86624145507812, + "logps/ref_chosen": -53.29787063598633, + "logps/ref_rejected": -80.79837799072266, + "logps/rejected": -307.9012451171875, + "loss": 0.9363, + "margin_dpo/margin_mean": 77.53450012207031, + "margin_dpo/margin_std": 124.25914764404297, + "step": 540 + }, + { + "epoch": 0.817838246409675, + "grad_norm": 18.846561431884766, + "learning_rate": 4.9469201811239035e-08, + "logits/chosen": 0.8673323392868042, + "logits/rejected": 1.0076346397399902, + "logps/chosen": -233.70118713378906, + "logps/ref_chosen": -89.37522888183594, + "logps/ref_rejected": -50.772430419921875, + "logps/rejected": -251.73175048828125, + "loss": 0.9834, + "margin_dpo/margin_mean": 56.633365631103516, + "margin_dpo/margin_std": 85.56578063964844, + "step": 541 + }, + { + "epoch": 0.8193499622071051, + "grad_norm": 13.740031242370605, + "learning_rate": 4.868243561723534e-08, + "logits/chosen": 0.8200675845146179, + "logits/rejected": 0.7452304363250732, + "logps/chosen": -162.4728240966797, + "logps/ref_chosen": -53.12909698486328, + "logps/ref_rejected": -85.17762756347656, + "logps/rejected": -272.97418212890625, + "loss": 0.9604, + "margin_dpo/margin_mean": 78.45283508300781, + "margin_dpo/margin_std": 69.77384948730469, + "step": 542 + }, + { + "epoch": 0.8208616780045351, + "grad_norm": 15.152771949768066, + "learning_rate": 4.790130070827028e-08, + "logits/chosen": 0.8890964984893799, + "logits/rejected": 0.8061411380767822, + "logps/chosen": -171.46209716796875, + "logps/ref_chosen": -49.52074432373047, + "logps/ref_rejected": -89.69110870361328, + "logps/rejected": -296.0673828125, + "loss": 0.9858, + "margin_dpo/margin_mean": 84.43494415283203, + "margin_dpo/margin_std": 86.14839172363281, + "step": 543 + }, + { + "epoch": 0.8223733938019653, + "grad_norm": 18.41205596923828, + "learning_rate": 4.7125818934366454e-08, + "logits/chosen": 0.8170167207717896, + "logits/rejected": 0.8432954549789429, + "logps/chosen": -270.7913818359375, + "logps/ref_chosen": -79.90715026855469, + "logps/ref_rejected": -65.11402130126953, + "logps/rejected": -296.67626953125, + "loss": 0.9642, + "margin_dpo/margin_mean": 40.67804718017578, + "margin_dpo/margin_std": 80.78584289550781, + "step": 544 + }, + { + "epoch": 0.8238851095993953, + "grad_norm": 20.38388442993164, + "learning_rate": 4.635601198741607e-08, + "logits/chosen": 0.9498938322067261, + "logits/rejected": 0.8978836536407471, + "logps/chosen": -191.94818115234375, + "logps/ref_chosen": -49.8741455078125, + "logps/ref_rejected": -74.81036376953125, + "logps/rejected": -259.6213073730469, + "loss": 1.1264, + "margin_dpo/margin_mean": 42.736907958984375, + "margin_dpo/margin_std": 79.07319641113281, + "step": 545 + }, + { + "epoch": 0.8253968253968254, + "grad_norm": 19.26015853881836, + "learning_rate": 4.559190140057428e-08, + "logits/chosen": 0.9439926743507385, + "logits/rejected": 0.9385854601860046, + "logps/chosen": -254.52891540527344, + "logps/ref_chosen": -83.20497131347656, + "logps/ref_rejected": -79.42691040039062, + "logps/rejected": -289.6729736328125, + "loss": 1.0739, + "margin_dpo/margin_mean": 38.9221305847168, + "margin_dpo/margin_std": 98.08988952636719, + "step": 546 + }, + { + "epoch": 0.8269085411942555, + "grad_norm": 16.925718307495117, + "learning_rate": 4.483350854765672e-08, + "logits/chosen": 0.9709137678146362, + "logits/rejected": 0.7829012870788574, + "logps/chosen": -161.71536254882812, + "logps/ref_chosen": -30.964988708496094, + "logps/ref_rejected": -70.52467346191406, + "logps/rejected": -272.62640380859375, + "loss": 0.9703, + "margin_dpo/margin_mean": 71.35137939453125, + "margin_dpo/margin_std": 87.27641296386719, + "step": 547 + }, + { + "epoch": 0.8284202569916855, + "grad_norm": 20.741260528564453, + "learning_rate": 4.4080854642541826e-08, + "logits/chosen": 0.7766395211219788, + "logits/rejected": 0.7720953822135925, + "logps/chosen": -190.9144287109375, + "logps/ref_chosen": -63.495338439941406, + "logps/ref_rejected": -73.84056091308594, + "logps/rejected": -251.48435974121094, + "loss": 1.1606, + "margin_dpo/margin_mean": 50.224700927734375, + "margin_dpo/margin_std": 94.93733215332031, + "step": 548 + }, + { + "epoch": 0.8299319727891157, + "grad_norm": 27.655611038208008, + "learning_rate": 4.333396073857723e-08, + "logits/chosen": 0.9606146812438965, + "logits/rejected": 0.9492313861846924, + "logps/chosen": -232.9254608154297, + "logps/ref_chosen": -73.39987182617188, + "logps/ref_rejected": -76.37744140625, + "logps/rejected": -271.0856628417969, + "loss": 1.18, + "margin_dpo/margin_mean": 35.182621002197266, + "margin_dpo/margin_std": 107.7386703491211, + "step": 549 + }, + { + "epoch": 0.8314436885865457, + "grad_norm": 17.47970199584961, + "learning_rate": 4.259284772799099e-08, + "logits/chosen": 1.0232315063476562, + "logits/rejected": 1.0318495035171509, + "logps/chosen": -138.86331176757812, + "logps/ref_chosen": -41.076881408691406, + "logps/ref_rejected": -47.863609313964844, + "logps/rejected": -196.962158203125, + "loss": 1.1886, + "margin_dpo/margin_mean": 51.31211471557617, + "margin_dpo/margin_std": 87.56314086914062, + "step": 550 + }, + { + "epoch": 0.8329554043839759, + "grad_norm": 16.67303466796875, + "learning_rate": 4.1857536341307176e-08, + "logits/chosen": 1.0085614919662476, + "logits/rejected": 0.9332855939865112, + "logps/chosen": -187.7180633544922, + "logps/ref_chosen": -43.32201385498047, + "logps/ref_rejected": -80.82965850830078, + "logps/rejected": -318.88507080078125, + "loss": 1.0361, + "margin_dpo/margin_mean": 93.65936279296875, + "margin_dpo/margin_std": 136.92462158203125, + "step": 551 + }, + { + "epoch": 0.8344671201814059, + "grad_norm": 20.140682220458984, + "learning_rate": 4.112804714676593e-08, + "logits/chosen": 1.030794382095337, + "logits/rejected": 0.9187796115875244, + "logps/chosen": -200.61541748046875, + "logps/ref_chosen": -51.589683532714844, + "logps/ref_rejected": -92.36204528808594, + "logps/rejected": -306.091064453125, + "loss": 1.0185, + "margin_dpo/margin_mean": 64.70327758789062, + "margin_dpo/margin_std": 70.84175109863281, + "step": 552 + }, + { + "epoch": 0.8359788359788359, + "grad_norm": 18.41575050354004, + "learning_rate": 4.0404400549748144e-08, + "logits/chosen": 1.0242167711257935, + "logits/rejected": 0.8356890082359314, + "logps/chosen": -183.38101196289062, + "logps/ref_chosen": -46.85206604003906, + "logps/ref_rejected": -111.05335235595703, + "logps/rejected": -361.5396728515625, + "loss": 1.1174, + "margin_dpo/margin_mean": 113.9573745727539, + "margin_dpo/margin_std": 144.8773193359375, + "step": 553 + }, + { + "epoch": 0.8374905517762661, + "grad_norm": 18.322969436645508, + "learning_rate": 3.968661679220467e-08, + "logits/chosen": 0.8415111303329468, + "logits/rejected": 0.7810766696929932, + "logps/chosen": -176.80612182617188, + "logps/ref_chosen": -56.84330368041992, + "logps/ref_rejected": -71.64338684082031, + "logps/rejected": -253.65359497070312, + "loss": 1.0157, + "margin_dpo/margin_mean": 62.0473747253418, + "margin_dpo/margin_std": 105.61851501464844, + "step": 554 + }, + { + "epoch": 0.8390022675736961, + "grad_norm": 18.993671417236328, + "learning_rate": 3.89747159520904e-08, + "logits/chosen": 0.8262725472450256, + "logits/rejected": 0.7698884010314941, + "logps/chosen": -232.79461669921875, + "logps/ref_chosen": -88.44906616210938, + "logps/ref_rejected": -92.66055297851562, + "logps/rejected": -271.56744384765625, + "loss": 1.1031, + "margin_dpo/margin_mean": 34.56132125854492, + "margin_dpo/margin_std": 118.9439926147461, + "step": 555 + }, + { + "epoch": 0.8405139833711263, + "grad_norm": 17.27111053466797, + "learning_rate": 3.826871794280192e-08, + "logits/chosen": 0.9412505030632019, + "logits/rejected": 0.9116497039794922, + "logps/chosen": -217.48727416992188, + "logps/ref_chosen": -56.714725494384766, + "logps/ref_rejected": -64.49860382080078, + "logps/rejected": -274.18817138671875, + "loss": 1.1464, + "margin_dpo/margin_mean": 48.91699981689453, + "margin_dpo/margin_std": 71.00090026855469, + "step": 556 + }, + { + "epoch": 0.8420256991685563, + "grad_norm": 16.771560668945312, + "learning_rate": 3.756864251262143e-08, + "logits/chosen": 0.9594881534576416, + "logits/rejected": 0.9018399715423584, + "logps/chosen": -157.4939422607422, + "logps/ref_chosen": -35.662818908691406, + "logps/ref_rejected": -49.78130340576172, + "logps/rejected": -229.19021606445312, + "loss": 0.9731, + "margin_dpo/margin_mean": 57.57780456542969, + "margin_dpo/margin_std": 78.69767761230469, + "step": 557 + }, + { + "epoch": 0.8435374149659864, + "grad_norm": 15.087602615356445, + "learning_rate": 3.687450924416341e-08, + "logits/chosen": 1.0110355615615845, + "logits/rejected": 0.9612963199615479, + "logps/chosen": -196.357666015625, + "logps/ref_chosen": -67.20350646972656, + "logps/ref_rejected": -90.46063232421875, + "logps/rejected": -308.90771484375, + "loss": 0.9023, + "margin_dpo/margin_mean": 89.29290771484375, + "margin_dpo/margin_std": 79.98139953613281, + "step": 558 + }, + { + "epoch": 0.8450491307634165, + "grad_norm": 15.841297149658203, + "learning_rate": 3.6186337553827743e-08, + "logits/chosen": 0.8561170101165771, + "logits/rejected": 0.7312831878662109, + "logps/chosen": -225.37515258789062, + "logps/ref_chosen": -82.38276672363281, + "logps/ref_rejected": -148.32070922851562, + "logps/rejected": -394.68408203125, + "loss": 1.0114, + "margin_dpo/margin_mean": 103.37101745605469, + "margin_dpo/margin_std": 142.59449768066406, + "step": 559 + }, + { + "epoch": 0.8465608465608465, + "grad_norm": 18.28428077697754, + "learning_rate": 3.550414669125573e-08, + "logits/chosen": 0.8422687649726868, + "logits/rejected": 0.8341982364654541, + "logps/chosen": -237.72918701171875, + "logps/ref_chosen": -87.37367248535156, + "logps/ref_rejected": -98.05863952636719, + "logps/rejected": -306.2855529785156, + "loss": 1.0279, + "margin_dpo/margin_mean": 57.87139892578125, + "margin_dpo/margin_std": 98.06793212890625, + "step": 560 + }, + { + "epoch": 0.8480725623582767, + "grad_norm": 15.278297424316406, + "learning_rate": 3.482795573879241e-08, + "logits/chosen": 0.9140326976776123, + "logits/rejected": 0.8790804743766785, + "logps/chosen": -175.6079559326172, + "logps/ref_chosen": -46.72880935668945, + "logps/ref_rejected": -72.11878204345703, + "logps/rejected": -249.57186889648438, + "loss": 1.0484, + "margin_dpo/margin_mean": 48.573936462402344, + "margin_dpo/margin_std": 56.30522155761719, + "step": 561 + }, + { + "epoch": 0.8495842781557067, + "grad_norm": 17.868244171142578, + "learning_rate": 3.415778361095226e-08, + "logits/chosen": 0.9695894718170166, + "logits/rejected": 0.8319031000137329, + "logps/chosen": -219.78314208984375, + "logps/ref_chosen": -73.16322326660156, + "logps/ref_rejected": -130.28079223632812, + "logps/rejected": -364.35076904296875, + "loss": 0.9357, + "margin_dpo/margin_mean": 87.45001220703125, + "margin_dpo/margin_std": 103.97313690185547, + "step": 562 + }, + { + "epoch": 0.8510959939531368, + "grad_norm": 20.500097274780273, + "learning_rate": 3.349364905389032e-08, + "logits/chosen": 1.082218885421753, + "logits/rejected": 1.0179955959320068, + "logps/chosen": -173.17086791992188, + "logps/ref_chosen": -52.58354187011719, + "logps/ref_rejected": -81.23396301269531, + "logps/rejected": -261.9834899902344, + "loss": 1.0687, + "margin_dpo/margin_mean": 60.16220474243164, + "margin_dpo/margin_std": 83.78437805175781, + "step": 563 + }, + { + "epoch": 0.8526077097505669, + "grad_norm": 20.62779426574707, + "learning_rate": 3.283557064487785e-08, + "logits/chosen": 0.9116697311401367, + "logits/rejected": 0.9356800317764282, + "logps/chosen": -168.27835083007812, + "logps/ref_chosen": -54.56080627441406, + "logps/ref_rejected": -52.1234245300293, + "logps/rejected": -250.03018188476562, + "loss": 0.9691, + "margin_dpo/margin_mean": 84.18922424316406, + "margin_dpo/margin_std": 93.12564849853516, + "step": 564 + }, + { + "epoch": 0.854119425547997, + "grad_norm": 17.084941864013672, + "learning_rate": 3.218356679178252e-08, + "logits/chosen": 0.9586303234100342, + "logits/rejected": 0.7509380578994751, + "logps/chosen": -274.4070129394531, + "logps/ref_chosen": -56.54259490966797, + "logps/ref_rejected": -125.5247573852539, + "logps/rejected": -408.77130126953125, + "loss": 0.9844, + "margin_dpo/margin_mean": 65.38215637207031, + "margin_dpo/margin_std": 83.48878479003906, + "step": 565 + }, + { + "epoch": 0.8556311413454271, + "grad_norm": 17.82448959350586, + "learning_rate": 3.1537655732553764e-08, + "logits/chosen": 0.9717875719070435, + "logits/rejected": 0.9099733233451843, + "logps/chosen": -195.46827697753906, + "logps/ref_chosen": -66.43058776855469, + "logps/ref_rejected": -88.7685775756836, + "logps/rejected": -317.3566589355469, + "loss": 1.0874, + "margin_dpo/margin_mean": 99.5504150390625, + "margin_dpo/margin_std": 77.15394592285156, + "step": 566 + }, + { + "epoch": 0.8571428571428571, + "grad_norm": 15.454371452331543, + "learning_rate": 3.089785553471233e-08, + "logits/chosen": 0.8322715759277344, + "logits/rejected": 0.8646911978721619, + "logps/chosen": -187.09246826171875, + "logps/ref_chosen": -61.010929107666016, + "logps/ref_rejected": -59.36076736450195, + "logps/rejected": -264.74932861328125, + "loss": 0.9967, + "margin_dpo/margin_mean": 79.30699920654297, + "margin_dpo/margin_std": 74.8924789428711, + "step": 567 + }, + { + "epoch": 0.8586545729402872, + "grad_norm": 16.217979431152344, + "learning_rate": 3.026418409484513e-08, + "logits/chosen": 1.0041697025299072, + "logits/rejected": 0.9253759384155273, + "logps/chosen": -160.50274658203125, + "logps/ref_chosen": -39.93986892700195, + "logps/ref_rejected": -80.44910430908203, + "logps/rejected": -245.42349243164062, + "loss": 0.9188, + "margin_dpo/margin_mean": 44.4115104675293, + "margin_dpo/margin_std": 74.29348754882812, + "step": 568 + }, + { + "epoch": 0.8601662887377173, + "grad_norm": 20.45829963684082, + "learning_rate": 2.963665913810451e-08, + "logits/chosen": 0.9038550853729248, + "logits/rejected": 0.9029619693756104, + "logps/chosen": -228.15231323242188, + "logps/ref_chosen": -81.28688049316406, + "logps/ref_rejected": -83.6900405883789, + "logps/rejected": -316.5882873535156, + "loss": 1.1757, + "margin_dpo/margin_mean": 86.03282165527344, + "margin_dpo/margin_std": 147.73619079589844, + "step": 569 + }, + { + "epoch": 0.8616780045351474, + "grad_norm": 17.267189025878906, + "learning_rate": 2.9015298217712453e-08, + "logits/chosen": 0.9757269620895386, + "logits/rejected": 0.9647901058197021, + "logps/chosen": -202.49285888671875, + "logps/ref_chosen": -53.46790313720703, + "logps/ref_rejected": -51.320770263671875, + "logps/rejected": -260.1011962890625, + "loss": 0.9055, + "margin_dpo/margin_mean": 59.75544738769531, + "margin_dpo/margin_std": 78.49115753173828, + "step": 570 + }, + { + "epoch": 0.8631897203325775, + "grad_norm": 19.252838134765625, + "learning_rate": 2.840011871446962e-08, + "logits/chosen": 0.9464564323425293, + "logits/rejected": 0.8572288751602173, + "logps/chosen": -164.54974365234375, + "logps/ref_chosen": -45.11099624633789, + "logps/ref_rejected": -71.55215454101562, + "logps/rejected": -285.15167236328125, + "loss": 1.1555, + "margin_dpo/margin_mean": 94.1607666015625, + "margin_dpo/margin_std": 116.37611389160156, + "step": 571 + }, + { + "epoch": 0.8647014361300076, + "grad_norm": 22.696313858032227, + "learning_rate": 2.7791137836269158e-08, + "logits/chosen": 0.9143853187561035, + "logits/rejected": 0.863966703414917, + "logps/chosen": -178.92416381835938, + "logps/ref_chosen": -50.03799819946289, + "logps/ref_rejected": -69.7855224609375, + "logps/rejected": -282.4919738769531, + "loss": 1.0403, + "margin_dpo/margin_mean": 83.82028198242188, + "margin_dpo/margin_std": 97.30913543701172, + "step": 572 + }, + { + "epoch": 0.8662131519274376, + "grad_norm": 17.95223617553711, + "learning_rate": 2.718837261761528e-08, + "logits/chosen": 0.9436647891998291, + "logits/rejected": 0.8891823291778564, + "logps/chosen": -223.79197692871094, + "logps/ref_chosen": -65.06491088867188, + "logps/ref_rejected": -74.16371154785156, + "logps/rejected": -237.40768432617188, + "loss": 1.1207, + "margin_dpo/margin_mean": 4.516890525817871, + "margin_dpo/margin_std": 93.70068359375, + "step": 573 + }, + { + "epoch": 0.8677248677248677, + "grad_norm": 13.765616416931152, + "learning_rate": 2.659183991914696e-08, + "logits/chosen": 0.9557688236236572, + "logits/rejected": 0.9775044918060303, + "logps/chosen": -214.97642517089844, + "logps/ref_chosen": -50.88386917114258, + "logps/ref_rejected": -51.98750686645508, + "logps/rejected": -228.66259765625, + "loss": 0.8728, + "margin_dpo/margin_mean": 12.582534790039062, + "margin_dpo/margin_std": 53.39247131347656, + "step": 574 + }, + { + "epoch": 0.8692365835222978, + "grad_norm": 17.010929107666016, + "learning_rate": 2.600155642716606e-08, + "logits/chosen": 0.9337095022201538, + "logits/rejected": 0.786598801612854, + "logps/chosen": -208.71426391601562, + "logps/ref_chosen": -61.41114044189453, + "logps/ref_rejected": -112.07469177246094, + "logps/rejected": -286.92864990234375, + "loss": 1.1869, + "margin_dpo/margin_mean": 27.55083465576172, + "margin_dpo/margin_std": 119.59794616699219, + "step": 575 + }, + { + "epoch": 0.8707482993197279, + "grad_norm": 15.698728561401367, + "learning_rate": 2.5417538653170754e-08, + "logits/chosen": 1.0223554372787476, + "logits/rejected": 0.9235273599624634, + "logps/chosen": -183.23175048828125, + "logps/ref_chosen": -54.792724609375, + "logps/ref_rejected": -84.78044128417969, + "logps/rejected": -297.049072265625, + "loss": 0.8786, + "margin_dpo/margin_mean": 83.82960510253906, + "margin_dpo/margin_std": 106.40802001953125, + "step": 576 + }, + { + "epoch": 0.872260015117158, + "grad_norm": 17.254444122314453, + "learning_rate": 2.4839802933393607e-08, + "logits/chosen": 1.0162341594696045, + "logits/rejected": 0.9787265062332153, + "logps/chosen": -163.14552307128906, + "logps/ref_chosen": -44.14771270751953, + "logps/ref_rejected": -65.19963836669922, + "logps/rejected": -230.81610107421875, + "loss": 1.113, + "margin_dpo/margin_mean": 46.61864471435547, + "margin_dpo/margin_std": 70.7182846069336, + "step": 577 + }, + { + "epoch": 0.873771730914588, + "grad_norm": 17.691556930541992, + "learning_rate": 2.4268365428344733e-08, + "logits/chosen": 1.0092999935150146, + "logits/rejected": 0.9245076179504395, + "logps/chosen": -200.7638702392578, + "logps/ref_chosen": -47.827537536621094, + "logps/ref_rejected": -84.31831359863281, + "logps/rejected": -295.13720703125, + "loss": 1.1273, + "margin_dpo/margin_mean": 57.8825798034668, + "margin_dpo/margin_std": 108.26670837402344, + "step": 578 + }, + { + "epoch": 0.8752834467120182, + "grad_norm": 17.17439842224121, + "learning_rate": 2.3703242122359357e-08, + "logits/chosen": 0.9395872950553894, + "logits/rejected": 0.883934497833252, + "logps/chosen": -172.1771240234375, + "logps/ref_chosen": -39.918418884277344, + "logps/ref_rejected": -67.36483001708984, + "logps/rejected": -293.5935974121094, + "loss": 0.9021, + "margin_dpo/margin_mean": 93.97006225585938, + "margin_dpo/margin_std": 82.55453491210938, + "step": 579 + }, + { + "epoch": 0.8767951625094482, + "grad_norm": 16.5377197265625, + "learning_rate": 2.3144448823151392e-08, + "logits/chosen": 0.8914264440536499, + "logits/rejected": 0.8536027669906616, + "logps/chosen": -203.43321228027344, + "logps/ref_chosen": -57.42926025390625, + "logps/ref_rejected": -71.1881332397461, + "logps/rejected": -251.05111694335938, + "loss": 1.0703, + "margin_dpo/margin_mean": 33.859039306640625, + "margin_dpo/margin_std": 79.80448150634766, + "step": 580 + }, + { + "epoch": 0.8783068783068783, + "grad_norm": 16.735671997070312, + "learning_rate": 2.259200116137039e-08, + "logits/chosen": 0.9245070219039917, + "logits/rejected": 0.8710756301879883, + "logps/chosen": -207.47396850585938, + "logps/ref_chosen": -64.48869323730469, + "logps/ref_rejected": -80.07521057128906, + "logps/rejected": -284.458251953125, + "loss": 1.0719, + "margin_dpo/margin_mean": 61.39775085449219, + "margin_dpo/margin_std": 92.19239807128906, + "step": 581 + }, + { + "epoch": 0.8798185941043084, + "grad_norm": 21.038320541381836, + "learning_rate": 2.204591459016525e-08, + "logits/chosen": 0.9050915241241455, + "logits/rejected": 0.7998449802398682, + "logps/chosen": -180.35040283203125, + "logps/ref_chosen": -52.15564727783203, + "logps/ref_rejected": -81.67626190185547, + "logps/rejected": -324.54791259765625, + "loss": 1.106, + "margin_dpo/margin_mean": 114.67691802978516, + "margin_dpo/margin_std": 95.79690551757812, + "step": 582 + }, + { + "epoch": 0.8813303099017384, + "grad_norm": 21.99834442138672, + "learning_rate": 2.1506204384751064e-08, + "logits/chosen": 1.0146265029907227, + "logits/rejected": 0.9865950345993042, + "logps/chosen": -198.27951049804688, + "logps/ref_chosen": -53.600677490234375, + "logps/ref_rejected": -66.55783081054688, + "logps/rejected": -231.4248046875, + "loss": 1.05, + "margin_dpo/margin_mean": 20.188133239746094, + "margin_dpo/margin_std": 88.58358001708984, + "step": 583 + }, + { + "epoch": 0.8828420256991686, + "grad_norm": 18.144161224365234, + "learning_rate": 2.09728856419826e-08, + "logits/chosen": 0.9950936436653137, + "logits/rejected": 0.8881626129150391, + "logps/chosen": -145.82467651367188, + "logps/ref_chosen": -39.92349624633789, + "logps/ref_rejected": -85.95851135253906, + "logps/rejected": -260.9383544921875, + "loss": 1.0972, + "margin_dpo/margin_mean": 69.07866668701172, + "margin_dpo/margin_std": 94.79948425292969, + "step": 584 + }, + { + "epoch": 0.8843537414965986, + "grad_norm": 15.75501537322998, + "learning_rate": 2.044597327993153e-08, + "logits/chosen": 0.9527912139892578, + "logits/rejected": 0.7973681092262268, + "logps/chosen": -196.22564697265625, + "logps/ref_chosen": -54.76570129394531, + "logps/ref_rejected": -115.46517944335938, + "logps/rejected": -307.131591796875, + "loss": 1.1429, + "margin_dpo/margin_mean": 50.206451416015625, + "margin_dpo/margin_std": 92.417724609375, + "step": 585 + }, + { + "epoch": 0.8858654572940288, + "grad_norm": 18.22370147705078, + "learning_rate": 1.9925482037469187e-08, + "logits/chosen": 0.9899888038635254, + "logits/rejected": 1.0135765075683594, + "logps/chosen": -180.9400634765625, + "logps/ref_chosen": -62.72941207885742, + "logps/ref_rejected": -46.449256896972656, + "logps/rejected": -233.3268585205078, + "loss": 0.9577, + "margin_dpo/margin_mean": 68.66695404052734, + "margin_dpo/margin_std": 74.32232666015625, + "step": 586 + }, + { + "epoch": 0.8873771730914588, + "grad_norm": 21.38698387145996, + "learning_rate": 1.9411426473854687e-08, + "logits/chosen": 1.0005683898925781, + "logits/rejected": 0.954791784286499, + "logps/chosen": -218.3308868408203, + "logps/ref_chosen": -58.70615768432617, + "logps/ref_rejected": -70.26844024658203, + "logps/rejected": -261.1861267089844, + "loss": 1.0981, + "margin_dpo/margin_mean": 31.292953491210938, + "margin_dpo/margin_std": 83.6708984375, + "step": 587 + }, + { + "epoch": 0.8888888888888888, + "grad_norm": 19.388957977294922, + "learning_rate": 1.890382096832699e-08, + "logits/chosen": 0.8030673265457153, + "logits/rejected": 0.7511934041976929, + "logps/chosen": -196.69418334960938, + "logps/ref_chosen": -66.02383422851562, + "logps/ref_rejected": -92.533203125, + "logps/rejected": -316.143798828125, + "loss": 1.0657, + "margin_dpo/margin_mean": 92.94023895263672, + "margin_dpo/margin_std": 82.5245361328125, + "step": 588 + }, + { + "epoch": 0.890400604686319, + "grad_norm": 16.032787322998047, + "learning_rate": 1.840267971970344e-08, + "logits/chosen": 0.8890354633331299, + "logits/rejected": 0.8605284690856934, + "logps/chosen": -199.17193603515625, + "logps/ref_chosen": -52.7588005065918, + "logps/ref_rejected": -68.78264617919922, + "logps/rejected": -266.2153015136719, + "loss": 0.9575, + "margin_dpo/margin_mean": 51.01952362060547, + "margin_dpo/margin_std": 97.24200439453125, + "step": 589 + }, + { + "epoch": 0.891912320483749, + "grad_norm": 16.486291885375977, + "learning_rate": 1.7908016745981856e-08, + "logits/chosen": 0.9132862091064453, + "logits/rejected": 0.8946816921234131, + "logps/chosen": -198.87548828125, + "logps/ref_chosen": -53.84131622314453, + "logps/ref_rejected": -64.9970703125, + "logps/rejected": -240.9401092529297, + "loss": 0.9877, + "margin_dpo/margin_mean": 30.908885955810547, + "margin_dpo/margin_std": 70.15216827392578, + "step": 590 + }, + { + "epoch": 0.8934240362811792, + "grad_norm": 18.833675384521484, + "learning_rate": 1.7419845883949098e-08, + "logits/chosen": 0.9794512987136841, + "logits/rejected": 0.9539260268211365, + "logps/chosen": -170.65090942382812, + "logps/ref_chosen": -50.85618591308594, + "logps/ref_rejected": -61.08381271362305, + "logps/rejected": -261.77154541015625, + "loss": 0.9912, + "margin_dpo/margin_mean": 80.89300537109375, + "margin_dpo/margin_std": 80.72918701171875, + "step": 591 + }, + { + "epoch": 0.8949357520786092, + "grad_norm": 18.000661849975586, + "learning_rate": 1.6938180788793556e-08, + "logits/chosen": 1.0009410381317139, + "logits/rejected": 0.9112914800643921, + "logps/chosen": -176.25234985351562, + "logps/ref_chosen": -51.354652404785156, + "logps/ref_rejected": -67.10594177246094, + "logps/rejected": -271.6142578125, + "loss": 0.985, + "margin_dpo/margin_mean": 79.610595703125, + "margin_dpo/margin_std": 81.41339111328125, + "step": 592 + }, + { + "epoch": 0.8964474678760394, + "grad_norm": 18.60577392578125, + "learning_rate": 1.6463034933723336e-08, + "logits/chosen": 0.8909306526184082, + "logits/rejected": 0.8740547299385071, + "logps/chosen": -187.12136840820312, + "logps/ref_chosen": -61.935089111328125, + "logps/ref_rejected": -82.4512939453125, + "logps/rejected": -286.46282958984375, + "loss": 1.0216, + "margin_dpo/margin_mean": 78.82524871826172, + "margin_dpo/margin_std": 103.09696960449219, + "step": 593 + }, + { + "epoch": 0.8979591836734694, + "grad_norm": 18.425268173217773, + "learning_rate": 1.5994421609589385e-08, + "logits/chosen": 0.8772367835044861, + "logits/rejected": 0.8558773994445801, + "logps/chosen": -211.46441650390625, + "logps/ref_chosen": -63.053680419921875, + "logps/ref_rejected": -68.9009017944336, + "logps/rejected": -248.54051208496094, + "loss": 1.0588, + "margin_dpo/margin_mean": 31.228851318359375, + "margin_dpo/margin_std": 52.22967529296875, + "step": 594 + }, + { + "epoch": 0.8994708994708994, + "grad_norm": 14.107276916503906, + "learning_rate": 1.553235392451377e-08, + "logits/chosen": 1.0201103687286377, + "logits/rejected": 0.9065714478492737, + "logps/chosen": -164.76438903808594, + "logps/ref_chosen": -36.28746032714844, + "logps/ref_rejected": -76.62598419189453, + "logps/rejected": -292.2261962890625, + "loss": 0.9302, + "margin_dpo/margin_mean": 87.12327575683594, + "margin_dpo/margin_std": 96.01534271240234, + "step": 595 + }, + { + "epoch": 0.9009826152683296, + "grad_norm": 16.283180236816406, + "learning_rate": 1.507684480352292e-08, + "logits/chosen": 0.8756155371665955, + "logits/rejected": 0.8407232761383057, + "logps/chosen": -196.04852294921875, + "logps/ref_chosen": -58.18864440917969, + "logps/ref_rejected": -69.7575454711914, + "logps/rejected": -244.40557861328125, + "loss": 1.2206, + "margin_dpo/margin_mean": 36.78814697265625, + "margin_dpo/margin_std": 108.71817016601562, + "step": 596 + }, + { + "epoch": 0.9024943310657596, + "grad_norm": 14.265636444091797, + "learning_rate": 1.4627906988186111e-08, + "logits/chosen": 0.9384099245071411, + "logits/rejected": 0.9882324934005737, + "logps/chosen": -195.47030639648438, + "logps/ref_chosen": -62.06855773925781, + "logps/ref_rejected": -62.94855880737305, + "logps/rejected": -221.61302185058594, + "loss": 1.0398, + "margin_dpo/margin_mean": 25.262706756591797, + "margin_dpo/margin_std": 77.90994262695312, + "step": 597 + }, + { + "epoch": 0.9040060468631897, + "grad_norm": 22.982269287109375, + "learning_rate": 1.4185553036259095e-08, + "logits/chosen": 0.8697057962417603, + "logits/rejected": 0.8449388742446899, + "logps/chosen": -206.47000122070312, + "logps/ref_chosen": -48.21617889404297, + "logps/ref_rejected": -68.27192687988281, + "logps/rejected": -257.6476745605469, + "loss": 1.2139, + "margin_dpo/margin_mean": 31.121919631958008, + "margin_dpo/margin_std": 93.97370147705078, + "step": 598 + }, + { + "epoch": 0.9055177626606198, + "grad_norm": 17.03192138671875, + "learning_rate": 1.3749795321332885e-08, + "logits/chosen": 0.8180956244468689, + "logits/rejected": 0.8315231800079346, + "logps/chosen": -224.57041931152344, + "logps/ref_chosen": -61.865814208984375, + "logps/ref_rejected": -62.11842346191406, + "logps/rejected": -258.51904296875, + "loss": 1.146, + "margin_dpo/margin_mean": 33.696022033691406, + "margin_dpo/margin_std": 111.64860534667969, + "step": 599 + }, + { + "epoch": 0.9070294784580499, + "grad_norm": 17.681379318237305, + "learning_rate": 1.3320646032487393e-08, + "logits/chosen": 0.9060215353965759, + "logits/rejected": 0.9525216817855835, + "logps/chosen": -173.75914001464844, + "logps/ref_chosen": -55.92002868652344, + "logps/ref_rejected": -53.79228210449219, + "logps/rejected": -239.77134704589844, + "loss": 1.1391, + "margin_dpo/margin_mean": 68.13994598388672, + "margin_dpo/margin_std": 82.62960815429688, + "step": 600 + }, + { + "epoch": 0.9070294784580499, + "eval_logits/chosen": 0.8960238695144653, + "eval_logits/rejected": 0.863480806350708, + "eval_logps/chosen": -211.5150146484375, + "eval_logps/ref_chosen": -75.30646514892578, + "eval_logps/ref_rejected": -77.75511932373047, + "eval_logps/rejected": -274.14215087890625, + "eval_loss": 0.5347773432731628, + "eval_margin_dpo/margin_mean": 60.178462982177734, + "eval_margin_dpo/margin_std": 94.62100982666016, + "eval_runtime": 37.4693, + "eval_samples_per_second": 61.464, + "eval_steps_per_second": 1.922, + "step": 600 + }, + { + "epoch": 0.90854119425548, + "grad_norm": 14.842012405395508, + "learning_rate": 1.2898117173950868e-08, + "logits/chosen": 0.8835675716400146, + "logits/rejected": 0.8951883912086487, + "logps/chosen": -201.0157470703125, + "logps/ref_chosen": -70.18791198730469, + "logps/ref_rejected": -71.40547180175781, + "logps/rejected": -291.8153381347656, + "loss": 0.9772, + "margin_dpo/margin_mean": 89.58202362060547, + "margin_dpo/margin_std": 125.6627197265625, + "step": 601 + }, + { + "epoch": 0.91005291005291, + "grad_norm": 18.32841682434082, + "learning_rate": 1.2482220564763667e-08, + "logits/chosen": 0.9220967292785645, + "logits/rejected": 0.8645837306976318, + "logps/chosen": -187.869873046875, + "logps/ref_chosen": -70.41340637207031, + "logps/ref_rejected": -97.12376403808594, + "logps/rejected": -304.54473876953125, + "loss": 0.989, + "margin_dpo/margin_mean": 89.96450805664062, + "margin_dpo/margin_std": 100.75538635253906, + "step": 602 + }, + { + "epoch": 0.9115646258503401, + "grad_norm": 19.300460815429688, + "learning_rate": 1.2072967838448051e-08, + "logits/chosen": 0.9347689747810364, + "logits/rejected": 0.8060243129730225, + "logps/chosen": -211.57089233398438, + "logps/ref_chosen": -62.89923858642578, + "logps/ref_rejected": -108.35671997070312, + "logps/rejected": -351.4485778808594, + "loss": 0.9984, + "margin_dpo/margin_mean": 94.42021179199219, + "margin_dpo/margin_std": 152.901123046875, + "step": 603 + }, + { + "epoch": 0.9130763416477702, + "grad_norm": 15.397801399230957, + "learning_rate": 1.1670370442682459e-08, + "logits/chosen": 0.9415032267570496, + "logits/rejected": 0.9144819974899292, + "logps/chosen": -188.33102416992188, + "logps/ref_chosen": -62.4847412109375, + "logps/ref_rejected": -62.371429443359375, + "logps/rejected": -235.138427734375, + "loss": 1.1185, + "margin_dpo/margin_mean": 46.92070770263672, + "margin_dpo/margin_std": 84.41587829589844, + "step": 604 + }, + { + "epoch": 0.9145880574452003, + "grad_norm": 18.700698852539062, + "learning_rate": 1.1274439638981532e-08, + "logits/chosen": 1.0574252605438232, + "logits/rejected": 0.938467264175415, + "logps/chosen": -192.36363220214844, + "logps/ref_chosen": -48.73389434814453, + "logps/ref_rejected": -83.2470703125, + "logps/rejected": -292.3554992675781, + "loss": 1.0445, + "margin_dpo/margin_mean": 65.47867584228516, + "margin_dpo/margin_std": 119.60535430908203, + "step": 605 + }, + { + "epoch": 0.9160997732426304, + "grad_norm": 17.30083465576172, + "learning_rate": 1.0885186502381016e-08, + "logits/chosen": 0.9336838722229004, + "logits/rejected": 0.846339225769043, + "logps/chosen": -190.03634643554688, + "logps/ref_chosen": -53.08481979370117, + "logps/ref_rejected": -80.11920166015625, + "logps/rejected": -295.1481018066406, + "loss": 0.989, + "margin_dpo/margin_mean": 78.07736206054688, + "margin_dpo/margin_std": 93.74771118164062, + "step": 606 + }, + { + "epoch": 0.9176114890400605, + "grad_norm": 16.944414138793945, + "learning_rate": 1.0502621921127774e-08, + "logits/chosen": 0.7050179243087769, + "logits/rejected": 0.8792567253112793, + "logps/chosen": -264.5372009277344, + "logps/ref_chosen": -97.28004455566406, + "logps/ref_rejected": -86.22888946533203, + "logps/rejected": -283.00103759765625, + "loss": 1.0318, + "margin_dpo/margin_mean": 29.514984130859375, + "margin_dpo/margin_std": 90.30196380615234, + "step": 607 + }, + { + "epoch": 0.9191232048374905, + "grad_norm": 18.15805435180664, + "learning_rate": 1.0126756596375685e-08, + "logits/chosen": 0.8514116406440735, + "logits/rejected": 0.8852680921554565, + "logps/chosen": -181.86837768554688, + "logps/ref_chosen": -66.10746765136719, + "logps/ref_rejected": -59.404441833496094, + "logps/rejected": -243.4190673828125, + "loss": 1.0596, + "margin_dpo/margin_mean": 68.25370788574219, + "margin_dpo/margin_std": 68.52870178222656, + "step": 608 + }, + { + "epoch": 0.9206349206349206, + "grad_norm": 14.3372163772583, + "learning_rate": 9.757601041885694e-09, + "logits/chosen": 1.0790916681289673, + "logits/rejected": 0.9650440216064453, + "logps/chosen": -173.4683380126953, + "logps/ref_chosen": -49.93000030517578, + "logps/ref_rejected": -98.36846923828125, + "logps/rejected": -298.0489501953125, + "loss": 0.9481, + "margin_dpo/margin_mean": 76.14215087890625, + "margin_dpo/margin_std": 91.03385925292969, + "step": 609 + }, + { + "epoch": 0.9221466364323507, + "grad_norm": 31.100370407104492, + "learning_rate": 9.395165583732379e-09, + "logits/chosen": 0.9039211869239807, + "logits/rejected": 0.9379135370254517, + "logps/chosen": -220.49267578125, + "logps/ref_chosen": -71.89997863769531, + "logps/ref_rejected": -89.20841979980469, + "logps/rejected": -285.78515625, + "loss": 1.1377, + "margin_dpo/margin_mean": 47.98402404785156, + "margin_dpo/margin_std": 88.36325073242188, + "step": 610 + }, + { + "epoch": 0.9236583522297808, + "grad_norm": 18.051315307617188, + "learning_rate": 9.03946036001449e-09, + "logits/chosen": 0.9056074619293213, + "logits/rejected": 0.9064819812774658, + "logps/chosen": -214.71875, + "logps/ref_chosen": -67.77259826660156, + "logps/ref_rejected": -66.90919494628906, + "logps/rejected": -263.32421875, + "loss": 1.1144, + "margin_dpo/margin_mean": 49.4688606262207, + "margin_dpo/margin_std": 66.27508544921875, + "step": 611 + }, + { + "epoch": 0.9251700680272109, + "grad_norm": 15.253009796142578, + "learning_rate": 8.690495320571839e-09, + "logits/chosen": 0.6356790065765381, + "logits/rejected": 0.6141253113746643, + "logps/chosen": -254.52713012695312, + "logps/ref_chosen": -98.77779388427734, + "logps/ref_rejected": -129.40658569335938, + "logps/rejected": -327.4943542480469, + "loss": 0.9714, + "margin_dpo/margin_mean": 42.33841323852539, + "margin_dpo/margin_std": 53.188514709472656, + "step": 612 + }, + { + "epoch": 0.926681783824641, + "grad_norm": 22.37784767150879, + "learning_rate": 8.348280226706722e-09, + "logits/chosen": 0.9218869209289551, + "logits/rejected": 0.9058539867401123, + "logps/chosen": -176.6582794189453, + "logps/ref_chosen": -39.422340393066406, + "logps/ref_rejected": -48.479248046875, + "logps/rejected": -244.13925170898438, + "loss": 0.9836, + "margin_dpo/margin_mean": 58.42406463623047, + "margin_dpo/margin_std": 89.93438720703125, + "step": 613 + }, + { + "epoch": 0.9281934996220711, + "grad_norm": 24.711509704589844, + "learning_rate": 8.012824650910937e-09, + "logits/chosen": 1.0547163486480713, + "logits/rejected": 0.9173270463943481, + "logps/chosen": -167.89730834960938, + "logps/ref_chosen": -52.9261474609375, + "logps/ref_rejected": -68.24092102050781, + "logps/rejected": -296.45574951171875, + "loss": 1.0583, + "margin_dpo/margin_mean": 113.24365234375, + "margin_dpo/margin_std": 79.29691314697266, + "step": 614 + }, + { + "epoch": 0.9297052154195011, + "grad_norm": 25.62435531616211, + "learning_rate": 7.684137976598088e-09, + "logits/chosen": 0.9779610633850098, + "logits/rejected": 0.9523606896400452, + "logps/chosen": -217.04598999023438, + "logps/ref_chosen": -70.00674438476562, + "logps/ref_rejected": -101.93522644042969, + "logps/rejected": -298.4114074707031, + "loss": 1.0525, + "margin_dpo/margin_mean": 49.43693542480469, + "margin_dpo/margin_std": 92.1063003540039, + "step": 615 + }, + { + "epoch": 0.9312169312169312, + "grad_norm": 16.771865844726562, + "learning_rate": 7.36222939784098e-09, + "logits/chosen": 0.9289509057998657, + "logits/rejected": 0.8705403804779053, + "logps/chosen": -209.6519012451172, + "logps/ref_chosen": -66.25517272949219, + "logps/ref_rejected": -92.31936645507812, + "logps/rejected": -286.00390625, + "loss": 1.0395, + "margin_dpo/margin_mean": 50.28779220581055, + "margin_dpo/margin_std": 81.98731994628906, + "step": 616 + }, + { + "epoch": 0.9327286470143613, + "grad_norm": 19.335580825805664, + "learning_rate": 7.047107919114586e-09, + "logits/chosen": 0.9605733752250671, + "logits/rejected": 0.9251487255096436, + "logps/chosen": -212.73922729492188, + "logps/ref_chosen": -70.3993911743164, + "logps/ref_rejected": -90.81809997558594, + "logps/rejected": -292.81744384765625, + "loss": 1.0436, + "margin_dpo/margin_mean": 59.65951919555664, + "margin_dpo/margin_std": 60.16530227661133, + "step": 617 + }, + { + "epoch": 0.9342403628117913, + "grad_norm": 19.181501388549805, + "learning_rate": 6.738782355044048e-09, + "logits/chosen": 0.9526699781417847, + "logits/rejected": 0.8337723016738892, + "logps/chosen": -169.63429260253906, + "logps/ref_chosen": -62.809661865234375, + "logps/ref_rejected": -109.43612670898438, + "logps/rejected": -310.1464538574219, + "loss": 0.9829, + "margin_dpo/margin_mean": 93.88569641113281, + "margin_dpo/margin_std": 91.3293228149414, + "step": 618 + }, + { + "epoch": 0.9357520786092215, + "grad_norm": 16.765586853027344, + "learning_rate": 6.437261330158206e-09, + "logits/chosen": 0.9776492118835449, + "logits/rejected": 0.9457724094390869, + "logps/chosen": -158.1160888671875, + "logps/ref_chosen": -47.85075378417969, + "logps/ref_rejected": -72.59744262695312, + "logps/rejected": -258.400634765625, + "loss": 0.9783, + "margin_dpo/margin_mean": 75.53782653808594, + "margin_dpo/margin_std": 94.00806427001953, + "step": 619 + }, + { + "epoch": 0.9372637944066515, + "grad_norm": 19.020383834838867, + "learning_rate": 6.142553278648238e-09, + "logits/chosen": 0.8768226504325867, + "logits/rejected": 0.8743495345115662, + "logps/chosen": -224.13729858398438, + "logps/ref_chosen": -79.60867309570312, + "logps/ref_rejected": -86.96884155273438, + "logps/rejected": -288.10784912109375, + "loss": 1.1095, + "margin_dpo/margin_mean": 56.610382080078125, + "margin_dpo/margin_std": 97.45252990722656, + "step": 620 + }, + { + "epoch": 0.9387755102040817, + "grad_norm": 17.923816680908203, + "learning_rate": 5.854666444131934e-09, + "logits/chosen": 0.7942125797271729, + "logits/rejected": 0.8802160620689392, + "logps/chosen": -215.23654174804688, + "logps/ref_chosen": -68.8500747680664, + "logps/ref_rejected": -53.94578552246094, + "logps/rejected": -231.7372589111328, + "loss": 1.0581, + "margin_dpo/margin_mean": 31.405014038085938, + "margin_dpo/margin_std": 76.86921691894531, + "step": 621 + }, + { + "epoch": 0.9402872260015117, + "grad_norm": 20.861454010009766, + "learning_rate": 5.573608879422875e-09, + "logits/chosen": 0.8228497505187988, + "logits/rejected": 0.8221659660339355, + "logps/chosen": -211.20352172851562, + "logps/ref_chosen": -49.52813720703125, + "logps/ref_rejected": -56.0748291015625, + "logps/rejected": -251.8733673095703, + "loss": 1.0707, + "margin_dpo/margin_mean": 34.12314987182617, + "margin_dpo/margin_std": 85.17213439941406, + "step": 622 + }, + { + "epoch": 0.9417989417989417, + "grad_norm": 18.039180755615234, + "learning_rate": 5.299388446305342e-09, + "logits/chosen": 0.9442222118377686, + "logits/rejected": 0.8388766050338745, + "logps/chosen": -213.2373809814453, + "logps/ref_chosen": -51.98558807373047, + "logps/ref_rejected": -89.86474609375, + "logps/rejected": -364.82171630859375, + "loss": 0.9715, + "margin_dpo/margin_mean": 113.70519256591797, + "margin_dpo/margin_std": 154.77015686035156, + "step": 623 + }, + { + "epoch": 0.9433106575963719, + "grad_norm": 16.79059410095215, + "learning_rate": 5.03201281531429e-09, + "logits/chosen": 0.9908918142318726, + "logits/rejected": 0.9922984838485718, + "logps/chosen": -189.19586181640625, + "logps/ref_chosen": -61.39537811279297, + "logps/ref_rejected": -64.496826171875, + "logps/rejected": -265.69671630859375, + "loss": 0.9914, + "margin_dpo/margin_mean": 73.39938354492188, + "margin_dpo/margin_std": 75.53822326660156, + "step": 624 + }, + { + "epoch": 0.9448223733938019, + "grad_norm": 16.281400680541992, + "learning_rate": 4.7714894655209174e-09, + "logits/chosen": 1.096935510635376, + "logits/rejected": 0.9919509887695312, + "logps/chosen": -173.40103149414062, + "logps/ref_chosen": -49.35209274291992, + "logps/ref_rejected": -75.47923278808594, + "logps/rejected": -266.97930908203125, + "loss": 1.1236, + "margin_dpo/margin_mean": 67.45115661621094, + "margin_dpo/margin_std": 78.00559997558594, + "step": 625 + }, + { + "epoch": 0.9463340891912321, + "grad_norm": 16.243318557739258, + "learning_rate": 4.517825684323323e-09, + "logits/chosen": 1.0832741260528564, + "logits/rejected": 1.023711085319519, + "logps/chosen": -122.06949615478516, + "logps/ref_chosen": -32.55897521972656, + "logps/ref_rejected": -71.60407257080078, + "logps/rejected": -270.57061767578125, + "loss": 1.0152, + "margin_dpo/margin_mean": 109.45604705810547, + "margin_dpo/margin_std": 145.14871215820312, + "step": 626 + }, + { + "epoch": 0.9478458049886621, + "grad_norm": 18.35875701904297, + "learning_rate": 4.271028567242818e-09, + "logits/chosen": 0.8411812782287598, + "logits/rejected": 0.708235502243042, + "logps/chosen": -198.689453125, + "logps/ref_chosen": -65.68330383300781, + "logps/ref_rejected": -124.0399169921875, + "logps/rejected": -374.5428161621094, + "loss": 0.9747, + "margin_dpo/margin_mean": 117.49674224853516, + "margin_dpo/margin_std": 99.51753234863281, + "step": 627 + }, + { + "epoch": 0.9493575207860923, + "grad_norm": 17.412473678588867, + "learning_rate": 4.0311050177251895e-09, + "logits/chosen": 0.857243001461029, + "logits/rejected": 0.9196332693099976, + "logps/chosen": -188.25320434570312, + "logps/ref_chosen": -70.50054168701172, + "logps/ref_rejected": -69.05288696289062, + "logps/rejected": -286.83795166015625, + "loss": 1.0335, + "margin_dpo/margin_mean": 100.03239440917969, + "margin_dpo/margin_std": 102.7291259765625, + "step": 628 + }, + { + "epoch": 0.9508692365835223, + "grad_norm": 17.979310989379883, + "learning_rate": 3.798061746947995e-09, + "logits/chosen": 0.9377484321594238, + "logits/rejected": 1.008423089981079, + "logps/chosen": -184.6234893798828, + "logps/ref_chosen": -68.93040466308594, + "logps/ref_rejected": -49.09862518310547, + "logps/rejected": -198.7704620361328, + "loss": 1.0083, + "margin_dpo/margin_mean": 33.978755950927734, + "margin_dpo/margin_std": 70.85810089111328, + "step": 629 + }, + { + "epoch": 0.9523809523809523, + "grad_norm": 17.69316864013672, + "learning_rate": 3.5719052736323806e-09, + "logits/chosen": 1.0553034543991089, + "logits/rejected": 0.9892318844795227, + "logps/chosen": -134.7643585205078, + "logps/ref_chosen": -36.853294372558594, + "logps/ref_rejected": -71.96033477783203, + "logps/rejected": -252.25209045410156, + "loss": 0.9744, + "margin_dpo/margin_mean": 82.38069152832031, + "margin_dpo/margin_std": 89.0606460571289, + "step": 630 + }, + { + "epoch": 0.9538926681783825, + "grad_norm": 18.01328468322754, + "learning_rate": 3.352641923861144e-09, + "logits/chosen": 0.9821938872337341, + "logits/rejected": 0.938322901725769, + "logps/chosen": -195.1949462890625, + "logps/ref_chosen": -64.66175079345703, + "logps/ref_rejected": -89.40802001953125, + "logps/rejected": -301.36553955078125, + "loss": 0.9122, + "margin_dpo/margin_mean": 81.42431640625, + "margin_dpo/margin_std": 62.148040771484375, + "step": 631 + }, + { + "epoch": 0.9554043839758125, + "grad_norm": 18.51304054260254, + "learning_rate": 3.140277830901428e-09, + "logits/chosen": 0.9517176151275635, + "logits/rejected": 0.9564006328582764, + "logps/chosen": -203.13833618164062, + "logps/ref_chosen": -74.05264282226562, + "logps/ref_rejected": -66.81067657470703, + "logps/rejected": -266.5632629394531, + "loss": 0.93, + "margin_dpo/margin_mean": 70.66688537597656, + "margin_dpo/margin_std": 84.40769958496094, + "step": 632 + }, + { + "epoch": 0.9569160997732427, + "grad_norm": 17.859493255615234, + "learning_rate": 2.9348189350335007e-09, + "logits/chosen": 0.9254288077354431, + "logits/rejected": 0.87281334400177, + "logps/chosen": -146.23092651367188, + "logps/ref_chosen": -45.47814178466797, + "logps/ref_rejected": -66.22723388671875, + "logps/rejected": -234.7810516357422, + "loss": 1.0317, + "margin_dpo/margin_mean": 67.80101776123047, + "margin_dpo/margin_std": 67.22663879394531, + "step": 633 + }, + { + "epoch": 0.9584278155706727, + "grad_norm": 23.980148315429688, + "learning_rate": 2.736270983384276e-09, + "logits/chosen": 0.8979266285896301, + "logits/rejected": 0.8862963914871216, + "logps/chosen": -212.658203125, + "logps/ref_chosen": -57.611724853515625, + "logps/ref_rejected": -54.337623596191406, + "logps/rejected": -215.5278778076172, + "loss": 1.3386, + "margin_dpo/margin_mean": 6.1437668800354, + "margin_dpo/margin_std": 45.94617462158203, + "step": 634 + }, + { + "epoch": 0.9599395313681028, + "grad_norm": 18.18239974975586, + "learning_rate": 2.5446395297668287e-09, + "logits/chosen": 0.945531964302063, + "logits/rejected": 0.9129035472869873, + "logps/chosen": -211.33154296875, + "logps/ref_chosen": -49.714500427246094, + "logps/ref_rejected": -67.80001831054688, + "logps/rejected": -269.0682067871094, + "loss": 1.1286, + "margin_dpo/margin_mean": 39.65115737915039, + "margin_dpo/margin_std": 108.38533020019531, + "step": 635 + }, + { + "epoch": 0.9614512471655329, + "grad_norm": 15.897844314575195, + "learning_rate": 2.359929934524829e-09, + "logits/chosen": 0.9161902666091919, + "logits/rejected": 0.8111059069633484, + "logps/chosen": -202.0909423828125, + "logps/ref_chosen": -50.605045318603516, + "logps/ref_rejected": -77.53902435302734, + "logps/rejected": -293.85711669921875, + "loss": 0.9728, + "margin_dpo/margin_mean": 64.83220672607422, + "margin_dpo/margin_std": 55.20343017578125, + "step": 636 + }, + { + "epoch": 0.9629629629629629, + "grad_norm": 18.38071060180664, + "learning_rate": 2.1821473643827137e-09, + "logits/chosen": 0.9871467351913452, + "logits/rejected": 0.8986793160438538, + "logps/chosen": -245.16378784179688, + "logps/ref_chosen": -74.90892028808594, + "logps/ref_rejected": -97.07221984863281, + "logps/rejected": -350.8941650390625, + "loss": 1.0848, + "margin_dpo/margin_mean": 83.56706237792969, + "margin_dpo/margin_std": 141.6215057373047, + "step": 637 + }, + { + "epoch": 0.9644746787603931, + "grad_norm": 16.282268524169922, + "learning_rate": 2.0112967923011646e-09, + "logits/chosen": 0.8492100238800049, + "logits/rejected": 0.823014497756958, + "logps/chosen": -249.89724731445312, + "logps/ref_chosen": -67.56559753417969, + "logps/ref_rejected": -83.56005859375, + "logps/rejected": -284.5699157714844, + "loss": 0.998, + "margin_dpo/margin_mean": 18.678207397460938, + "margin_dpo/margin_std": 95.15095520019531, + "step": 638 + }, + { + "epoch": 0.9659863945578231, + "grad_norm": 18.59486198425293, + "learning_rate": 1.847382997337943e-09, + "logits/chosen": 0.9617091417312622, + "logits/rejected": 0.8375701308250427, + "logps/chosen": -166.54019165039062, + "logps/ref_chosen": -40.15863037109375, + "logps/ref_rejected": -65.15992736816406, + "logps/rejected": -268.45574951171875, + "loss": 0.9745, + "margin_dpo/margin_mean": 76.91429138183594, + "margin_dpo/margin_std": 78.58573913574219, + "step": 639 + }, + { + "epoch": 0.9674981103552532, + "grad_norm": 19.1875, + "learning_rate": 1.690410564514244e-09, + "logits/chosen": 0.8923076391220093, + "logits/rejected": 0.7876547574996948, + "logps/chosen": -186.8981475830078, + "logps/ref_chosen": -56.694557189941406, + "logps/ref_rejected": -100.86506652832031, + "logps/rejected": -306.697265625, + "loss": 1.0631, + "margin_dpo/margin_mean": 75.62860870361328, + "margin_dpo/margin_std": 84.25181579589844, + "step": 640 + }, + { + "epoch": 0.9690098261526833, + "grad_norm": 16.5269775390625, + "learning_rate": 1.5403838846864692e-09, + "logits/chosen": 0.9277850389480591, + "logits/rejected": 1.0083123445510864, + "logps/chosen": -200.206298828125, + "logps/ref_chosen": -67.92603302001953, + "logps/ref_rejected": -50.31890106201172, + "logps/rejected": -228.96484375, + "loss": 1.0154, + "margin_dpo/margin_mean": 46.365638732910156, + "margin_dpo/margin_std": 66.46011352539062, + "step": 641 + }, + { + "epoch": 0.9705215419501134, + "grad_norm": 26.048898696899414, + "learning_rate": 1.3973071544233218e-09, + "logits/chosen": 0.9023500084877014, + "logits/rejected": 0.9192708134651184, + "logps/chosen": -210.30873107910156, + "logps/ref_chosen": -66.0007553100586, + "logps/ref_rejected": -71.08174133300781, + "logps/rejected": -260.4029235839844, + "loss": 1.1862, + "margin_dpo/margin_mean": 45.013214111328125, + "margin_dpo/margin_std": 130.67271423339844, + "step": 642 + }, + { + "epoch": 0.9720332577475435, + "grad_norm": 18.16506004333496, + "learning_rate": 1.261184375888541e-09, + "logits/chosen": 0.8603549599647522, + "logits/rejected": 0.8541243076324463, + "logps/chosen": -236.55422973632812, + "logps/ref_chosen": -85.6345443725586, + "logps/ref_rejected": -86.74612426757812, + "logps/rejected": -362.2666320800781, + "loss": 1.0099, + "margin_dpo/margin_mean": 124.60084533691406, + "margin_dpo/margin_std": 134.28097534179688, + "step": 643 + }, + { + "epoch": 0.9735449735449735, + "grad_norm": 15.612887382507324, + "learning_rate": 1.1320193567288527e-09, + "logits/chosen": 1.0640028715133667, + "logits/rejected": 1.0360305309295654, + "logps/chosen": -185.77064514160156, + "logps/ref_chosen": -43.66929626464844, + "logps/ref_rejected": -61.423343658447266, + "logps/rejected": -251.08602905273438, + "loss": 1.2055, + "margin_dpo/margin_mean": 47.56130599975586, + "margin_dpo/margin_std": 103.23611450195312, + "step": 644 + }, + { + "epoch": 0.9750566893424036, + "grad_norm": 18.053247451782227, + "learning_rate": 1.0098157099674987e-09, + "logits/chosen": 0.9287192821502686, + "logits/rejected": 0.9003345966339111, + "logps/chosen": -219.2329864501953, + "logps/ref_chosen": -58.112247467041016, + "logps/ref_rejected": -62.135780334472656, + "logps/rejected": -301.72528076171875, + "loss": 1.0062, + "margin_dpo/margin_mean": 78.46878051757812, + "margin_dpo/margin_std": 99.51702880859375, + "step": 645 + }, + { + "epoch": 0.9765684051398337, + "grad_norm": 19.156396865844727, + "learning_rate": 8.945768539031783e-10, + "logits/chosen": 0.8444064855575562, + "logits/rejected": 0.7318651676177979, + "logps/chosen": -187.85931396484375, + "logps/ref_chosen": -48.80539321899414, + "logps/ref_rejected": -79.34556579589844, + "logps/rejected": -289.73748779296875, + "loss": 1.1103, + "margin_dpo/margin_mean": 71.3379898071289, + "margin_dpo/margin_std": 87.06074523925781, + "step": 646 + }, + { + "epoch": 0.9780801209372638, + "grad_norm": 22.503517150878906, + "learning_rate": 7.863060120144316e-10, + "logits/chosen": 0.9451459050178528, + "logits/rejected": 0.7789862155914307, + "logps/chosen": -220.54901123046875, + "logps/ref_chosen": -63.22569274902344, + "logps/ref_rejected": -149.20523071289062, + "logps/rejected": -385.1798095703125, + "loss": 0.8997, + "margin_dpo/margin_mean": 78.65129089355469, + "margin_dpo/margin_std": 98.49140930175781, + "step": 647 + }, + { + "epoch": 0.9795918367346939, + "grad_norm": 15.99667739868164, + "learning_rate": 6.850062128694045e-10, + "logits/chosen": 0.8275370597839355, + "logits/rejected": 0.7766451835632324, + "logps/chosen": -226.41717529296875, + "logps/ref_chosen": -67.55680084228516, + "logps/ref_rejected": -78.43806457519531, + "logps/rejected": -259.7667236328125, + "loss": 1.0994, + "margin_dpo/margin_mean": 22.468292236328125, + "margin_dpo/margin_std": 122.872314453125, + "step": 648 + }, + { + "epoch": 0.981103552532124, + "grad_norm": 26.316551208496094, + "learning_rate": 5.906802900412788e-10, + "logits/chosen": 0.9751644134521484, + "logits/rejected": 0.926426887512207, + "logps/chosen": -177.2069091796875, + "logps/ref_chosen": -42.90663146972656, + "logps/ref_rejected": -64.04318237304688, + "logps/rejected": -287.71142578125, + "loss": 1.1161, + "margin_dpo/margin_mean": 89.36796569824219, + "margin_dpo/margin_std": 82.30953979492188, + "step": 649 + }, + { + "epoch": 0.982615268329554, + "grad_norm": 17.684829711914062, + "learning_rate": 5.033308820289184e-10, + "logits/chosen": 1.0313094854354858, + "logits/rejected": 0.9337575435638428, + "logps/chosen": -158.346923828125, + "logps/ref_chosen": -44.43085479736328, + "logps/ref_rejected": -88.01769256591797, + "logps/rejected": -314.0736083984375, + "loss": 1.0373, + "margin_dpo/margin_mean": 112.13986206054688, + "margin_dpo/margin_std": 143.96426391601562, + "step": 650 + }, + { + "epoch": 0.9841269841269841, + "grad_norm": 16.09817886352539, + "learning_rate": 4.2296043218295606e-10, + "logits/chosen": 0.8436592221260071, + "logits/rejected": 0.8099097013473511, + "logps/chosen": -188.1514892578125, + "logps/ref_chosen": -54.74213790893555, + "logps/ref_rejected": -69.22908020019531, + "logps/rejected": -218.41043090820312, + "loss": 1.0272, + "margin_dpo/margin_mean": 15.772006034851074, + "margin_dpo/margin_std": 73.69939422607422, + "step": 651 + }, + { + "epoch": 0.9856386999244142, + "grad_norm": 21.886402130126953, + "learning_rate": 3.4957118863768176e-10, + "logits/chosen": 0.9487487077713013, + "logits/rejected": 0.9406764507293701, + "logps/chosen": -158.75241088867188, + "logps/ref_chosen": -39.43302917480469, + "logps/ref_rejected": -45.17872619628906, + "logps/rejected": -234.00820922851562, + "loss": 1.1864, + "margin_dpo/margin_mean": 69.51010131835938, + "margin_dpo/margin_std": 92.36702728271484, + "step": 652 + }, + { + "epoch": 0.9871504157218443, + "grad_norm": 16.707674026489258, + "learning_rate": 2.831652042480093e-10, + "logits/chosen": 0.7637461423873901, + "logits/rejected": 0.8205589056015015, + "logps/chosen": -223.41229248046875, + "logps/ref_chosen": -68.25508117675781, + "logps/ref_rejected": -78.01954650878906, + "logps/rejected": -293.35223388671875, + "loss": 0.9948, + "margin_dpo/margin_mean": 60.175498962402344, + "margin_dpo/margin_std": 88.66094970703125, + "step": 653 + }, + { + "epoch": 0.9886621315192744, + "grad_norm": 18.35707664489746, + "learning_rate": 2.2374433653205016e-10, + "logits/chosen": 0.9188174605369568, + "logits/rejected": 0.8119950890541077, + "logps/chosen": -183.5347442626953, + "logps/ref_chosen": -44.32666778564453, + "logps/ref_rejected": -75.5877685546875, + "logps/rejected": -248.86541748046875, + "loss": 1.062, + "margin_dpo/margin_mean": 34.06956100463867, + "margin_dpo/margin_std": 89.29640197753906, + "step": 654 + }, + { + "epoch": 0.9901738473167044, + "grad_norm": 15.2190580368042, + "learning_rate": 1.7131024761923852e-10, + "logits/chosen": 0.7755975723266602, + "logits/rejected": 0.8196406364440918, + "logps/chosen": -192.36354064941406, + "logps/ref_chosen": -72.61821746826172, + "logps/ref_rejected": -61.733245849609375, + "logps/rejected": -256.1627197265625, + "loss": 0.8594, + "margin_dpo/margin_mean": 74.68414306640625, + "margin_dpo/margin_std": 70.4334487915039, + "step": 655 + }, + { + "epoch": 0.9916855631141346, + "grad_norm": 15.999876022338867, + "learning_rate": 1.2586440420372934e-10, + "logits/chosen": 0.7340442538261414, + "logits/rejected": 0.7868098020553589, + "logps/chosen": -193.5328369140625, + "logps/ref_chosen": -88.81657409667969, + "logps/ref_rejected": -67.2658462524414, + "logps/rejected": -228.84466552734375, + "loss": 0.9997, + "margin_dpo/margin_mean": 56.862571716308594, + "margin_dpo/margin_std": 70.70635986328125, + "step": 656 + }, + { + "epoch": 0.9931972789115646, + "grad_norm": 20.399385452270508, + "learning_rate": 8.740807750345913e-11, + "logits/chosen": 1.0027674436569214, + "logits/rejected": 0.8794127702713013, + "logps/chosen": -157.93316650390625, + "logps/ref_chosen": -34.980865478515625, + "logps/ref_rejected": -85.61087036132812, + "logps/rejected": -322.360595703125, + "loss": 0.9962, + "margin_dpo/margin_mean": 113.79745483398438, + "margin_dpo/margin_std": 116.89727783203125, + "step": 657 + }, + { + "epoch": 0.9947089947089947, + "grad_norm": 18.558809280395508, + "learning_rate": 5.594234322453539e-11, + "logits/chosen": 1.0621709823608398, + "logits/rejected": 1.0080801248550415, + "logps/chosen": -221.82943725585938, + "logps/ref_chosen": -67.89546203613281, + "logps/ref_rejected": -78.8687515258789, + "logps/rejected": -280.9857177734375, + "loss": 1.1266, + "margin_dpo/margin_mean": 48.18297576904297, + "margin_dpo/margin_std": 105.40860748291016, + "step": 658 + }, + { + "epoch": 0.9962207105064248, + "grad_norm": 17.150558471679688, + "learning_rate": 3.146808153123293e-11, + "logits/chosen": 1.0832101106643677, + "logits/rejected": 0.9403669834136963, + "logps/chosen": -138.60635375976562, + "logps/ref_chosen": -34.2476806640625, + "logps/ref_rejected": -74.97065734863281, + "logps/rejected": -273.552001953125, + "loss": 1.091, + "margin_dpo/margin_mean": 94.22268676757812, + "margin_dpo/margin_std": 84.5680923461914, + "step": 659 + }, + { + "epoch": 0.9977324263038548, + "grad_norm": 18.795955657958984, + "learning_rate": 1.3985977021235829e-11, + "logits/chosen": 0.9274425506591797, + "logits/rejected": 0.9459260106086731, + "logps/chosen": -189.62442016601562, + "logps/ref_chosen": -57.3446044921875, + "logps/ref_rejected": -62.27751922607422, + "logps/rejected": -275.3188171386719, + "loss": 0.8589, + "margin_dpo/margin_mean": 80.76148223876953, + "margin_dpo/margin_std": 117.38458251953125, + "step": 660 + }, + { + "epoch": 0.999244142101285, + "grad_norm": 18.007423400878906, + "learning_rate": 3.4965187065971735e-12, + "logits/chosen": 0.883224368095398, + "logits/rejected": 0.9031695127487183, + "logps/chosen": -226.634521484375, + "logps/ref_chosen": -63.84727478027344, + "logps/ref_rejected": -49.75703430175781, + "logps/rejected": -239.585205078125, + "loss": 1.1426, + "margin_dpo/margin_mean": 27.040931701660156, + "margin_dpo/margin_std": 113.18049621582031, + "step": 661 + }, + { + "epoch": 0.999244142101285, + "step": 661, + "total_flos": 0.0, + "train_loss": 1.1443162902220294, + "train_runtime": 1702.0491, + "train_samples_per_second": 24.874, + "train_steps_per_second": 0.388 + } + ], + "logging_steps": 1, + "max_steps": 661, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}